Source code for netneurotools.metrics.statistical

"""Functions for calculating statistical network metrics."""

import numpy as np

from .. import has_numba
if has_numba:
    from numba import njit

from .metrics_utils import _graph_laplacian


def _network_pearsonr_vectorized(annot1, annot2, weight):
    annot1 = annot1 - np.mean(annot1)
    annot2 = annot2 - np.mean(annot2)
    upper = np.sum(np.multiply(weight, np.outer(annot1, annot2)))
    lower1 = np.sum(np.multiply(weight, np.outer(annot1, annot1)))
    lower2 = np.sum(np.multiply(weight, np.outer(annot2, annot2)))
    return upper / np.sqrt(lower1) / np.sqrt(lower2)


def _network_pearsonr_numba(annot1, annot2, weight):
    n = annot1.shape[0]
    annot1 = annot1 - np.mean(annot1)
    annot2 = annot2 - np.mean(annot2)
    upper, lower1, lower2 = 0.0, 0.0, 0.0
    for i in range(n):
        for j in range(n):
            upper += annot1[i] * annot2[j] * weight[i, j]
            lower1 += annot1[i] * annot1[j] * weight[i, j]
            lower2 += annot2[i] * annot2[j] * weight[i, j]
    return upper / np.sqrt(lower1) / np.sqrt(lower2)


if has_numba:
    _network_pearsonr_numba = njit(_network_pearsonr_numba)


[docs] def network_pearsonr(annot1, annot2, weight, use_numba=has_numba): r""" Calculate pearson correlation between two annotation vectors. .. warning:: Test before use. Parameters ---------- annot1 : (N,) array_like First annotation vector, demean will be applied. annot2 : (N,) array_like Second annotation vector, demean will be applied. weight : (N, N) array_like Weight matrix. Diagonal elements should be 1. use_numba : bool, optional Whether to use numba for calculation. Default: True (if numba is available). Returns ------- corr : float Network correlation between `annot1` and `annot2` Notes ----- If Pearson correlation is represented as .. math:: \rho_{x,y} = \dfrac{ \mathrm{sum}(I \times (\hat{x} \otimes \hat{y})) }{ \sigma_x \sigma_y } The network correlation is defined analogously as .. math:: \rho_{x,y,G} = \dfrac{ \mathrm{sum}(W \times (\hat{x} \otimes \hat{y})) }{ \sigma_{x,W} \sigma_{y,W} } where :math:`\hat{x}` and :math:`\hat{y}` are the demeaned annotation vectors, The weight matrix :math:`W` is used to represent the network structure. It is usually in the form of :math:`W = \\exp(-kL)` where :math:`L` is the length matrix and :math:`k` is a decay parameter. Example using shortest path length as weight .. code:: python spl, _ = distance_wei_floyd(D) # input should be distance matrix spl_wei = 1 / np.exp(spl) netcorr = network_pearsonr(annot1, annot2, spl_wei) Example using (inverse) effective resistance as weight .. code:: python R_eff = effective_resistance(W) R_eff_norm = R_eff / np.max(R_eff) W = 1 / R_eff_norm W = W / np.max(W) np.fill_diagonal(W, 1.0) netcorr = network_pearsonr(annot1, annot2, W) References ---------- .. [1] Coscia, M. (2021). Pearson correlations on complex networks. Journal of Complex Networks, 9(6), cnab036. https://doi.org/10.1093/comnet/cnab036 See Also -------- netneurotools.stats.network_pearsonr_pairwise """ if use_numba: if not has_numba: raise ValueError("Numba not installed; cannot use numba for calculation") return _network_pearsonr_numba(annot1, annot2, weight) else: return _network_pearsonr_vectorized(annot1, annot2, weight)
def _cross_outer(annot_mat): """ Calculate cross outer product of input matrix. This functions is only used in `network_pearsonr_pairwise`. Parameters ---------- annot_mat : (N, D) array_like Input matrix Returns ------- cross_outer : (N, N, D, D) numpy.ndarray Cross outer product of `annot_mat` """ n_samp, n_feat = annot_mat.shape cross_outer = np.empty((n_samp, n_samp, n_feat, n_feat), annot_mat.dtype) for a in range(n_samp): for b in range(n_samp): for c in range(n_feat): for d in range(n_feat): cross_outer[a, b, c, d] = annot_mat[a, c] * annot_mat[b, d] return cross_outer if has_numba: # ("float64[:,:,:,::1](float64[:,::1])") _cross_outer = njit(_cross_outer) def _multiply_sum(cross_outer, weight): """ Multiply and sum cross outer product. This functions is only used in `network_pearsonr_pairwise`. Parameters ---------- cross_outer : (N, N, D, D) array_like Cross outer product of `annot_mat` weight : (D, D) array_like Weight matrix Returns ------- cross_outer_after : (N, N) numpy.ndarray Result of multiplying and summing `cross_outer` """ n_samp, _, n_dim, _ = cross_outer.shape cross_outer_after = np.empty((n_samp, n_samp), cross_outer.dtype) for i in range(n_samp): for j in range(n_samp): curr_sum = 0.0 for k in range(n_dim): for l in range(n_dim): # noqa: E741 curr_sum += weight[k, l] * cross_outer[i, j, k, l] cross_outer_after[i, j] = curr_sum return cross_outer_after if has_numba: # ("float64[:,::1](float64[:,:,:,::1],float64[:,::1])") _multiply_sum = njit(_multiply_sum)
[docs] def network_pearsonr_pairwise(annot_mat, weight): """ Calculate pairwise network correlation between rows of `annot_mat`. .. warning:: Test before use. Parameters ---------- annot_mat : (N, D) array_like Input matrix weight : (D, D) array_like Weight matrix. Diagonal elements should be 1. Returns ------- corr_mat : (N, N) numpy.ndarray Pairwise network correlation matrix Notes ----- This is a faster version of :meth:`netneurotools.stats.network_pearsonr` for calculating pairwise network correlation between rows of `annot_mat`. Check :meth:`netneurotools.stats.network_pearsonr` for details. See Also -------- netneurotools.stats.network_pearsonr """ annot_mat_demean = annot_mat - np.mean(annot_mat, axis=1, keepdims=True) if has_numba: cross_outer = _cross_outer(annot_mat_demean) cross_outer_after = _multiply_sum(cross_outer, weight) else: # https://stackoverflow.com/questions/24839481/python-matrix-outer-product cross_outer = np.einsum('ac,bd->abcd', annot_mat_demean, annot_mat_demean) cross_outer_after = np.sum(np.multiply(cross_outer, weight), axis=(2, 3)) # translating the two lines below in numba does not speed up much lower = np.sqrt(np.diagonal(cross_outer_after)) return cross_outer_after / np.einsum('i,j', lower, lower)
def _onehot_quadratic_form_broadcast(Q_star): """ Calculate one-hot quadratic form of input matrix. This functions is only used in `effective_resistance`. Parameters ---------- Q_star : (N, N) array_like Input matrix Returns ------- R_eff : (N, N) numpy.ndarray One-hot quadratic form of `Q_star` """ n = Q_star.shape[0] R_eff = np.empty((n, n), Q_star.dtype) for i in range(n): for j in range(n): R_eff[i, j] = Q_star[i, i] - Q_star[j, i] - Q_star[i, j] + Q_star[j, j] return R_eff if has_numba: # ("float64[:,::1](float64[:,::1])") _onehot_quadratic_form_broadcast = njit(_onehot_quadratic_form_broadcast)
[docs] def effective_resistance(W, directed=True): """ Calculate effective resistance matrix. The effective resistance between two nodes in a graph, often used in the context of electrical networks, is a measure that stems from the inverse of the Laplacian matrix of the graph. .. warning:: Test before use. Parameters ---------- W : (N, N) array_like Weight matrix. directed : bool, optional Whether the graph is directed. This is used to determine whether to turn on the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are using a symmetric weight matrix (while real-valued implying hermitian), you can set this to False for better performance. Default: True Returns ------- R_eff : (N, N) numpy.ndarray Effective resistance matrix Notes ----- The effective resistance between two nodes :math:`i` and :math:`j` is defined as .. math:: R_{ij} = (e_i - e_j)^T Q^* (e_i - e_j) where :math:`Q^*` is the Moore-Penrose pseudoinverse of the Laplacian matrix :math:`L` of the graph, and :math:`e_i` is the :math:`i`-th standard basis vector. References ---------- .. [1] Ellens, W., Spieksma, F. M., Van Mieghem, P., Jamakovic, A., & Kooij, R. E. (2011). Effective graph resistance. Linear Algebra and Its Applications, 435(10), 2491–2506. https://doi.org/10.1016/j.laa.2011.02.024 See Also -------- netneurotools.stats.network_polarisation """ L = _graph_laplacian(W) Q_star = np.linalg.pinv(L, hermitian=not directed) if has_numba: R_eff = _onehot_quadratic_form_broadcast(Q_star) else: Q_star_diag = np.diag(Q_star) R_eff = \ Q_star_diag[:, np.newaxis] \ - Q_star \ - Q_star.T \ + Q_star_diag[np.newaxis, :] return R_eff
def _polariz_diff(vec): """ Calculate difference between positive and negative parts of a vector. This functions is only used in `network_polarisation`. Parameters ---------- vec : (N,) array_like Input vector. Must have both positive and negative values. Returns ------- vec_diff : (N,) numpy.ndarray Difference between positive and negative parts of `vec` """ # vec_pos = np.maximum(vec, 0.0) vec_pos /= np.max(vec_pos) # vec_neg = np.minimum(vec, 0.0) vec_neg = np.abs(vec_neg) vec_neg /= np.max(vec_neg) return (vec_pos - vec_neg) if has_numba: _polariz_diff = njit(_polariz_diff) def _quadratic_form(W, vec_left, vec_right, squared=False): """ Calculate quadratic form :math:`v_{left}^T W v_{right}`. Parameters ---------- W : (N, N) array_like Input matrix. vec_left : (N,) array_like Left weight vector. vec_right : (N,) array_like Right weight vector. squared : bool, optional Whether to square the input weight matrix. Default: False Returns ------- quadratic_form : float Quadratic form from `W`, `vec_left`, and `vec_right` """ # [numpy] # (vec_left.T @ W @ vec_right)[0, 0] # [numba] # vec = np.ascontiguousarray(vec[np.newaxis, :]) n = W.shape[0] ret = 0.0 for i in range(n): for j in range(n): if squared: ret += vec_left[i] * vec_right[j] * W[i, j]**2 else: ret += vec_left[i] * vec_right[j] * W[i, j] return ret if has_numba: _quadratic_form = njit(_quadratic_form)
[docs] def network_polarisation(vec, W, directed=True): r""" Calculate polarisation of a vector on a graph. Network polarisation is a measure of polizzartion taken into account all the three factors below [1]_: - how extreme the opinions of the people are - how much they organize into echo chambers, and - how these echo chambers organize in the network .. warning:: Test before use. Parameters ---------- vec : (N,) array_like Polarization vector. Must have both positive and negative values. Will be normalized between -1 and 1 internally. W : (N, N) array_like Weight matrix. directed : bool, optional Whether the graph is directed. This is used to determine whether to turn on the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are using a symmetric weight matrix (while real-valued implying hermitian), you can set this to False for better performance. Default: True Returns ------- polariz : float Polarization of `vec` on `W` Notes ----- The measure is based on the genralized Eucledian distance, defined as .. math:: \delta_{G, o} = \sqrt{(o^+ - o^-)^T Q^* (o^+ - o^-)} where :math:`o^+` and :math:`o^-` are the positive and negative parts of the polarization vector, and :math:`Q^*` is the Moore-Penrose pseudoinverse of the Laplacian matrix :math:`L` of the graph. Check :func:`effective_resistance` for similarity. References ---------- .. [1] Hohmann, M., Devriendt, K., & Coscia, M. (2023). Quantifying ideological polarization on a network using generalized Euclidean distance. Science Advances, 9(9), eabq2044. https://doi.org/10.1126/sciadv.abq2044 See Also -------- netneurotools.stats.effective_resistance """ L = _graph_laplacian(W) Q_star = np.linalg.pinv(L, hermitian=not directed) diff = _polariz_diff(vec) if has_numba: polariz_sq = _quadratic_form(Q_star, diff, diff, squared=False) else: polariz_sq = (diff.T @ Q_star @ diff) return np.sqrt(polariz_sq)
def _network_variance_vectorized(vec, D): p = vec / np.sum(vec) return 0.5 * (p.T @ np.multiply(D, D) @ p) def _network_variance_numba(vec, D): p = vec / np.sum(vec) return 0.5 * _quadratic_form(D, p, p, squared=True) if has_numba: _network_variance_numba = njit(_network_variance_numba)
[docs] def network_variance(vec, D, use_numba=has_numba): r""" Calculate variance of a vector on a graph. Network variance is a measure of variance taken into account the network structure. .. warning:: Test before use. Parameters ---------- vec : (N,) array_like Input vector. Must be all positive. Will be normalized internally as a probability distribution. D : (N, N) array_like Distance matrix. use_numba : bool, optional Whether to use numba for calculation. Default: True (if numba is available). Returns ------- network_variance : float Network variance of `vec` on `D` Notes ----- The network variance is defined as .. math:: var(p) = \frac{1}{2} \sum_{i, j} p(i) p(j) d^2(i,j) where :math:`p` is the probability distribution of `vec`, and :math:`d(i,j)` is the distance between node :math:`i` and :math:`j`. The distance matrix :math:`D` can make use of effective resistance or its square root. Example using effective resistance as weight matrix .. code:: python R_eff = effective_resistance(W) netvar = network_variance(vec, R_eff) References ---------- .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022). Variance and covariance of distributions on graphs. SIAM Review, 64(2), 343–359. https://doi.org/10.1137/20M1361328 See Also -------- netneurotools.stats.network_covariance """ if use_numba: if not has_numba: raise ValueError("Numba not installed; cannot use numba for calculation") return _network_variance_numba(vec, D) else: return _network_variance_vectorized(vec, D)
def _network_covariance_vectorized(joint_pmat, D, calc_marginal=True): p = np.sum(joint_pmat, axis=1) q = np.sum(joint_pmat, axis=0) D_sq = np.multiply(D, D) cov = p.T @ D_sq @ q - np.sum(np.multiply(joint_pmat, D_sq)) if calc_marginal: var_p = p.T @ D_sq @ p var_q = q.T @ D_sq @ q else: var_p, var_q = 0, 0 return 0.5 * cov, 0.5 * var_p, 0.5 * var_q def _network_covariance_numba(joint_pmat, D, calc_marginal=True): n = joint_pmat.shape[0] p = np.sum(joint_pmat, axis=1) q = np.sum(joint_pmat, axis=0) cov = 0.0 var_p, var_q = 0.0, 0.0 for i in range(n): for j in range(n): cov += (p[i] * q[j] - joint_pmat[i, j]) * D[i, j]**2 if calc_marginal: var_p += p[i] * p[j] * D[i, j]**2 var_q += q[i] * q[j] * D[i, j]**2 return 0.5 * cov, 0.5 * var_p, 0.5 * var_q if has_numba: _network_covariance_numba = njit(_network_covariance_numba)
[docs] def network_covariance(joint_pmat, D, calc_marginal=True, use_numba=has_numba): r""" Calculate covariance of a joint probability matrix on a graph. .. warning:: Test before use. Parameters ---------- joint_pmat : (N, N) array_like Joint probability matrix. Please make sure that it is valid. D : (N, N) array_like Distance matrix. calc_marginal : bool, optional Whether to calculate marginal variance. It will be marginally faster if :code:`calc_marginal=False` (returning marginal variances as 0). Default: True use_numba : bool, optional Whether to use numba for calculation. Default: True (if numba is available). Returns ------- network_covariance : float Covariance of `joint_pmat` on `D` var_p : float Marginal variance of `joint_pmat` on `D`. Will be 0 if :code:`calc_marginal=False` var_q : float Marginal variance of `joint_pmat` on `D`. Will be 0 if :code:`calc_marginal=False` Notes ----- The network variance is defined as .. math:: cov(P) = \frac{1}{2} \sum_{i, j} [p(i) q(j) - P(i,j)] d^2(i,j) where :math:`P` is the joint probability matrix, :math:`p` and :math:`q` are the marginal probability distributions of `joint_pmat`, and :math:`d(i,j)` is the distance between node :math:`i` and :math:`j`. Check :func:`network_variance` for usage. References ---------- .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022). Variance and covariance of distributions on graphs. SIAM Review, 64(2), 343–359. https://doi.org/10.1137/20M1361328 See Also -------- netneurotools.stats.network_variance """ if use_numba: if not has_numba: raise ValueError("Numba not installed; cannot use numba for calculation") return _network_covariance_numba(joint_pmat, D, calc_marginal) else: return _network_covariance_vectorized(joint_pmat, D, calc_marginal)