"""Functions for generating consensus networks."""
import numpy as np
from sklearn.utils.validation import (
check_random_state, check_array, check_consistent_length
)
[docs]
def func_consensus(data, n_boot=1000, ci=95, seed=None):
"""
Calculate thresholded group consensus functional connectivity graph.
This function concatenates all time series in `data` and computes a group
correlation matrix based on this extended time series. It then generates
length `T` bootstrapped samples from the concatenated matrix and estimates
confidence intervals for all correlations. Correlations whose sign is
consistent across bootstraps are retained; inconsistent correlations are
set to zero.
If `n_boot` is set to 0 or None a simple, group-averaged functional
connectivity matrix is estimated, instead.
Parameters
----------
data : (N, T, S) array_like (or a list of S arrays, each shaped as (N, T))
Pre-processed functional time series, where `N` is the number of nodes,
`T` is the number of volumes in the time series, and `S` is the number
of subjects.
n_boot : int, optional
Number of bootstraps for which to generate correlation. Default: 1000
ci : (0, 100) float, optional
Confidence interval for which to assess the reliability of correlations
with bootstraps. Default: 95
seed : int, optional
Random seed. Default: None
Returns
-------
consensus : (N, N) numpy.ndarray
Thresholded, group-level correlation matrix
References
----------
Mišić, B., Betzel, R. F., Nematzadeh, A., Goni, J., Griffa, A., Hagmann,
P., Flammini, A., Ahn, Y.-Y., & Sporns, O. (2015). Cooperative and
competitive spreading dynamics on the human connectome. Neuron, 86(6),
1518-1529.
"""
# check inputs
rs = check_random_state(seed)
if ci > 100 or ci < 0:
raise ValueError("`ci` must be between 0 and 100.")
# group-average functional connectivity matrix desired instead of bootstrap
if n_boot == 0 or n_boot is None:
if isinstance(data, list):
corrs = [np.corrcoef(sub) for sub in data]
else:
corrs = [np.corrcoef(data[..., sub]) for sub in
range(data.shape[-1])]
return np.nanmean(corrs, axis=0)
if isinstance(data, list):
collapsed_data = np.hstack(data)
nsample = int(collapsed_data.shape[-1] / len(data))
else:
collapsed_data = data.reshape((len(data), -1), order='F')
nsample = data.shape[1]
consensus = np.corrcoef(collapsed_data)
# only keep the upper triangle for the bootstraps to save on memory usage
triu_inds = np.triu_indices_from(consensus, k=1)
bootstrapped_corrmat = np.zeros((len(triu_inds[0]), n_boot))
# generate `n_boot` bootstrap correlation matrices by sampling `t` time
# points from the concatenated time series
for boot in range(n_boot):
inds = rs.randint(collapsed_data.shape[-1], size=nsample)
bootstrapped_corrmat[..., boot] = \
np.corrcoef(collapsed_data[:, inds])[triu_inds]
# extract the CIs from the bootstrapped correlation matrices
# we don't need the input anymore so overwrite it
bootstrapped_ci = np.percentile(bootstrapped_corrmat, [100 - ci, ci],
axis=-1, overwrite_input=True)
# remove unreliable (i.e., CI zero-crossing) correlations
# if the signs of the bootstrapped confidence intervals are different
# (i.e., their signs sum to 0), then we want to remove them
# so, take the logical not of the CI (CI = 0 ---> True) and create a mask
# then, set all connections from the consensus array inside the mask to 0
remove_inds = np.logical_not(np.sign(bootstrapped_ci).sum(axis=0))
mask = np.zeros_like(consensus, dtype=bool)
mask[triu_inds] = remove_inds
consensus[mask + mask.T] = 0
return consensus
def _ecdf(data):
"""
Estimate empirical cumulative distribution function of `data`.
Taken directly from StackOverflow. See original answer at
https://stackoverflow.com/questions/33345780.
Parameters
----------
data : array_like
Returns
-------
prob : numpy.ndarray
Cumulative probability
quantiles : numpy.darray
Quantiles
"""
sample = np.atleast_1d(data)
# find the unique values and their corresponding counts
quantiles, counts = np.unique(sample, return_counts=True)
# take the cumulative sum of the counts and divide by the sample size to
# get the cumulative probabilities between 0 and 1
prob = np.cumsum(counts).astype(float) / sample.size
# match MATLAB
prob, quantiles = np.append([0], prob), np.append(quantiles[0], quantiles)
return prob, quantiles
[docs]
def struct_consensus(data, distance, hemiid,
conn_num_inter=None,
conn_num_intra=None,
weighted=False):
"""
Calculate distance-dependent group consensus structural connectivity graph.
Takes as input a weighted stack of connectivity matrices with dimensions
(N, N, S) where `N` is the number of nodes and `S` is the number of
matrices or subjects. The matrices must be weighted, and ideally with
continuous weights (e.g. fractional anisotropy rather than streamline
count). The second input is a pairwise distance matrix, where distance(i,j)
is the Euclidean distance between nodes i and j. The final input is an
(N, 1) vector which labels nodes as belonging to the right (`hemiid==0`) or
left (`hemiid=1`) hemisphere (note that these values can be flipped as long
as `hemiid` contains only values of 0 and 1).
This function estimates the average edge length distribution and builds
a group-averaged connectivity matrix that approximates this distribution
with density equal to the mean density across subjects.
The algorithm works as follows:
1. Estimate the cumulative edge length distribution,
2. Divide the distribution into M length bins, one for each edge that will
be added to the group-average matrix, and
3. Within each bin, select the edge that is most consistently expressed
expressed across subjects, breaking ties according to average edge
weight (which is why the input matrix `data` must be weighted).
The algorithm works separately on within/between hemisphere links.
M is the sum of `conn_num_inter` and `conn_num_intra`, if provided.
Otherwise, M is estimated from the data.
Parameters
----------
data : (N, N, S) array_like
Weighted connectivity matrices (i.e., fractional anisotropy), where `N`
is nodes and `S` is subjects
distance : (N, N) array_like
Array where `distance[i, j]` is the Euclidean distance between nodes
`i` and `j`
hemiid : (N, 1) array_like
Hemisphere designation for `N` nodes where a value of 0/1 indicates
node `N_{i}` is in the right/left hemisphere, respectively
conn_num_inter : int, optional
Number of inter-hemispheric connections to include in the consensus
matrix. If `None`, the number of inter-hemispheric connections will be
estimated from the data. Default = `None`.
conn_num_intra : int, optional
Number of intra-hemispheric connections to include in the consensus
matrix. If `None`, the number of intra-hemispheric connections will be
estimated from the data. Default = `None`.
weighted : bool
Flag indicating whether or not to return a weighted consensus map. If
`True`, the consensus will be multiplied by the mean of `data`.
Returns
-------
consensus : (N, N) numpy.ndarray
Binary (default) or mean-weighted group-level connectivity matrix
References
----------
Betzel, R. F., Griffa, A., Hagmann, P., & Mišić, B. (2018). Distance-
dependent consensus thresholds for generating group-representative
structural brain networks. Network Neuroscience, 1-22.
"""
# confirm input shapes are as expected
check_consistent_length(data, distance, hemiid)
try:
hemiid = check_array(hemiid, ensure_2d=True)
except ValueError:
raise ValueError('Provided hemiid must be a 2D array. Reshape your '
'data using array.reshape(-1, 1) and try again.') from None
num_node, _, num_sub = data.shape # info on connectivity matrices
pos_data = data > 0 # location of + values in matrix
pos_data_count = pos_data.sum(axis=2) # num sub with + values at each node
with np.errstate(divide='ignore', invalid='ignore'):
average_weights = data.sum(axis=2) / pos_data_count
# empty array to hold inter/intra hemispheric connections
consensus = np.zeros((num_node, num_node, 2))
for conn_type in range(2): # iterate through inter/intra hemisphere conn
if conn_type == 0: # get inter hemisphere edges
inter_hemi = (hemiid == 0) @ (hemiid == 1).T
keep_conn = np.logical_or(inter_hemi, inter_hemi.T)
else: # get intra hemisphere edges
right_hemi = (hemiid == 0) @ (hemiid == 0).T
left_hemi = (hemiid == 1) @ (hemiid == 1).T
keep_conn = np.logical_or(right_hemi @ right_hemi.T,
left_hemi @ left_hemi.T)
# mask the distance array for only those edges we want to examine
full_dist_conn = distance * keep_conn
upper_dist_conn = np.atleast_3d(np.triu(full_dist_conn))
# generate array of weighted (by distance), positive edges across subs
pos_dist = pos_data * upper_dist_conn
pos_dist = pos_dist[np.nonzero(pos_dist)]
# determine average # of positive edges across subs
# we will use this to bin the edge weights
if conn_type == 0:
if conn_num_inter is None:
avg_conn_num = len(pos_dist) / num_sub
else:
avg_conn_num = conn_num_inter
else:
if conn_num_intra is None:
avg_conn_num = len(pos_dist) / num_sub
else:
avg_conn_num = conn_num_intra
# estimate empirical CDF of weighted, positive edges across subs
cumprob, quantiles = _ecdf(pos_dist)
cumprob = np.round(cumprob * avg_conn_num).astype(int)
# empty array to hold group-average matrix for current connection type
# (i.e., inter/intra hemispheric connections)
group_conn_type = np.zeros((num_node, num_node))
# iterate through bins (for edge weights)
for n in range(1, int(avg_conn_num) + 1):
# get current quantile of interest
curr_quant = quantiles[np.logical_and(cumprob >= (n - 1),
cumprob < n)]
if curr_quant.size == 0:
continue
# find edges in distance connectivity matrix w/i current quantile
mask = np.logical_and(full_dist_conn >= curr_quant.min(),
full_dist_conn <= curr_quant.max())
i, j = np.where(np.triu(mask)) # indices of edges of interest
c = pos_data_count[i, j] # get num sub with + values at edges
w = average_weights[i, j] # get averaged weight of edges
# find locations of edges most commonly represented across subs
indmax = np.argwhere(c == c.max())
# determine index of most frequent edge; break ties with higher
# weighted edge
if indmax.size == 1: # only one edge found
group_conn_type[i[indmax], j[indmax]] = 1
else: # multiple edges found
indmax = indmax[np.argmax(w[indmax])]
group_conn_type[i[indmax], j[indmax]] = 1
consensus[:, :, conn_type] = group_conn_type
# collapse across hemispheric connections types and make symmetrical array
consensus = consensus.sum(axis=2)
consensus = np.logical_or(consensus, consensus.T).astype(int)
if weighted:
consensus = consensus * np.mean(data, axis=2)
return consensus