Source code for netneurotools.datasets.fetch_project

"""Functions for fetching project data."""

import os
import numpy as np

from sklearn.utils import Bunch

from .datasets_utils import _get_data_dir, _get_reference_info, fetch_file

from ._mirchi2018 import _get_fc, _get_panas


[docs] def fetch_vazquez_rodriguez2019(force=False, data_dir=None, verbose=1): """ Download files from Vazquez-Rodriguez et al., 2019, PNAS. This dataset contains one file: rsquared_gradient.csv, which contains two columns: rsquared and gradient. If you used this data, please cite [1]_. Returns ------- data : :class:`sklearn.utils.Bunch` Dictionary-like object with fetched data. Other Parameters ---------------- force : bool, optional If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 References ---------- .. [1] Bertha V\u00e1zquez-Rodr\u00edguez, Laura E Su\u00e1rez, Ross D Markello, Golia Shafiei, Casey Paquola, Patric Hagmann, Martijn P Van Den Heuvel, Boris C Bernhardt, R Nathan Spreng, and Bratislav Misic. Gradients of structure\u2013function tethering across neocortex. Proceedings of the National Academy of Sciences, 116(42):21219\u201321227, 2019. """ dataset_name = "ds-vazquez_rodriguez2019" _get_reference_info(dataset_name, verbose=verbose) fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) # load data rsq, grad = np.loadtxt( fetched / "rsquared_gradient.csv", delimiter=",", skiprows=1 ).T data = {"rsquared": rsq, "gradient": grad} return Bunch(**data)
[docs] def fetch_mirchi2018(force=False, data_dir=None, verbose=1): """ Download (and creates) dataset for replicating Mirchi et al., 2018, SCAN. Parameters ---------- data_dir : str, optional Directory to check for existing data files (if they exist) or to save generated data files. Files should be named mirchi2018_fc.npy and mirchi2018_panas.csv for the functional connectivity and behavioral data, respectively. Returns ------- X : (73, 198135) numpy.ndarray Functional connections from MyConnectome rsfMRI time series data Y : (73, 13) numpy.ndarray PANAS subscales from MyConnectome behavioral data """ data_dir = _get_data_dir(data_dir=data_dir) / "ds-mirchi2018" data_dir.mkdir(exist_ok=True, parents=True) X_fname = data_dir / "myconnectome_fc.npy" Y_fname = data_dir / "myconnectome_panas.csv" if not os.path.exists(X_fname): X = _get_fc(verbose=verbose) np.save(X_fname, X, allow_pickle=False) else: X = np.load(X_fname, allow_pickle=False) if not os.path.exists(Y_fname): Y = _get_panas() np.savetxt( Y_fname, np.column_stack(list(Y.values())), header=",".join(Y.keys()), delimiter=",", fmt="%i", ) # convert dictionary to structured array before returning Y = np.array( [tuple(row) for row in np.column_stack(list(Y.values()))], dtype=dict(names=list(Y.keys()), formats=["i8"] * len(Y)), ) else: Y = np.genfromtxt(Y_fname, delimiter=",", names=True, dtype=int) return X, Y
[docs] def fetch_hansen_manynetworks(force=False, data_dir=None, verbose=1): """ Download files from Hansen et al., 2023, PLOS Biology. This dataset contains If you used this data, please cite [1]_. Returns ------- filenames : :class:`sklearn.utils.Bunch` Dictionary-like object with fetched data. Other Parameters ---------------- force : bool, optional If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 References ---------- .. [1] Justine Y Hansen, Golia Shafiei, Katharina Voigt, Emma X Liang, Sylvia ML Cox, Marco Leyton, Sharna D Jamadar, and Bratislav Misic. Integrating multimodal and multiscale connectivity blueprints of the human cerebral cortex in health and disease. PLoS biology, 21(9):e3002314, 2023. """ dataset_name = "ds-hansen_manynetworks" _get_reference_info(dataset_name, verbose=verbose) fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) # _parc = { # "cammoun033": "Cammoun033", # "schaefer100": "Schaefer100", # "schaefer400": "Schaefer400", # } # data = { # parc: { # "bigbrain": fetched # / f"data/{_parc[parc]}/bigbrain_intensities.csv", # "cognitive": fetched # / f"data/{_parc[parc]}/cognitive_similarity.npy", # "sc": fetched / f"data/{_parc[parc]}/consensusSC.npy", # "sc_wei": fetched / f"data/{_parc[parc]}/consensusSC_wei.npy", # "ephys": fetched # / f"data/{_parc[parc]}/electrophysiological_connectivity.npy", # "gene": fetched / f"data/{_parc[parc]}/gene_coexpression.npy", # "megfc": fetched # / f"data/{_parc[parc]}/groupFCmeg_aec_orth_{_parc[parc]}.npy.npz", # "haemodynamic": fetched # / f"data/{_parc[parc]}/haemodynamic_connectivity.npy", # "laminar": fetched / f"data/{_parc[parc]}/laminar_similarity.npy", # "metabolic": fetched # / f"data/{_parc[parc]}/metabolic_connectivity.npy", # "receptor": fetched # / f"data/{_parc[parc]}/receptor_similarity.npy", # "temporal": fetched # / f"data/{_parc[parc]}/temporal_similarity.npy", # "voneconomo": fetched # / f"data/{_parc[parc]}/voneconomo_{_parc[parc]}.csv", # } # for parc in ["cammoun033", "schaefer100", "schaefer400"] # } return fetched
[docs] def fetch_hansen_receptors(force=False, data_dir=None, verbose=1): """ Download files from Hansen et al., 2022, Nature Neuroscience. This dataset contains If you used this data, please cite [1]_. .. important:: If you use this dataset, please also cite the original data sources. See the original publication or `neuromaps <https://netneurolab.github.io/neuromaps/listofmaps.html>`_ for more information. Returns ------- filenames : :class:`sklearn.utils.Bunch` Dictionary-like object with fetched data. Other Parameters ---------------- force : bool, optional If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 References ---------- .. [1] Justine Y Hansen, Golia Shafiei, Ross D Markello, Kelly Smart, Sylvia ML Cox, Martin N\u00f8rgaard, Vincent Beliveau, Yanjun Wu, Jean-Dominique Gallezot, \u00c9tienne Aumont, and others. Mapping neurotransmitter systems to the structural and functional organization of the human neocortex. Nature neuroscience, 25(11):1569\u20131581, 2022. """ dataset_name = "ds-hansen_receptors" _get_reference_info(dataset_name, verbose=verbose) fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) return fetched
[docs] def fetch_hansen_genescognition(force=False, data_dir=None, verbose=1): """Download files from Hansen et al., 2021, Nature Human Behaviour. This dataset contains If you used this data, please cite [1]_. Returns ------- filenames : :class:`sklearn.utils.Bunch` Dictionary-like object with fetched data. Other Parameters ---------------- force : bool, optional If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 References ---------- .. [1] Justine Y Hansen, Ross D Markello, Jacob W Vogel, Jakob Seidlitz, Danilo Bzdok, and Bratislav Misic. Mapping gene transcription and neurocognition across human neocortex. Nature Human Behaviour, 5(9):1240\u20131250, 2021. """ dataset_name = "ds-hansen_genescognition" _get_reference_info(dataset_name, verbose=verbose) fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) return fetched
[docs] def fetch_hansen_brainstemfc(force=False, data_dir=None, verbose=1): """Download files from Hansen et al., 2024. This dataset contains If you used this data, please cite [1]_. Returns ------- filenames : :class:`sklearn.utils.Bunch` Dictionary-like object with fetched data. Other Parameters ---------------- force : bool, optional If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 References ---------- .. [1] Justine Y Hansen, Simone Cauzzo, Kavita Singh, Mar\u00eda Guadalupe Garc\u00eda-Gomar, James M Shine, Marta Bianciardi, and Bratislav Misic. Integrating brainstem and cortical functional architectures. Nature Neuroscience, pages 1\u201312, 2024. """ dataset_name = "ds-hansen_brainstemfc" _get_reference_info(dataset_name, verbose=verbose) fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) return fetched
[docs] def fetch_shafiei_megfmrimapping(force=False, data_dir=None, verbose=1): """Download files from Shafiei et al., 2022. This dataset contains If you used this data, please cite [1]_. Returns ------- filenames : :class:`sklearn.utils.Bunch` Dictionary-like object with fetched data. Other Parameters ---------------- force : bool, optional If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 References ---------- .. [1] Golia Shafiei, Sylvain Baillet, and Bratislav Misic. Human electromagnetic and haemodynamic networks systematically converge in unimodal cortex and diverge in transmodal cortex. PLoS biology, 20(8):e3001735, 2022. """ dataset_name = "ds-shafiei_megfmrimapping" _get_reference_info(dataset_name, verbose=verbose) fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) return fetched
[docs] def fetch_shafiei_megdynamics(force=False, data_dir=None, verbose=1): """Download files from Shafiei et al., 2023. This dataset contains If you used this data, please cite [1]_. Returns ------- filenames : :class:`sklearn.utils.Bunch` Dictionary-like object with fetched data. Other Parameters ---------------- force : bool, optional If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 References ---------- .. [1] Golia Shafiei, Ben D Fulcher, Bradley Voytek, Theodore D Satterthwaite, Sylvain Baillet, and Bratislav Misic. Neurophysiological signatures of cortical micro-architecture. Nature communications, 14(1):6000, 2023. """ dataset_name = "ds-shafiei_megdynamics" _get_reference_info(dataset_name, verbose=verbose) fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) return fetched
[docs] def fetch_suarez_mami(force=False, data_dir=None, verbose=1): """Download files from Suarez et al., 2022, eLife. This dataset contains If you used this data, please cite [1]_ and [2]_. Returns ------- filenames : :class:`sklearn.utils.Bunch` Dictionary-like object with fetched data. Other Parameters ---------------- force : bool, optional If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 References ---------- .. [1] Laura E Suarez, Yossi Yovel, Martijn P van den Heuvel, Olaf Sporns, Yaniv Assaf, Guillaume Lajoie, and Bratislav Misic. A connectomics-based taxonomy of mammals. Elife, 11:e78635, 2022. .. [2] Yaniv Assaf, Arieli Bouznach, Omri Zomet, Assaf Marom, and Yossi Yovel. Conservation of brain connectivity and wiring across the mammalian class. Nature Neuroscience, 23(7):805\u2013808, 2020. """ dataset_name = "ds-suarez_mami" _get_reference_info(dataset_name, verbose=verbose) fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) return fetched
[docs] def fetch_bazinet_assortativity(force=False, data_dir=None, verbose=1): """ Download files from Bazinet et al., 2023, Nature Communications. This dataset contains If you used this data, please cite [1]_. Returns ------- filenames : :class:`sklearn.utils.Bunch` Dictionary-like object with fetched data. Other Parameters ---------------- force : bool, optional If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 References ---------- .. [1] Vincent Bazinet, Justine Y Hansen, Reinder Vos de Wael, Boris C Bernhardt, Martijn P van den Heuvel, and Bratislav Misic. Assortative mixing in micro-architecturally annotated brain connectomes. Nature Communications, 14(1):2850, 2023." """ dataset_name = "ds-bazinet_assortativity" _get_reference_info(dataset_name, verbose=verbose) fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) return fetched
[docs] def fetch_famous_gmat(dataset, force=False, data_dir=None, verbose=1): """ Download files from multi-species connectomes. This dataset contains If you used this data, please cite celegans [1]_, drosophila [2]_, human [3]_, macaque_markov [4]_, macaque_modha [5]_, mouse [6]_, rat [7]_. Parameters ---------- dataset : str Specifies which dataset to download. Returns ------- data : :class:`sklearn.utils.Bunch` Dictionary-like object with, at a minimum, keys ['conn', 'labels', 'ref'] providing connectivity / correlation matrix, region labels, and relevant reference. Other possible keys include 'dist' (an array of Euclidean distances between regions of 'conn'), 'coords' (an array of xyz coordinates for regions of 'conn'), 'acronyms' (an array of acronyms for regions of 'conn'), and 'networks' (an array of network affiliations for regions of 'conn'). Other Parameters ---------------- force : bool, optional If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 References ---------- .. [1] Lav R Varshney, Beth L Chen, Eric Paniagua, David H Hall, and Dmitri B Chklovskii. Structural properties of the caenorhabditis elegans neuronal network. PLoS computational biology, 7(2):e1001066, 2011. .. [2] Ann-Shyn Chiang, Chih-Yung Lin, Chao-Chun Chuang, Hsiu-Ming Chang, Chang-Huain Hsieh, Chang-Wei Yeh, Chi-Tin Shih, Jian-Jheng Wu, Guo-Tzau Wang, Yung-Chang Chen, and others. Three-dimensional reconstruction of brain-wide wiring networks in drosophila at single-cell resolution. Current biology, 21(1):1\u201311, 2011. .. [3] Alessandra Griffa, Yasser Alem\u00e1n-G\u00f3mez, and Patric Hagmann. Structural and functional connectome from 70 young healthy adults [data set]. Zenodo, 2019. .. [4] Nikola T Markov, Maria Ercsey-Ravasz, Camille Lamy, Ana Rita Ribeiro Gomes, Lo\u00efc Magrou, Pierre Misery, Pascale Giroud, Pascal Barone, Colette Dehay, Zolt\u00e1n Toroczkai, and others. The role of long-range connections on the specificity of the macaque interareal cortical network. Proceedings of the National Academy of Sciences, 110(13):5187\u20135192, 2013. .. [5] Dharmendra S Modha and Raghavendra Singh. Network architecture of the long-distance pathways in the macaque brain. Proceedings of the National Academy of Sciences, 107(30):13485\u201313490, 2010. .. [6] Mikail Rubinov, Rolf JF Ypma, Charles Watson, and Edward T Bullmore. Wiring cost and topological participation of the mouse brain connectome. Proceedings of the National Academy of Sciences, 112(32):10032\u201310037, 2015. .. [7] Mihail Bota, Olaf Sporns, and Larry W Swanson. Architecture of the cerebral cortical association connectome underlying cognition. Proceedings of the National Academy of Sciences, 112(16):E2093\u2013E2101, 2015. """ available_connectomes = [ "celegans", "drosophila", "human_func_scale033", "human_func_scale060", "human_func_scale125", "human_func_scale250", "human_func_scale500", "human_struct_scale033", "human_struct_scale060", "human_struct_scale125", "human_struct_scale250", "human_struct_scale500", "macaque_markov", "macaque_modha", "mouse", "rat", ] if dataset not in available_connectomes: raise ValueError( f"Provided dataset {dataset} not available; " f"must be one of {available_connectomes}" ) base_dataset_name = "ds-famous_gmat" _get_reference_info(base_dataset_name, verbose=verbose) fetched = fetch_file( base_dataset_name, force=force, data_dir=data_dir, verbose=verbose ) data = {} for f in (fetched / dataset).glob("*.csv"): try: data[f.stem] = np.loadtxt(f, delimiter=",") except ValueError: data[f.stem] = np.loadtxt(f, delimiter=",", dtype=str) return Bunch(**data)
[docs] def fetch_neurosynth(): """Download Neurosynth data.""" pass