2.2 Get the Data

2.2 Get the Data

# file format the figures are saved in (a png copy is always written as well)
mode = "svg"

import matplotlib

# default font settings handed to matplotlib for all figure text
font = dict(family="Dejavu Sans", weight="normal", size=20)

matplotlib.rc("font", **font)

import matplotlib
from matplotlib import pyplot as plt
import os
import urllib
import boto3
from botocore import UNSIGNED
from botocore.client import Config
from graspologic.utils import import_edgelist
import numpy as np
import glob
from tqdm import tqdm

# name of the AWS S3 bucket that hosts the data
BUCKET_ROOT = "open-neurodata"
# parcellation whose connectomes are requested; it is part of the S3 key prefix
parcellation = "Schaefer400"
# key prefix (the "folder" inside the bucket) under which the connectomes live
FMRI_PREFIX = (
    "m2g/Functional/BNU1-11-12-20-m2g-func/Connectomes/"
    f"{parcellation}_space-MNI152NLin6_res-2x2x2.nii.gz/"
)
# local folder the downloaded files are written to
FMRI_PATH = os.path.join("datasets", "fmri")
# substring marking the S3 keys that are skipped when downloading
DS_KEY = "abs_edgelist"

def fetch_fmri_data(bucket=BUCKET_ROOT, fmri_prefix=FMRI_PREFIX,
                    output=FMRI_PATH, name=DS_KEY):
    """
    A function to fetch fMRI connectomes from AWS S3.

    Parameters
    ----------
    bucket : str
        Name of the S3 bucket to read from.
    fmri_prefix : str
        Key prefix inside the bucket; only objects under it are listed.
    output : str
        Local folder the files are downloaded into. It is created
        if it does not exist yet.
    name : str
        Objects whose key contains this substring are skipped.

    Notes
    -----
    Files that already exist in ``output`` are not downloaded again, so
    the function can be re-run cheaply. The listing is a single
    ``list_objects`` call, and its "Contents" entry is indexed directly.
    """
    # make sure the requested output directory exists
    os.makedirs(output, exist_ok=True)
    # start boto3 session anonymously (unsigned requests, no credentials)
    s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
    # obtain the descriptions of the objects stored under the prefix
    bucket_conts = s3.list_objects(Bucket=bucket,
                                   Prefix=fmri_prefix)["Contents"]
    for s3_key in tqdm(bucket_conts):
        # full key of the object within the bucket
        s3_object = s3_key['Key']
        # skip the files that are flagged for exclusion
        if name in s3_object:
            continue
        # store the file under its base name inside the output folder
        op_fname = os.path.join(output, s3_object.split('/')[-1])
        # only download what is not already on disk
        if not os.path.exists(op_fname):
            s3.download_file(bucket, s3_object, op_fname)

def read_fmri_data(path=FMRI_PATH):
    """
    Load every csv edgelist found in a folder and stack the results.

    Parameters
    ----------
    path : str
        Folder that is searched for files ending in ".csv".

    Returns
    -------
    The networks returned by graspologic's ``import_edgelist``, one per
    file in sorted filename order, combined with ``np.stack`` along a
    new leading axis.
    """
    # every csv file in the folder is treated as an edgelist
    csv_pattern = os.path.join(path, "*.csv")
    edgelist_files = sorted(glob.glob(csv_pattern))
    # read the edgelists one at a time with graspologic
    adjacencies = []
    for edgelist_file in tqdm(edgelist_files):
        adjacencies.append(import_edgelist(edgelist_file))
    return np.stack(adjacencies, axis=0)
# download the connectome files with the default bucket, prefix and folder
fetch_fmri_data()
# load every downloaded csv edgelist; As holds the stacked networks
As = read_fmri_data()
  0%|          | 0/212 [00:00<?, ?it/s]
  1%|          | 2/212 [00:00<00:19, 10.59it/s]
  2%|▏         | 4/212 [00:00<00:18, 11.16it/s]
  3%|▎         | 6/212 [00:00<00:16, 12.33it/s]
  4%|▍         | 8/212 [00:00<00:15, 13.03it/s]
  5%|▍         | 10/212 [00:00<00:14, 13.56it/s]
  6%|▌         | 12/212 [00:00<00:14, 13.48it/s]
  7%|▋         | 14/212 [00:01<00:14, 13.69it/s]
  8%|▊         | 16/212 [00:01<00:16, 12.18it/s]
  8%|▊         | 18/212 [00:01<00:15, 12.85it/s]
  9%|▉         | 20/212 [00:01<00:15, 12.34it/s]
 10%|█         | 22/212 [00:01<00:15, 12.65it/s]
 11%|█▏        | 24/212 [00:01<00:14, 13.26it/s]
 12%|█▏        | 26/212 [00:02<00:13, 13.33it/s]
 13%|█▎        | 28/212 [00:02<00:13, 13.57it/s]
 14%|█▍        | 30/212 [00:02<00:13, 13.42it/s]
 15%|█▌        | 32/212 [00:02<00:13, 13.31it/s]
 16%|█▌        | 34/212 [00:02<00:14, 12.38it/s]
 17%|█▋        | 36/212 [00:02<00:14, 12.28it/s]
 18%|█▊        | 38/212 [00:02<00:13, 12.45it/s]
 19%|█▉        | 40/212 [00:03<00:13, 12.72it/s]
 20%|█▉        | 42/212 [00:03<00:12, 13.08it/s]
 21%|██        | 44/212 [00:03<00:12, 13.69it/s]
 22%|██▏       | 46/212 [00:03<00:11, 14.37it/s]
 23%|██▎       | 48/212 [00:03<00:11, 14.55it/s]
 24%|██▎       | 50/212 [00:03<00:10, 14.90it/s]
 25%|██▍       | 52/212 [00:03<00:10, 15.03it/s]
 25%|██▌       | 54/212 [00:04<00:12, 12.77it/s]
 26%|██▋       | 56/212 [00:04<00:11, 13.23it/s]
 27%|██▋       | 58/212 [00:04<00:12, 11.88it/s]
 28%|██▊       | 60/212 [00:04<00:12, 12.30it/s]
 29%|██▉       | 62/212 [00:04<00:11, 12.56it/s]
 30%|███       | 64/212 [00:04<00:11, 12.87it/s]
 31%|███       | 66/212 [00:05<00:10, 13.40it/s]
 32%|███▏      | 68/212 [00:05<00:10, 13.49it/s]
 33%|███▎      | 70/212 [00:05<00:10, 13.22it/s]
 34%|███▍      | 72/212 [00:05<00:10, 12.83it/s]
 35%|███▍      | 74/212 [00:05<00:10, 12.99it/s]
 36%|███▌      | 76/212 [00:05<00:10, 12.58it/s]
 37%|███▋      | 78/212 [00:06<00:11, 12.08it/s]
 38%|███▊      | 80/212 [00:06<00:11, 11.75it/s]
 39%|███▊      | 82/212 [00:06<00:10, 11.92it/s]
 40%|███▉      | 84/212 [00:06<00:10, 12.34it/s]
 41%|████      | 86/212 [00:06<00:09, 12.77it/s]
 42%|████▏     | 88/212 [00:06<00:10, 11.71it/s]
 42%|████▏     | 90/212 [00:07<00:09, 12.20it/s]
 43%|████▎     | 92/212 [00:07<00:09, 12.94it/s]
 44%|████▍     | 94/212 [00:07<00:09, 12.45it/s]
 45%|████▌     | 96/212 [00:07<00:08, 13.18it/s]
 46%|████▌     | 98/212 [00:07<00:08, 13.13it/s]
 47%|████▋     | 100/212 [00:07<00:11,  9.67it/s]
 48%|████▊     | 102/212 [00:08<00:10, 10.41it/s]
 49%|████▉     | 104/212 [00:08<00:09, 10.88it/s]
 50%|█████     | 106/212 [00:08<00:09, 10.99it/s]
 51%|█████     | 108/212 [00:08<00:09, 11.16it/s]
 52%|█████▏    | 110/212 [00:08<00:08, 11.63it/s]
 53%|█████▎    | 112/212 [00:08<00:09, 10.99it/s]
 54%|█████▍    | 114/212 [00:09<00:08, 11.26it/s]
 55%|█████▍    | 116/212 [00:09<00:11,  8.48it/s]
 56%|█████▌    | 118/212 [00:09<00:09,  9.54it/s]
 57%|█████▋    | 120/212 [00:09<00:08, 10.44it/s]
 58%|█████▊    | 122/212 [00:09<00:08, 11.13it/s]
 58%|█████▊    | 124/212 [00:10<00:07, 11.81it/s]
 59%|█████▉    | 126/212 [00:10<00:06, 12.50it/s]
 60%|██████    | 128/212 [00:10<00:06, 12.75it/s]
 61%|██████▏   | 130/212 [00:10<00:06, 12.94it/s]
 62%|██████▏   | 132/212 [00:10<00:06, 12.60it/s]
 63%|██████▎   | 134/212 [00:10<00:06, 12.92it/s]
 64%|██████▍   | 136/212 [00:11<00:05, 13.00it/s]
 65%|██████▌   | 138/212 [00:11<00:05, 13.49it/s]
 66%|██████▌   | 140/212 [00:11<00:05, 13.77it/s]
 67%|██████▋   | 142/212 [00:11<00:05, 13.25it/s]
 68%|██████▊   | 144/212 [00:11<00:04, 14.02it/s]
 69%|██████▉   | 146/212 [00:11<00:05, 13.19it/s]
 70%|██████▉   | 148/212 [00:11<00:04, 13.91it/s]
 71%|███████   | 150/212 [00:12<00:04, 13.98it/s]
 72%|███████▏  | 152/212 [00:12<00:04, 14.15it/s]
 73%|███████▎  | 154/212 [00:12<00:04, 12.36it/s]
 74%|███████▎  | 156/212 [00:12<00:04, 12.67it/s]
 75%|███████▍  | 158/212 [00:12<00:04, 13.03it/s]
 75%|███████▌  | 160/212 [00:12<00:03, 13.22it/s]
 76%|███████▋  | 162/212 [00:12<00:03, 13.44it/s]
 77%|███████▋  | 164/212 [00:13<00:03, 12.93it/s]
 78%|███████▊  | 166/212 [00:13<00:03, 12.99it/s]
 79%|███████▉  | 168/212 [00:13<00:03, 12.88it/s]
 80%|████████  | 170/212 [00:13<00:03, 13.05it/s]
 81%|████████  | 172/212 [00:13<00:03, 13.10it/s]
 82%|████████▏ | 174/212 [00:13<00:02, 12.84it/s]
 83%|████████▎ | 176/212 [00:14<00:02, 13.39it/s]
 84%|████████▍ | 178/212 [00:14<00:02, 11.71it/s]
 85%|████████▍ | 180/212 [00:14<00:02, 11.47it/s]
 86%|████████▌ | 182/212 [00:14<00:02, 11.91it/s]
 87%|████████▋ | 184/212 [00:14<00:02, 13.23it/s]
 88%|████████▊ | 186/212 [00:14<00:01, 13.33it/s]
 89%|████████▊ | 188/212 [00:14<00:01, 13.53it/s]
 90%|████████▉ | 190/212 [00:15<00:01, 12.76it/s]
 91%|█████████ | 192/212 [00:15<00:01, 12.83it/s]
 92%|█████████▏| 194/212 [00:15<00:01, 13.42it/s]
 92%|█████████▏| 196/212 [00:15<00:01, 13.12it/s]
 93%|█████████▎| 198/212 [00:15<00:01, 13.14it/s]
 94%|█████████▍| 200/212 [00:16<00:01,  9.50it/s]
 95%|█████████▌| 202/212 [00:16<00:01, 10.00it/s]
 96%|█████████▌| 204/212 [00:16<00:00, 10.60it/s]
 97%|█████████▋| 206/212 [00:16<00:00, 11.28it/s]
 98%|█████████▊| 208/212 [00:16<00:00, 12.01it/s]
 99%|█████████▉| 210/212 [00:16<00:00, 12.37it/s]
100%|██████████| 212/212 [00:17<00:00, 12.91it/s]
100%|██████████| 212/212 [00:17<00:00, 12.46it/s]

  0%|          | 0/106 [00:00<?, ?it/s]
  1%|          | 1/106 [00:00<00:32,  3.25it/s]
  2%|▏         | 2/106 [00:00<00:32,  3.25it/s]
  3%|▎         | 3/106 [00:00<00:31,  3.27it/s]
  4%|▍         | 4/106 [00:01<00:31,  3.25it/s]
  5%|▍         | 5/106 [00:01<00:31,  3.24it/s]
  6%|▌         | 6/106 [00:01<00:30,  3.25it/s]
  7%|▋         | 7/106 [00:02<00:30,  3.27it/s]
  8%|▊         | 8/106 [00:02<00:30,  3.25it/s]
  8%|▊         | 9/106 [00:02<00:29,  3.25it/s]
  9%|▉         | 10/106 [00:03<00:29,  3.26it/s]
 10%|█         | 11/106 [00:03<00:29,  3.27it/s]
 11%|█▏        | 12/106 [00:03<00:28,  3.26it/s]
 12%|█▏        | 13/106 [00:03<00:28,  3.27it/s]
 13%|█▎        | 14/106 [00:04<00:28,  3.27it/s]
 14%|█▍        | 15/106 [00:04<00:27,  3.28it/s]
 15%|█▌        | 16/106 [00:04<00:27,  3.27it/s]
 16%|█▌        | 17/106 [00:05<00:27,  3.29it/s]
 17%|█▋        | 18/106 [00:05<00:26,  3.29it/s]
 18%|█▊        | 19/106 [00:05<00:26,  3.30it/s]
 19%|█▉        | 20/106 [00:06<00:25,  3.31it/s]
 20%|█▉        | 21/106 [00:06<00:25,  3.31it/s]
 21%|██        | 22/106 [00:06<00:25,  3.31it/s]
 22%|██▏       | 23/106 [00:07<00:25,  3.31it/s]
 23%|██▎       | 24/106 [00:07<00:24,  3.30it/s]
 24%|██▎       | 25/106 [00:07<00:24,  3.32it/s]
 25%|██▍       | 26/106 [00:07<00:24,  3.33it/s]
 25%|██▌       | 27/106 [00:08<00:23,  3.33it/s]
 26%|██▋       | 28/106 [00:08<00:23,  3.33it/s]
 27%|██▋       | 29/106 [00:08<00:23,  3.33it/s]
 28%|██▊       | 30/106 [00:09<00:22,  3.34it/s]
 29%|██▉       | 31/106 [00:09<00:22,  3.33it/s]
 30%|███       | 32/106 [00:09<00:22,  3.32it/s]
 31%|███       | 33/106 [00:10<00:21,  3.32it/s]
 32%|███▏      | 34/106 [00:10<00:21,  3.32it/s]
 33%|███▎      | 35/106 [00:10<00:21,  3.32it/s]
 34%|███▍      | 36/106 [00:10<00:21,  3.32it/s]
 35%|███▍      | 37/106 [00:11<00:20,  3.32it/s]
 36%|███▌      | 38/106 [00:11<00:20,  3.32it/s]
 37%|███▋      | 39/106 [00:11<00:20,  3.32it/s]
 38%|███▊      | 40/106 [00:12<00:19,  3.33it/s]
 39%|███▊      | 41/106 [00:12<00:19,  3.33it/s]
 40%|███▉      | 42/106 [00:12<00:19,  3.33it/s]
 41%|████      | 43/106 [00:13<00:18,  3.33it/s]
 42%|████▏     | 44/106 [00:13<00:18,  3.32it/s]
 42%|████▏     | 45/106 [00:13<00:18,  3.31it/s]
 43%|████▎     | 46/106 [00:13<00:18,  3.31it/s]
 44%|████▍     | 47/106 [00:14<00:17,  3.31it/s]
 45%|████▌     | 48/106 [00:14<00:17,  3.30it/s]
 46%|████▌     | 49/106 [00:14<00:17,  3.32it/s]
 47%|████▋     | 50/106 [00:15<00:16,  3.32it/s]
 48%|████▊     | 51/106 [00:15<00:16,  3.32it/s]
 49%|████▉     | 52/106 [00:15<00:16,  3.32it/s]
 50%|█████     | 53/106 [00:16<00:15,  3.32it/s]
 51%|█████     | 54/106 [00:16<00:15,  3.32it/s]
 52%|█████▏    | 55/106 [00:16<00:15,  3.32it/s]
 53%|█████▎    | 56/106 [00:16<00:15,  3.32it/s]
 54%|█████▍    | 57/106 [00:17<00:14,  3.29it/s]
 55%|█████▍    | 58/106 [00:17<00:14,  3.28it/s]
 56%|█████▌    | 59/106 [00:17<00:14,  3.27it/s]
 57%|█████▋    | 60/106 [00:18<00:14,  3.26it/s]
 58%|█████▊    | 61/106 [00:18<00:13,  3.25it/s]
 58%|█████▊    | 62/106 [00:18<00:13,  3.23it/s]
 59%|█████▉    | 63/106 [00:19<00:13,  3.25it/s]
 60%|██████    | 64/106 [00:19<00:12,  3.26it/s]
 61%|██████▏   | 65/106 [00:19<00:12,  3.25it/s]
 62%|██████▏   | 66/106 [00:20<00:12,  3.25it/s]
 63%|██████▎   | 67/106 [00:20<00:11,  3.26it/s]
 64%|██████▍   | 68/106 [00:20<00:11,  3.26it/s]
 65%|██████▌   | 69/106 [00:20<00:11,  3.27it/s]
 66%|██████▌   | 70/106 [00:21<00:11,  3.27it/s]
 67%|██████▋   | 71/106 [00:21<00:10,  3.28it/s]
 68%|██████▊   | 72/106 [00:21<00:10,  3.28it/s]
 69%|██████▉   | 73/106 [00:22<00:10,  3.29it/s]
 70%|██████▉   | 74/106 [00:22<00:09,  3.28it/s]
 71%|███████   | 75/106 [00:22<00:09,  3.26it/s]
 72%|███████▏  | 76/106 [00:23<00:09,  3.27it/s]
 73%|███████▎  | 77/106 [00:23<00:08,  3.26it/s]
 74%|███████▎  | 78/106 [00:23<00:08,  3.26it/s]
 75%|███████▍  | 79/106 [00:24<00:08,  3.26it/s]
 75%|███████▌  | 80/106 [00:24<00:07,  3.27it/s]
 76%|███████▋  | 81/106 [00:24<00:07,  3.28it/s]
 77%|███████▋  | 82/106 [00:24<00:07,  3.27it/s]
 78%|███████▊  | 83/106 [00:25<00:07,  3.28it/s]
 79%|███████▉  | 84/106 [00:25<00:06,  3.28it/s]
 80%|████████  | 85/106 [00:25<00:06,  3.28it/s]
 81%|████████  | 86/106 [00:26<00:06,  3.29it/s]
 82%|████████▏ | 87/106 [00:26<00:05,  3.29it/s]
 83%|████████▎ | 88/106 [00:26<00:05,  3.29it/s]
 84%|████████▍ | 89/106 [00:27<00:05,  3.28it/s]
 85%|████████▍ | 90/106 [00:27<00:04,  3.28it/s]
 86%|████████▌ | 91/106 [00:27<00:04,  3.28it/s]
 87%|████████▋ | 92/106 [00:27<00:04,  3.27it/s]
 88%|████████▊ | 93/106 [00:28<00:03,  3.27it/s]
 89%|████████▊ | 94/106 [00:28<00:03,  3.25it/s]
 90%|████████▉ | 95/106 [00:28<00:03,  3.26it/s]
 91%|█████████ | 96/106 [00:29<00:03,  3.27it/s]
 92%|█████████▏| 97/106 [00:29<00:02,  3.27it/s]
 92%|█████████▏| 98/106 [00:29<00:02,  3.27it/s]
 93%|█████████▎| 99/106 [00:30<00:02,  3.27it/s]
 94%|█████████▍| 100/106 [00:30<00:01,  3.28it/s]
 95%|█████████▌| 101/106 [00:30<00:01,  3.28it/s]
 96%|█████████▌| 102/106 [00:31<00:01,  3.28it/s]
 97%|█████████▋| 103/106 [00:31<00:00,  3.28it/s]
 98%|█████████▊| 104/106 [00:31<00:00,  3.28it/s]
 99%|█████████▉| 105/106 [00:31<00:00,  3.29it/s]
100%|██████████| 106/106 [00:32<00:00,  3.29it/s]
100%|██████████| 106/106 [00:32<00:00,  3.29it/s]

from graphbook_code import heatmap
from matplotlib import pyplot as plt


# use the first network of the stack (files were loaded in sorted order)
A = As[0]

# two panels side by side: heatmap on the left, histogram on the right
fig, axs = plt.subplots(1, 2, gridspec_kw={"width_ratios": [1.4, 2]}, figsize=(16, 5.5))
# ticks sit at 0-based positions 0/199/399 and are labelled 1/200/400;
# presumably the network has 400 nodes (Schaefer400) -- TODO confirm
heatmap(A, vmin=-1, vmax=1, ax=axs[0], xticks=[0, 199, 399], xticklabels=[1, 200, 400],
        yticks=[0, 199, 399], yticklabels=[1, 200, 400], ytitle="Brain Area", xtitle="Brain Area",
        title="(A) Heatmap of Functional Connectome", shrink=0.6, legend_title="Edge weight")
import seaborn as sns

# histogram over all entries of the matrix, flattened to one dimension
sns.histplot(A.flatten(), ax=axs[1], bins=50, color="gray")
axs[1].set_xlabel("Edge weight")
axs[1].set_title("(B) Histogram of functional connectome edge-weights", size=20, pad=15)

fig.tight_layout()

# base name (without extension) of the saved figure files
fname = "raw"
# save in the configured format first, unless that format is png already
if mode != "png":
    os.makedirs(f"Figures/{mode:s}", exist_ok=True)
    fig.savefig(f"Figures/{mode:s}/{fname:s}.{mode:s}")

# a png copy is always written
os.makedirs("Figures/png", exist_ok=True)
fig.savefig(f"Figures/png/{fname:s}.png")
../../_images/0c0bba7c3e2ee143bbfc645160585b617784f19af76b55f0cfe5d1b2fce9ab35.png