7.1 Two-sample testing for networks

7.1 Two-sample testing for networks#

mode = "svg"

import matplotlib

font = {'family' : 'Dejavu Sans',
        'weight' : 'normal',
        'size'   : 20}

matplotlib.rc('font', **font)

import matplotlib
from matplotlib import pyplot as plt

import warnings; warnings.filterwarnings("ignore")

from graspologic.simulations import sbm
import numpy as np
from graphbook_code import dcsbm, generate_dcsbm_pmtx, \
                           generate_sbm_pmtx

n = 150  # the number of nodes
# human brains have homophilic block structure
Bhum = np.array([[0.2, 0.02], [0.02, 0.2]])
# alien brains add degree-correction
theta_alien = np.tile(np.linspace(1.5, 0.5, n // 2), 2)

# generate human and alien brain network
np.random.seed(0)
A_human, z = sbm([n // 2, n // 2], Bhum, return_labels=True)
A_alien = dcsbm(z, theta_alien, Bhum)

Phum = generate_sbm_pmtx(z, Bhum)
Palien = generate_dcsbm_pmtx(z, theta_alien, Bhum)

from graphbook_code import heatmap
import os
fig, axs = plt.subplots(2, 2, figsize=(10, 10))

heatmap(Phum, legend_title="Probability", ax=axs[0][0],
       xtitle="Node", inner_hier_labels=z + 1, vmin=0, vmax=1,
       title="(A) $P^{(1)}$")
heatmap(A_human.astype(int), ax=axs[0][1],
       xtitle="Node", inner_hier_labels=z + 1, vmin=0, vmax=1,
       title="(A) $A^{(1)}$, human network")

heatmap(Palien, legend_title="Probability", ax=axs[1][0],
       xtitle="Node", inner_hier_labels=z + 1, vmin=0, vmax=1,
       title="(A) $P^{(2)}$")
heatmap(A_alien.astype(int), ax=axs[1][1],
       xtitle="Node", inner_hier_labels=z + 1, vmin=0, vmax=1,
       title="(A) $A^{(2)}$, alien network")

fig.tight_layout()

fname = "ts_ex"
os.makedirs("Figures", exist_ok=True)
if mode != "png":
    os.makedirs(f"Figures/{mode:s}", exist_ok=True)
    fig.savefig(f"Figures/{mode:s}/{fname:s}.{mode:s}")

os.makedirs("Figures/png", exist_ok=True)
fig.savefig(f"Figures/png/{fname:s}.png")

../../_images/8464a97dddf3b320fdacf3d9c18c8996330d7197aacee5bc3852afafbd5ce417.png

from scipy.linalg import orthogonal_procrustes
from graspologic.embed import AdjacencySpectralEmbed as ase

d = 2
# estimate latent positions for alien and human networks
Xhat_human = ase(n_components=d).fit_transform(A_human)
Xhat_alien = ase(n_components=d).fit_transform(A_alien)
# estimate best possible rotation of Xhat_alien to Xhat_human by 
# solving orthogonal procrustes problem
W = orthogonal_procrustes(Xhat_alien, Xhat_human)[0]
observed_norm = np.linalg.norm(Xhat_human - Xhat_alien @ W, ord="fro")

from graspologic.simulations import rdpg

def generate_synthetic_networks(X):
    """
    A function which generates two synthetic networks with
    same latent position matrix X.
    """
    A1 = rdpg(X, directed=False, loops=False)
    A2 = rdpg(X, directed=False, loops=False)
    return A1, A2

Ap, App = generate_synthetic_networks(Xhat_human)

def compute_latent(A, d):
    """
    A function which returns the latent position estimate
    for an adjacency matrix A.
    """
    return ase(n_components=d).fit_transform(A)

Xhat_p = compute_latent(Ap, d)
Xhat_pp = compute_latent(App, d)

def compute_norm_orth_proc(A, B):
    """
    A function which finds the best rotation of B onto A,
    and then computes and returns the norm.
    """
    R = orthogonal_procrustes(B, A)[0]
    return np.linalg.norm(A - B @ R)

norm_null = compute_norm_orth_proc(Xhat_p, Xhat_pp)

def parametric_resample(A1, A2, d, nreps=100):
    """
    A function to generate samples of the null distribution under H0
    using parametric resampling.
    """
    null_norms = np.zeros(nreps)
    Xhat1 = compute_latent(A1, d)
    for i in range(0, nreps):
        Ap, App = generate_synthetic_networks(Xhat1)
        Xhat_p = compute_latent(Ap, d)
        Xhat_pp = compute_latent(App, d)
        null_norms[i] = compute_norm_orth_proc(Xhat_p, Xhat_pp)
    return null_norms

nreps = 1000
null_norms = parametric_resample(A_alien, A_human, 2, nreps=nreps)

pval = ((null_norms >= observed_norm).sum() + 1)/(nreps + 1)
print(f"estimate of p-value: {pval:.5f}")
# estimate of p-value: 0.00100

estimate of p-value: 0.00100

from graspologic.inference import latent_position_test

nreps = 1000 # the number of null replicates
lpt = latent_position_test(A_human, A_alien, n_bootstraps = nreps, n_components=d, workers=-1)
print("estimate of p-value: {:.5f}".format(lpt[1]))
# estimate of p-value: 0.00100

estimate of p-value: 0.00100

# generate a new human brain network with same block matrix
A_human2 = sbm([n // 2, n // 2], Bhum)

lpt_hum2hum = latent_position_test(A_human, A_human2, n_bootstraps=nreps, n_components=d, workers=-1)
print("estimate of p-value: {:.5f}".format(lpt_hum2hum[1]))
# estimate of p-value: 0.84715

estimate of p-value: 0.73926

import seaborn as sns

fig, axs = plt.subplots(1, 3, figsize=(21, 5))
sns.histplot(null_norms, ax=axs[0], stat="probability", bins=15, color="black", label="Null")
axs[0].set_xlabel("Statistic")
axs[0].set_ylabel("Fraction of replicates")
axs[0].set_title("(A) Latent position test", loc="left", fontsize=20)
axs[0].axvline(x=observed_norm, color="#888888", label="Observed")
axs[0].legend(title="Statistic")
axs[0].annotate("$\\| \\hat X^{(1)} - \\hat X^{(2)}\\hat W\\|_F$", (observed_norm + .03, 0.1), color="#888888", fontsize=15)
axs[0].set_xlim([0.5, observed_norm + 1.2])
axs[0].set_yticks([0, 0.05, 0.1, 0.15])
axs[0].annotate("$\\| \\hat X^{\\prime} - \\hat X^{\\prime \\prime}\\hat W''\\|_F$", (0.7, 0.1), color="#000000", fontsize=15)

Text(0.7, 0.1, "$\\| \\hat X^{\\prime} - \\hat X^{\\prime \\prime}\\hat W''\\|_F$")

../../_images/0e3f6f9c8a8d4b0e4881be72482d84d8354f7e67623b1de6aa3c17bf6adca51e.png

ncoins = 300  # the number of coins in each container

# the probabilities from container 1 landing on heads
# with a much larger variance
pi1 = np.random.beta(a=4, b=4, size=ncoins)

# the probabilities of container 2 landing on heads,
# with a much smaller variance
pi2 = np.random.beta(a=15, b=15, size=ncoins)

import pandas as pd

# Create common bins
bins = np.linspace(0, 1, 20)
# Plot histograms
sns.histplot(pi1, color="black", label="1", kde=False, bins=bins, ax=axs[1])
sns.histplot(pi2, color="grey", label="2", kde=False, bins=bins, ax=axs[1])

# Add legend
axs[1].legend(title="Manufacturer")
axs[1].set_xlim([0, 1])

# Add titles and labels
axs[1].set_title('(B) Distribution of Coin Probabilities', loc="left", fontsize=20)
axs[1].set_xlabel('Probability of Heads')

fig

../../_images/50dde2c8877cfa3242bdd9c9601e3ca4d2e0dba62db725d1e1dd228ff1b6c2fd.png

from graspologic.inference import latent_distribution_test

nreps = 1000
approach = 'mgc'  # the strategy for the latent distribution test
ldt_dcorr = latent_distribution_test(A_human, A_alien, test=approach, metric="euclidean", n_bootstraps=nreps, workers=-1)
print("estimate of p-value: {:.5f}".format(ldt_dcorr.pvalue))

estimate of p-value: 0.00100

sns.histplot(ldt_dcorr.misc_dict["null_distribution"], color="black", ax=axs[2], stat="probability", label="Null")
axs[2].axvline(ldt_dcorr.stat, color="#888888", label="Observed")
axs[2].legend(title="Statistic")
axs[2].set_title("(C) Latent distribution test")
fig.tight_layout()

fname = "ts_ldt_ex"
if mode != "png":
    fig.savefig(f"Figures/{mode:s}/{fname:s}.{mode:s}")

fig.savefig(f"Figures/png/{fname:s}.png")

fig

../../_images/80897f0f9b245855719492135170503328994439beb260f6be6b6d2f21a48dd9.png