7.2 Two-sample testing for SBMs

7.2 Two-sample testing for SBMs#

mode = "svg"

import matplotlib

font = {'family' : 'Dejavu Sans',
        'weight' : 'normal',
        'size'   : 20}

matplotlib.rc('font', **font)

import matplotlib
from matplotlib import pyplot as plt
import numpy as np
from graspologic.simulations import sbm

ns = [45, 30, 25]  # number of exits

states = ["NY", "NJ", "PA"]
# z is a column vector indicating which state each exit is in
z = np.repeat(states, ns)

Bnight = np.array([[.3, .2, .05], [.2, .3, .05], [.05, .05, .3]])
Bday = Bnight*2  # day time block matrix is generally 50% more than night

# people tend to commute from New Jersey to New York during the day
# at anomalously high rates
Bday[0, 1] = .5; Bday[1,0] = .5

np.random.seed(0)
Anight = sbm(ns, Bnight)
Aday = sbm(ns, Bday)
from graphbook_code import heatmap
import os

fig, axs = plt.subplots(2, 2, figsize=(10, 10))

heatmap(Bday, legend_title="Probability", ax=axs[0][0],
       xtitle="State", xticklabels=states, yticklabels=states, vmin=0, vmax=1,
       title="(A) $B^{(1)}$ Day block matrix", annot=True)
heatmap(Aday.astype(int), ax=axs[1][0],
       xtitle="Exit", inner_hier_labels=z,
       title="(C) $A^{(1)}$, Day adjacency matrix", shrink=0.5)

heatmap(Bnight, legend_title="Probability", ax=axs[0][1],
       xtitle="State", vmin=0, vmax=1, xticklabels=states, yticklabels=states,
       title="(B) $B^{(2)}$ Night block matrix", annot=True)
heatmap(Anight.astype(int), ax=axs[1][1],
       xtitle="Exit", inner_hier_labels=z,
       title="(D) $A^{(2)}$, Night adjacency matrix", shrink=0.5)

fig.tight_layout()

os.makedirs("Figures", exist_ok=True)
fname = "twosamp_sbm_ex"
if mode != "png":
    os.makedirs(f"Figures/{mode:s}", exist_ok=True)
    fig.savefig(f"Figures/{mode:s}/{fname:s}.{mode:s}")

os.makedirs("Figures/png", exist_ok=True)
fig.savefig(f"Figures/png/{fname:s}.png")
../../_images/d011f413dfbfe7228e3c272489bd99a9b4d973d5b05c0841e791e29ad68f522a.png
from scipy.stats import fisher_exact

K = 3
Pvals = np.empty((K, K))
# fill matrix with NaNs
Pvals[:] = np.nan

# get the indices of the upper triangle of Aday
upper_tri_idx = np.triu_indices(Aday.shape[0], k=1)
# create a boolean array that is nxn
upper_tri_mask = np.zeros(Aday.shape, dtype=bool)
# set indices which correspond to the upper triangle to True
upper_tri_mask[upper_tri_idx] = True

for k in range(0, K):
    for l in range(k, K):
        comm_mask = np.outer(z == states[k], z == states[l])
        table = [[Aday[comm_mask & upper_tri_mask].sum(),
                 (Aday[comm_mask & upper_tri_mask] == 0).sum()],
                 [Anight[comm_mask & upper_tri_mask].sum(),
                 (Anight[comm_mask & upper_tri_mask] == 0).sum()]]
        Pvals[k,l] = fisher_exact(table)[1]
import numpy as np
from graspologic.simulations import er_np
import seaborn as sns
from scipy.stats import binomtest

ncoins = 5000 # the number of coins
p = 0.5  # the true probability
n = 500  # the number of flips

# the number of heads from each experiment
experiments = np.random.binomial(n, p, size=ncoins)

# perform binomial test to see if the number of heads we obtain supports that the
# true probabiily is 0.5
pvals = [binomtest(nheads_i, n, p=p).pvalue for nheads_i in experiments]
from statsmodels.stats.multitest import multipletests

alpha = 0.05  # the desired alpha of the test
_, adj_pvals, _, _ = multipletests(pvals, alpha=alpha, method="holm")
import seaborn as sns

fig, axs = plt.subplots(1, 2, figsize=(15, 5))

sns.histplot(pvals, stat="probability", bins=20, ax=axs[0], color="black")
axs[0].set_title("(A) Multiple comparisons problem")
axs[0].set_xlabel("p-value")
axs[0].set_ylabel("Fraction of tests")
axs[0].axvline(alpha, color="#888888")
axs[0].annotate("$\\alpha = 0.05$", color="#888888", xy=(alpha, .08))
axs[0].set_xlim([0, 1.05])

sns.histplot(adj_pvals, stat="probability", bins=20, ax=axs[1], color="black")
axs[1].set_title("(B) Multiple comparisons adjustments")
axs[1].set_xlabel("p-value")
axs[1].set_ylabel("Fraction of tests")
axs[1].axvline(alpha, color="#888888")
axs[1].annotate("$\\alpha = 0.05$", color="#888888", xy=(alpha, .7))
axs[1].set_xlim([0, 1.05])
axs[1].set_ylim([0, 1])

fig.tight_layout()

fname = "twosampsbm_mc"
if mode != "png":
    fig.savefig(f"Figures/{mode:s}/{fname:s}.{mode:s}")

fig.savefig(f"Figures/png/{fname:s}.png")
../../_images/8aef1fcb13cdaaa590949ff163b1bb31c0f8ffe759b319625c8d67eda8f0a516.png
from graspologic.utils import symmetrize

Pvals_adj = multipletests(Pvals.flatten(), method="holm")[1].reshape(K, K)
Pvals_adj = symmetrize(Pvals_adj, method="triu")
pval_dif = Pvals_adj.min()
print(f"p-value of block matrix difference: {pval_dif:.4f}")
# p-value of block matrix difference: 0.0000
p-value of block matrix difference: 0.0000
from graspologic.inference import group_connection_test

stat, pval_diff_rescale, misc = group_connection_test(Aday, Anight,
    labels1=z, labels2=z, density_adjustment=True)
Pval_adj_rescaled = np.array(misc["corrected_pvalues"])
print(f"p-value of block matrix difference, after rescaling: {pval_diff_rescale:.4f}")
# p-value of block matrix difference: 0.0000
p-value of block matrix difference, after rescaling: 0.0087
/opt/hostedtoolcache/Python/3.12.5/x64/lib/python3.12/site-packages/graspologic/inference/group_connection_test.py:362: UserWarning: This test assumes that the networks are directed, but one or both adjacency matrices are symmetric.
  warnings.warn(msg)
fig, axs = plt.subplots(1, 2, figsize=(10, 5))

heatmap(Pvals_adj, legend_title="$p$-value", ax=axs[0],
       xtitle="State", vmin=0, vmax=1,
       title="(A) Matrix of $p$-values", annot=True, fmt='.3f')

heatmap(Pval_adj_rescaled, legend_title="$p$-value", ax=axs[1],
       xtitle="State", vmin=0, vmax=1,
       title="(B) $p$-values after rescaling", annot=True, fmt='.3f')
fig.tight_layout()

fname = "twosamp_sbm_pvals"
if mode != "png":
    fig.savefig(f"Figures/{mode:s}/{fname:s}.{mode:s}")

fig.savefig(f"Figures/png/{fname:s}.png")
../../_images/8536e4bea0430cffb9fac3dc74ea12aa102df504f34bf259a54a9685b20d8428.png