6.3 Testing for differences between groups of edges

6.3 Testing for differences between groups of edges#

mode = "svg"

import matplotlib

font = {'family' : 'Dejavu Sans',
        'weight' : 'normal',
        'size'   : 20}

matplotlib.rc('font', **font)

import matplotlib
from matplotlib import pyplot as plt
import numpy as np
from graphbook_code import siem


n = 100
Z = np.ones((n, n))

# Fill the upper and lower 50th diagonals with 2
# and the main diagonal with 0
np.fill_diagonal(Z[:, 50:], 2)
np.fill_diagonal(Z[50:, :], 2)
np.fill_diagonal(Z, 0)

p = [0.4, 0.6]
np.random.seed(0)
A = siem(n, p, Z)
est_pvec = {k: A[Z == k].mean() for k in [1, 2]}
print(est_pvec)
# {1: 0.3955102040816327, 2: 0.6}
{1: 0.3955102040816327, 2: 0.6}
from scipy.stats import fisher_exact
import numpy as np

# assemble the contingency table indicated
table = np.array([[3, 7], [7, 3]])
_, pvalue = fisher_exact(table)
print(f"p-value: {pvalue:.3f}")
p-value: 0.179
# compute an upper-triangular mask to only look at the
# upper triangle since the network is simple (undirected and loopless)
upper_tri_mask = np.triu(np.ones(A.shape), k=1).astype(bool)
column_clust1 = [
    A[(Z == 1) & upper_tri_mask].sum(),
    (A[(Z == 1) & upper_tri_mask] == 0).sum(),
]
column_clust2 = [
    A[(Z == 2) & upper_tri_mask].sum(),
    (A[(Z == 2) & upper_tri_mask] == 0).sum(),
]
cont_tabl = np.vstack((column_clust1, column_clust2))
print(cont_tabl)
[[1938. 2962.]
 [  30.   20.]]
from graphbook_code import heatmap
import os

fig, axs = plt.subplots(1, 3, figsize=(20, 6))

heatmap(Z.astype(int), ax=axs[0], xtitle="Node", ytitle="Node",
        xticks=[0.5, 49.5, 99.5], xticklabels=[1, 50, 100],
        yticks=[0.5, 49.5, 99.5], yticklabels=[1, 50, 100], 
        legend_title="Edge cluster", title="(A) Cluster assignment matrix")
heatmap(A.astype(int), ax=axs[1], xtitle="Node", ytitle="Node",
        xticks=[0.5, 49.5, 99.5], xticklabels=[1, 50, 100],
        yticks=[0.5, 49.5, 99.5], yticklabels=[1, 50, 100], 
        title="(B) $SIEM_n(Z, \\vec p)$ sample")
heatmap(cont_tabl/cont_tabl.sum(axis=1, keepdims=1), ax=axs[2], title="(C) Contingency table",
        xticks=[0.5, 1.5], xticklabels=[f"Number\nof edges", f"Number of\nnon-edges"],
        yticks=[0.5, 1.5], yticklabels=["Cluster 1", "Cluster 2"],
        vmin=0, vmax=1, annot=cont_tabl.astype(int), legend_title="Fraction of cluster", fmt="d")

axs[2].tick_params(axis='y', labelrotation=90)
fig.tight_layout()

os.makedirs("Figures", exist_ok=True)
fname = "testing_siem_ex"
if mode != "png":
    os.makedirs(f"Figures/{mode:s}", exist_ok=True)
    fig.savefig(f"Figures/{mode:s}/{fname:s}.{mode:s}")

os.makedirs("Figures/png", exist_ok=True)
fig.savefig(f"Figures/png/{fname:s}.png")
../../_images/e324e48f81d8ff56ad97e6abc58b8488c9ba0ea384771c7b1beaca16b5fd421e.png
_, pvalue = fisher_exact(cont_tabl)
print(f"p-value: {pvalue:.5f}")
# p-value: 0.00523
p-value: 0.00523