Code Reproducibility

Code Reproducibility#

import numpy as np
import networkx as nx

G = nx.DiGraph()
# add nodes to the network
G.add_node("1", pos=(1,1))
G.add_node("2", pos=(4,4))
G.add_node("3", pos=(4,2))
# add edges to the network
G.add_edge("1", "2")
G.add_edge("2", "1")
G.add_edge("1", "3")
G.add_edge("3", "1")

# the coordinates in space to use for plotting the nodes
# in the layout plot
pos = {"1": (0, 0), "2": (1, 0), "3": (.5, .5)}

nx.draw_networkx(G, with_labels=True, node_color="white", pos=pos,
                font_size=10, font_color="black", arrows=False, edge_color="black",
                width=1)

../../_images/9d2942b734e432537f58ed3a3a15b5b30868e74bb01db3222354a7130cc2f7ae.png

from graphbook_code import heatmap
import matplotlib.pyplot as plt
import seaborn as sns

# convert the networkx graph to a numpy array
A = np.asarray(nx.to_numpy_array(G))

heatmap(A, annot=True, linewidths=.1, cbar=False, 
        title="Adjacency matrix", xticklabels=[1,2,3], xtitle="Node", 
        yticklabels=[1,2,3], ytitle="Node"
       )

<Axes: title={'left': 'Adjacency matrix'}, xlabel='Node', ylabel='Node'>

../../_images/b91aba6a5950cd59cf84e90f41484060d6c445c8e4c07602accd8632c77eda75.png

import networkx as nx
from graphbook_code import heatmap

# create an undirected network G
G = nx.Graph()
# add the nodes like before
G.add_node("SI", pos=(2,1))
G.add_node("MH", pos=(4,4))
G.add_node("BK", pos=(4,1.7))
G.add_node("Q", pos=(6,3))
G.add_node("BX", pos=(6,6))

# specify boroughs that are adjacent to one another
pos = nx.get_node_attributes(G, 'pos')
G.add_edge("SI", "BK")
G.add_edge("MH", "BK")
G.add_edge("BK", "Q")
G.add_edge("MH", "Q")
G.add_edge("MH", "BX")
G.add_edge("Q", "BX")

A = nx.to_numpy_array(G)

# plotting
nx.draw_networkx(G, with_labels=True, node_color="black", pos=pos,
                font_color="white", edge_color="black")

# pass in the xticklabels and yticklabels corresponding to the
# appropriately ordered boroughs (in the order we constructed them)
heatmap(A.astype(int), xticklabels=["SI", "MH", "BK", "Q", "BX"],
        yticklabels=["SI", "MH", "BK", "Q", "BX"],
        xtitle="Borough", ytitle="Borough"
       )

<Axes: xlabel='Borough', ylabel='Borough'>

../../_images/71943de219d09c4bbf331b48af5da508db568a00ebc6a8d007c710ec4428dfe2.png

../../_images/be415a39cdda1c1e82bcced63543be7c113911bc6a9ca67ca900021a7fa81226.png

from copy import deepcopy

G_dir = G.to_directed()

# remove the edge from BK to SI
G_dir.remove_edge("BK", "SI")

nx.draw_networkx(G_dir, with_labels=True, node_color="black", pos=pos,
                font_color="white", arrows=True, edge_color="black")

../../_images/7d70742b993b78198a93a938ff93827b1cc9f562228b4265a565fedf8356c1cd.png

from graspologic.utils import is_symmetric

A = nx.to_numpy_array(G)
is_symmetric(A)
# True
A_dir = nx.to_numpy_array(G_dir)
is_symmetric(A_dir)
# False

False

G_loopy = deepcopy(G)
# add edge from SI to itself
G_loopy.add_edge("SI", "SI")
nx.draw_networkx(G_loopy, with_labels=True, node_color="black", pos=pos,
                font_color="white", edge_color="black")

../../_images/a87868143fc43f559d6e8a76c8ed6a5876025ee899c1ae22c269d5773238670f.png

from graspologic.utils import is_loopless
is_loopless(A)
# True
A_loopy = nx.to_numpy_array(G_loopy)
is_loopless(A_loopy)
# False

False

G_weight = nx.Graph()

G_weight.add_node("SI", pos=(2,1))
G_weight.add_node("MH", pos=(4,4))
G_weight.add_node("BK", pos=(4,1.7))
G_weight.add_node("Q", pos=(6,3))
G_weight.add_node("BX", pos=(6,6))

# this time, we add weights to the edges
pos = nx.get_node_attributes(G, 'pos')
G_weight.add_edge("SI", "BK", weight=20)
G_weight.add_edge("MH", "BK", weight=15)
G_weight.add_edge("BK", "Q", weight=5)
G_weight.add_edge("MH", "Q", weight=15)
G_weight.add_edge("MH", "BX", weight=5)
G_weight.add_edge("Q", "BX", weight=15)

edge_wts = nx.get_edge_attributes(G_weight, "weight")
nx.draw_networkx(G_weight, with_labels=True, node_color="black", pos=pos,
                 font_color="white", edge_color="black")
nx.draw_networkx_edge_labels(G_weight, pos, edge_wts)

{('SI', 'BK'): Text(3.0000143938232213, 1.3500050378381272, '20'),
 ('MH', 'BK'): Text(4.0, 2.850047130277381, '15'),
 ('MH', 'Q'): Text(4.999998955288903, 3.5000005223555486, '15'),
 ('MH', 'BX'): Text(4.999994298210368, 4.999994298210369, '5'),
 ('BK', 'Q'): Text(4.999999737134203, 2.3499998291372317, '5'),
 ('Q', 'BX'): Text(6.0, 4.499991549295374, '15')}

../../_images/d97d50d1e9e4f902657a367dccb83d489088c78f346cc8dc4cdac724f573ea46.png

from graspologic.utils import is_unweighted

A_weight = nx.to_numpy_array(G_weight)
is_unweighted(A)
# True
is_unweighted(A_weight)
# False

False

A_weight = nx.to_numpy_array(G_weight, nonedge=0).astype(float)

heatmap(A_weight, xticklabels=["SI", "MH", "BK", "Q", "BX"],
        yticklabels=["SI", "MH", "BK", "Q", "BX"], title="Weighted adjacency matrix", 
        xtitle="Borough", ytitle="Borough")

<Axes: title={'left': 'Weighted adjacency matrix'}, xlabel='Borough', ylabel='Borough'>

../../_images/a431f747b969933964764b03ff8f3028eb26dcb4993048e995479817ddb570fb.png

def in_degrees(A):
    """
    A function to compute the in-degrees for the nodes of an adjacency matrix.
    """
    return A.sum(axis=1)

def out_degrees(A):
    """
    A function to compute the out-degrees for the nodes of an adjacency matrix.
    """
    return A.sum(axis=0)

# get the degree for node BK, which is node 2 in our network,
# using either the in- or out-degree
print(in_degrees(A)[2])
# 3.0

3.0

D = nx.floyd_warshall_numpy(G)
heatmap(D, title="Distance matrix",  xticklabels=["SI", "MH", "BK", "Q", "BX"],
        yticklabels=["SI", "MH", "BK", "Q", "BX"], xtitle="Borough", ytitle="Borough")

<Axes: title={'left': 'Distance matrix'}, xlabel='Borough', ylabel='Borough'>

../../_images/98e89b024344d625d8eb650a55538b90394b841cd5a2b8792d351a04f2b23b2b.png

nx.density(G)
# 0.6

0.6

G_clus = nx.Graph()

G_clus.add_node("MH", pos=(4,4))
G_clus.add_node("BK", pos=(4,1.7))
G_clus.add_node("Q", pos=(6,3))
G_clus.add_node("BX", pos=(6,6))


pos = nx.get_node_attributes(G, 'pos')
G_clus.add_edge("MH", "BX")
G_clus.add_edge("MH", "BK")
G_clus.add_edge("BK", "Q")
G_clus.add_edge("MH", "Q")
G_clus.add_edge("Q", "BX")

nx.draw_networkx(G_clus, with_labels=True, node_color="black", pos=pos,
                 font_color="white", edge_color="black")

../../_images/64350d5e345c561174269aa91842a1948e1f8b55cf88e0d5fcc0f29d449554c5.png

nx.transitivity(G_clus)
# 0.75

0.75

# in-degree matrix
D_in = np.diag(in_degrees(A))
# out-degree matrix
D_out = np.diag(out_degrees(A))
# the network is undirected, so in and out degrees are same
print(np.all(D_in == D_out))
# True

True

# arbitrarily set degree matrix to in-degree matrix
D = D_in

L = D - A

from graspologic.utils import to_laplacian
L_sym = to_laplacian(A, form="I-DAD")

L_dad = to_laplacian(A, form="DAD")

tau = 1
L_rdad = to_laplacian(A, form="R-DAD", regularizer=tau)

G_induced = G.subgraph(["BK", "MH", "Q", "BX"]).copy()
nx.draw_networkx(G_induced, with_labels=True, node_color="black", pos=pos,
                 font_color="white", edge_color="black")

G_withbos = deepcopy(G)
G_withbos.add_node("BO", pos=(8, 6))
G_withbos.add_node("CA", pos=(8, 8))
G_withbos.add_edge("BO", "CA")
# fetch positions with boston and cambridge added
pos = nx.get_node_attributes(G_withbos, 'pos')
# plot
nx.draw_networkx(G_withbos, with_labels=True, node_color="black", pos=pos,
                font_color="white", edge_color="black")

../../_images/d333feed4d56a84d8a18dbfc1db7e80d5e52ebd64166b6c18ab1a1c0c60469f1.png

# returns a list of connected components, ordered 
# by decreasing size (#nodes)
cc_withbos = nx.connected_components(G_withbos)
# return the connected components, as networks
CC_nets = [G_withbos.subgraph(cc).copy() for cc in cc_withbos]

# plot the LCC
nx.draw_networkx(CC_nets[0], with_labels=True, node_color="black", pos=pos,
                font_color="white", edge_color="black")

from graphbook_code import heatmap
from matplotlib import pyplot as plt
from graspologic.simulations import er_np
import networkx as nx

n = 10
A_bus = er_np(n, 0.6)

# add pendants
n_pend = 3
A_bus = np.column_stack([np.row_stack([A_bus, np.zeros((n_pend, n))]), 
                         np.zeros((n + n_pend, n_pend))])
n = n + n_pend

# add pizza hut node
n_pizza = 1
A_bus = np.column_stack([np.row_stack([A_bus, np.ones((n_pizza, n))]), 
                         np.ones((n + n_pizza, n_pizza))])
n = n + n_pizza

# add isolates
n_iso = 3
A_bus = np.column_stack([np.row_stack([A_bus, np.zeros((n_iso, n))]), 
                         np.zeros((n + n_iso, n_iso))])
A_bus = A_bus - np.diag(np.diag(A_bus))
n = n + n_iso

# as a heatmap
node_names = [i for i in range(0, n)]
heatmap(A_bus.astype(int), title="Business Network Adjacency Matrix", 
               xticklabels=node_names, yticklabels=node_names)
               
# as a layout plot
G_bus = nx.from_numpy_array(A_bus)
node_pos = nx.shell_layout(G_bus)

plt.figure()
nx.draw(G_bus, pos=node_pos, node_color='white', edgecolors='black', with_labels=True, node_size=1500)

../../_images/378ee7123e36dfc7c85fcac340c8b9f66e4eef55a9613a2cb4116be35202fced.png

../../_images/d5c7bcf32a4e0f61cf480de30d9a0c8af8177c0b33b3dfe291d118d811aa2da3.png

def compute_degrees(A):
    # compute the degrees of the network A
    # since A is undirected, we can just sum
    # along an axis.
    return A.sum(axis=1)

def prune_low_degree(A, return_inds=True, threshold=1):
    # remove nodes which have a degree under a given
    # threshold. For a simple network, threshold=0 removes isolates,
    # and threshold=1 removes pendants
    degrees = compute_degrees(A)
    non_prunes = degrees > threshold
    robj = A[np.where(non_prunes)[0],:][:,np.where(non_prunes)[0]]
    if return_inds:
        robj = (robj, np.where(non_prunes)[0])
    return robj

A_bus_lowpruned, nonpruned_nodes = prune_low_degree(A_bus)

# relabel the nodes from 0:10 to their original identifier names
node_names_lowpruned = {i: nodeidx for i, nodeidx in enumerate(nonpruned_nodes)}

G_bus_lowpruned = nx.from_numpy_array(A_bus_lowpruned)
G_bus_lowpruned = nx.relabel_nodes(G_bus_lowpruned, node_names_lowpruned)

nx.draw(G_bus_lowpruned, pos=node_pos, with_labels=True, node_color='white', edgecolors='black', node_size=1500)

../../_images/cf141ed9631663bd94b844f7e1148b607d31974e4d32536f5b85b1460d5f039b.png

degrees_before = compute_degrees(A_bus)
degrees_after = compute_degrees(A_bus_lowpruned)

from seaborn import histplot
fig, axs = plt.subplots(1,2, figsize=(15, 4))

ax = histplot(degrees_before, ax=axs[0], binwidth=1, binrange=(0, 14))
ax.set_xlabel("Node degree");
ax.set_ylabel("Number of Nodes");
ax.set_title("Business Network, before pruning");
ax = histplot(degrees_after, ax=axs[1], binwidth=1, binrange=(0, 14))
ax.set_xlabel("Node degree");
ax.set_title("Business Network, after pruning")

Text(0.5, 1.0, 'Business Network, after pruning')

../../_images/10d94cdbbd0d897588dcb4a87637b818a47f96e44503eaed37ba961aef616c39.png

def prune_high_degree(A, return_inds=True, threshold=0):
    # remove nodes which have a degree over a given
    # threshold. For a simple network, threshold=A.shape[0] - 1
    # removes any pizza hut node
    degrees = compute_degrees(A)
    non_prunes = degrees < threshold
    robj = A[np.where(non_prunes)[0],:][:,np.where(non_prunes)[0]]
    if return_inds:
        robj = (robj, np.where(non_prunes)[0])
    return robj

# pruning nodes 
A_bus_pruned, highpruned_nodes = prune_high_degree(A_bus_lowpruned, threshold=A_bus_lowpruned.shape[0] - 1)

# relabel the nodes from 0:9 to their original identifier names,
# using the previous filters from node_names_lowpruned
node_names_highpruned = {i: node_names_lowpruned[lowpruned_idx] for 
                          i, lowpruned_idx in enumerate(highpruned_nodes)}

G_bus_pruned = nx.from_numpy_array(A_bus_pruned)
G_bus_pruned = nx.relabel_nodes(G_bus_pruned, node_names_highpruned)
nx.draw(G_bus_pruned, pos=node_pos, with_labels=True, node_color='white', edgecolors='black', node_size=1500)

../../_images/0444b43c929f8a10f995463a6983af3997f2bf7d9336bb2615d4bfc6a7f5438a.png

from graspologic.simulations import sbm
import numpy as np

wtargsa = [[dict(n=50, p=.09), dict(n=50, p=.02)],
          [dict(n=50, p=.02), dict(n=50, p=.06)]]
# activity network as upper triangle matrix
A_activity_uppertri = sbm(n=[25, 25], p=[[1,1], [1,1]], wt=np.random.binomial, wtargs=wtargsa, loops=False, directed=False)
A_activity_uppertri = np.triu(A_activity_uppertri)

# friend network
wtargsf = [[dict(a=4, b=2), dict(a=2, b=5)],
          [dict(a=2, b=5), dict(a=6, b=2)]]
A_friend = sbm(n=[25, 25], p=[[.8, .4], [.4, 1]], wt=np.random.beta, wtargs=wtargsf, directed=True)

from graspologic.utils import symmetrize

# upper-triangle symmetrize the upper triangle
A_activity = symmetrize(A_activity_uppertri, method="triu")

# symmetrize with averaging
A_friend_avg_sym = symmetrize(A_friend, method="avg")

from graspologic.utils import augment_diagonal

A_friend_aug = augment_diagonal(A_friend)

def discard_diagonal(A):
    """
    A function that discards the diagonal of a matrix,
    and returns its non-diagonal edge-weights.
    """
    # create a mask that is True for the non-diagonal edges
    non_diag_idx = np.where(~np.eye(A.shape[0], dtype=bool))
    return A[non_diag_idx].flatten()
    
# obtain the non-diagonal edge-weights
friend_nondiag_ew = discard_diagonal(A_friend)
# get the non-zero, non-diagonal edge weights
friend_nondiag_nz_ew = friend_nondiag_ew[friend_nondiag_ew > 0]

# plot the histogram, as above
histplot(friend_nondiag_nz_ew, bins=20, binrange=(0, 1))

<Axes: ylabel='Count'>

../../_images/5097fb4bcc5c5f4e78f3bef6e1d97c6bf8a18565d6404d87002d8922051d63ef.png

def truncate_network(A, threshold):
    A_cp = np.copy(A)
    A_cp[A_cp <= threshold] = 0
    return A_cp

tau = 0.4
A_friend_trunc = truncate_network(A_friend, threshold=tau)

friend_trunc_nondiag_ew = discard_diagonal(A_friend_trunc)
# get the non-zero, non-diagonal edge weights
friend_trunc_nondiag_nz_ew = friend_trunc_nondiag_ew[friend_trunc_nondiag_ew > 0]
histplot(friend_trunc_nondiag_nz_ew, bins=20, binrange=(0, 1))

<Axes: ylabel='Count'>

../../_images/f6217abdc38ee21badf0b273d365babd6aa19d6822c2a4ba654cd90f02b424af.png

# find the indices which are in the upper triangle and not in the diagonal
upper_tri_non_diag_idx = np.where(np.triu(np.ones(A_activity.shape), k=1).astype(bool))
q = 0.5  # desired percentile is 50, or a fraction of 0.5
histplot(A_activity[upper_tri_non_diag_idx].flatten())
# use the quantile function with the desired fraction q
tau = np.quantile(A_activity[upper_tri_non_diag_idx], q=q)

../../_images/72d56cecfe34167c8368c384963c4a22e42ed99da4bcdc9b5c32397731875256.png

n_lteq_tau = np.sum(A_activity[upper_tri_non_diag_idx] <= tau)
n_gt_tau = np.sum(A_activity[upper_tri_non_diag_idx] > tau)
print("Number of edges less than or equal to tau: {}".format(n_lteq_tau))
print("Number of edges greater than to tau: {}".format(n_gt_tau))

Number of edges less than or equal to tau: 722
Number of edges greater than to tau: 503

from numpy import copy

def min_difference(arr):
    b = np.diff(np.sort(arr))
    return b[b>0].min()

def quantile_threshold_network(A, directed=False, loops=False, q=0.5):
    # a function to threshold a network on the basis of the
    # fraction q
    A_cp = np.copy(A)
    n = A.shape[0]
    E = np.random.uniform(low=0, high=min_difference(A)/10, size=(n, n))
    if not directed:
        # make E symmetric
        E = (E + E.transpose())/2
    mask = np.ones((n, n))
    if not loops:
        # remove diagonal from E
        E = E - np.diag(np.diag(E))
        # exclude diagonal from the mask
        mask = mask - np.diag(np.diag(mask))
    Ap = A_cp + E
    tau = np.quantile(Ap[np.where(mask)].flatten(), q=q)
    A_cp[Ap <= tau] = 0; A_cp[Ap > tau] = 1
    return A_cp

A_activity_thresholded03 = quantile_threshold_network(A_activity, q=0.3)
A_activity_thresholded07 = quantile_threshold_network(A_activity, q=0.7)

from graspologic.utils import is_unweighted, is_loopless, is_symmetric

def simple_network_dens(X):
    # make sure the network is simple
    if (not is_unweighted(X)) or (not is_loopless(X)) or (not is_symmetric(X)):
        raise TypeError("Network is not simple!")
    # count the non-zero entries in the upper-right triangle
    # for a simple network X
    nnz = np.triu(X, k=1).sum()
    # number of nodes
    n = X.shape[0]
    # number of possible edges is 1/2*n*(n-1)
    poss_edges = 0.5*n*(n-1)
    return nnz/poss_edges

print("Network Density: {:.3f}".format(simple_network_dens(A_activity_thresholded03)))
# Network Density: 0.700

Network Density: 0.700

from graspologic.utils import is_loopless, is_symmetric
from scipy.stats import zscore

def z_score_directed_loopless(X, undirected=False):
    if not is_loopless(X):
        raise TypeError("The network has loops!")
    if is_symmetric(X):
        raise TypeError("The network is undirected!")
    # the entries of the adjacency matrix that are not on the diagonal
    non_diag_idx = np.where(~np.eye(X.shape[0], dtype=bool))
    Z = np.zeros(X.shape)
    Z[non_diag_idx] = zscore(X[non_diag_idx])
    return Z

ZA_friend = z_score_directed_loopless(A_friend)

from graspologic.utils import pass_to_ranks

RA_friend = pass_to_ranks(A_friend)

RA_friend_zb = pass_to_ranks(A_friend, method="zero-boost")

def augment_zeros(X, base=10):
    if np.any(X < 0):
        raise TypeError("The logarithm is not defined for negative values!")
    am = np.min(X[np.where(X > 0)])  # the smallest non-zero entry of X
    eps = am/base  # epsilon is one order of magnitude smaller than the smallest non-zero entry
    return X + eps  # augment all entries of X by epsilon

def log_transform(X, base=10):
    """
    A function to log transform an adjacency matrix X, which may
    have zero-weight edges.
    """
    X_aug = augment_zeros(X, base=base)
    return np.log(X_aug)/np.log(base)

A_activity_log = log_transform(A_activity)