Why different Random Walks with Restart have different results?

79 views Asked by At

I've written two programs to conduct RWR.
One uses a networkx graph and I conduct the run on the transition probability matrix:

import numpy as np
import networkx as nx


def run_rwr(nxgraph, a, steps):
    # Transition probability matrix
    A = nx.adjacency_matrix(nxgraph).astype(float)
    # Create the transition probability matrix
    D = np.diag(np.array(A.sum(axis=1)).flatten())
    # Calculate the Laplacian matrix
    L = D - A
    # Calculate the transition probability matrix
    P = np.linalg.inv(D) @ L

    # Initialize the probability distribution vector
    p = np.zeros(len(nxgraph.nodes))
    p[0] = 1
    old_p = p
    # Save the initial vector
    p_0 = p
    for j in range(steps):
        # Random walk step
        p = (1 - a) * P @ old_p + a * p_0
        # Check if converged
        if np.allclose(p, old_p):
            break
        old_p = p
    return p

The other doesn't use networkx, and I conduct the run on the regular matrix:
def run_rwr(A, a, steps):
    # Initialize the probability distribution vector
    p = np.zeros((A.shape[0], 1))
    p[0] = 1
    old_p = p
    # Save the initial vector
    p_0 = p

    for i in range(steps):
        # Random walk step
        p = (1 - a) * A @ old_p + a * p_0
        # check if converged
        if np.linalg.norm(p - old_p) < 1e-6:
            break
        old_p = p
    pbar.close()
    return p


def build_A():
    X = np.loadtxt("human-graph.tsv", dtype=float, comments='#')
    m, n = X.shape
    print(X.shape)
    # the graph is unweighted
    X = np.c_[X, np.ones(m)]
    base = np.amin(X[:, 0:2])

    if base < 0:
        raise ValueError('Out of range of node ids: negative base')
    # make node id start from 0
    X[:, 0:2] = X[:, 0:2] - base
    # sort by the first column
    b_idx = X[:, 0] > X[:, 1]
    a_idx = np.logical_not(b_idx)
    B = X[b_idx, :]
    # swap the first and second column
    B[:, [0, 1]] = B[:, [1, 0]]
    # concatenate the two matrices
    A = X[a_idx, :]

    X = np.vstack((A, B))
    # get the row, column, and data vectors
    rows = X[:, 0]
    cols = X[:, 1]
    data = X[:, 2]

    # assume id starts from 0
    n = int(np.amax(X[:, 0:2])) + 1
    # create a sparse matrix from the adjacency matrix
   _A = csr_matrix((data, (rows, cols)), shape=(n, n))
    A = _A + _A.T
    I, J, K = find(A)
    A = csr_matrix((np.ones(len(K)), (I, J)), shape=A.shape)
    return A

How is it that the second code runs in seconds, and the first takes ages? When I tried to run the first on GPU using cupy, it crashed with a memory error.

0

There are 0 answers