Computing efficacy (protein expression)

Computes the change in network relative entropy from viral PPIs after setting immune system proteins, one at a time, to high abundance {1,0}.

import os, sys
import numpy as np
import scipy as sp
import pandas as pd
import copy as copy
from tqdm.notebook import tqdm
import math
import scipy.stats as st

from CoRe import reader
from CoRe.ncip import ncip
from CoRe.BA_C import BA

import importlib

import networkx as nx
import matplotlib.pyplot as plt
import json

from matplotlib import cm
from matplotlib import rcParams
import matplotlib.patches as patches

# Work from inside the example's data directory so that the relative file
# names used below resolve.
data_directory = "./Examples/Immune_System"
os.chdir(data_directory)

# Edge and node tables read from the pickled files.
edge_data = pd.read_pickle('Immune_System_medium-PPI-edges.pkl')
node_data = pd.read_pickle('Immune_System_medium-PPI-nodes.pkl')

# Set to True to rebuild the graph from the tables above instead of loading
# the previously saved graph file.
remake_graph = False

netObj = ncip()

if remake_graph:
    netObj.load_data(edge_data,node_data)
    netObj.make_graph()
    # NOTE(review): pathway_nametag and network_type are not defined anywhere
    # in the visible part of this file; this branch raises NameError unless
    # they are defined elsewhere - confirm before enabling remake_graph.
    netObj.save_network(pathway_nametag,network_type)
else:
    netObj.load_graph('Immune_System-medium-PPI.gml')

Collect all immune system communication network proteins that have PPIs with SARS-CoV-2 proteins.

# Load the interaction table: each key maps to a list of immune-system node
# names. The context manager closes the file even if parsing fails.
with open('SARS_CoV2-Immune_System_interactions.json', encoding='utf-8') as f:
    SARS_nodes = json.load(f)

# Flatten the per-key target lists and drop duplicates. The resulting order is
# arbitrary; the list is only used for membership tests and for display.
all_sars_nodes = list({target for targets in SARS_nodes.values() for target in targets})

print(all_sars_nodes)

['RHOA', 'SLC27A2', 'PVR', 'ELOB', 'EIF4E2', 'CYB5R3', 'NLRX1', 'RAB14', 'ECSIT', 'AP2A2', 'CSNK2B', 'HECTD1', 'ERP44', 'IL17RA', 'ITGB1', 'RALA', 'RAB10', 'NEU1', 'IMPDH2', 'TOMM70', 'GGH', 'PTGES2', 'TBK1', 'RIPK1', 'RAB7A', 'ANO6', 'HMOX1', 'SLC44A2', 'NPC2', 'RNF41', 'RAB18', 'GOLGA7', 'ELOC', 'STOM', 'RAB5C', 'GLA']

Specify the reference state and construct the global transition matrix.

# Which single-node distribution every node starts from: 'high', 'low', or
# any other value for the maximum-entropy (uniform) distribution.
initial_state_type = 'maxEnt'

# rho is handed to the transition-matrix construction; it is kept both as a
# string and as a float (presumably an error rate - verify against ncip).
errorname = '0.0'
rho = float(errorname)

# Each node carries input_bits bits, i.e. a distribution over
# 2**input_bits symbols.
input_bits = 1
code_length = int(2**input_bits)

# Uniform distribution over the symbols.
max_entropy_state = np.ones(shape=(code_length,))/float(code_length)

# One-hot distributions: all mass on the first symbol (high abundance) or on
# the last symbol (low abundance).
high_state, low_state = np.zeros(shape=(code_length,)), np.zeros(shape=(code_length,))
high_state[0] = 1.0
low_state[-1] = 1.0

# Pick the starting distribution by name, falling back to maximum entropy.
named_states = {'high': high_state, 'low': low_state}
initial_state = named_states.get(initial_state_type, max_entropy_state)

print(high_state,low_state)

# Construct the global transition matrix for the given rho with input_bits
# bits per node; the empty neglect_modules list excludes nothing.
netObj.construct_C(rho,h=input_bits,neglect_modules=[])
# Node order used throughout: node n occupies entries
# code_length*n .. code_length*(n+1) of the network state vector.
node_list = list(netObj.G_d.nodes)

[1. 0.] [0. 1.]

Disconnect all drugs from the network.

# Disconnect every drug node from the network before any state is computed.
netObj.disconnect_drug_nodes()

Compute the reference stationary state of the network.

# Full network state vector: one code_length-sized slot per node, every slot
# filled with the chosen single-node starting distribution.
initial_network_state = np.zeros(shape=(netObj.C_sparse.shape[0],1))

for offset in range(0, code_length*len(node_list), code_length):
    initial_network_state[offset:offset+code_length,0] = initial_state

# The reference run has no source nodes.
network_sources = []

# Iterate to the stationary state and report its relative entropy.
reference_final_state, steps = netObj.get_final_state(initial_network_state,[])
reference_final_entropy = netObj.state_entropy(reference_final_state,[])
print('Reference state relative entropy: ',reference_final_entropy)

Reference state relative entropy:  0.0

Set the SARS-CoV-2 nodes in the network to low abundance.

# Start every node from the chosen single-node distribution.
network_state = np.zeros(shape=(netObj.C_sparse.shape[0],1))
network_sources = []

for n in range(0,len(node_list)):
    network_state[code_length*n:code_length*(n+1),0] = initial_state

# Name -> position lookup, built once instead of scanning node_list for every
# target. node_list comes from the graph's node view, so names are unique.
node_index = {name: idx for idx, name in enumerate(node_list)}

# Clamp every viral-target node that exists in this network to low abundance
# and register it (once) as a source node.
for k in tqdm(SARS_nodes.keys()):
    for n in SARS_nodes[k]:
        i = node_index.get(n)

        if i is None:
            # Targets that are not part of this network are skipped on purpose.
            continue

        # Deliberately outside any try/except: a shape mismatch here is a real
        # error and must not be silently ignored.
        network_state[netObj.code_length*i:netObj.code_length*(i+1),0] = low_state

        if i not in network_sources:
            network_sources.append(i)

0%|          | 0/17 [00:00<?, ?it/s]

Compute the relative entropy of the total network and the number of steps needed to reach the stationary state.

# Iterate the perturbed network to its stationary state, holding the
# viral-target nodes as sources; steps is the number of iterations reported.
final_state, steps = netObj.get_final_state(network_state,network_sources)
# Relative entropy of the whole network for the virus-only perturbation; used
# later as the baseline that every candidate protein is compared against.
SARSCoV2_entropy = netObj.state_entropy(final_state,network_sources)

Compute stationary state of the network due to SARS-CoV-2 PPIs and proteins.

Each protein in the Reactome database is set, one at a time, to the state {1,0}; the stationary state and the resulting change in the network relative entropy are then computed.

# "class" attribute of every node, and the node order that indexes the
# network state vector.
node_class = nx.get_node_attributes(netObj.G_d,"class")
node_n = list(netObj.G_d.nodes())

# Count the protein nodes whose stationary distribution (virus-only run) lies
# more than 0.01 bits of relative entropy away from the uniform distribution.
c = 0

for i, nn in enumerate(node_n):
    if node_class[nn]!='EntityWithAccessionedSequence':
        continue

    node_distribution = final_state[netObj.code_length*i:netObj.code_length*(i+1),0]
    relH = st.entropy(node_distribution,max_entropy_state,base=2)

    c += int(relH>0.01)

# Candidate proteins to clamp: every protein node that is not itself a viral
# target, paired with its in-degree in the directed graph.
all_sources = [
    (name, netObj.G_d.in_degree(name))
    for name, attrs in netObj.G_d.nodes(data=True)
    if attrs['class']=='EntityWithAccessionedSequence' and name not in all_sars_nodes
]

# Name -> position lookup, built once instead of scanning node_list for every
# source of every candidate. node_list comes from the graph's node view, so
# names are unique.
node_index = {name: idx for idx, name in enumerate(node_list)}

# The viral-target positions are identical for every candidate, so resolve
# them once. Targets missing from the network are skipped; dict.fromkeys drops
# repeats (a node targeted by several viral proteins) while keeping order, so
# each source index is listed exactly once.
sars_source_indices = list(dict.fromkeys(
    node_index[n]
    for k in SARS_nodes.keys()
    for n in SARS_nodes[k]
    if n in node_index
))

# Result rows are collected in plain lists and turned into DataFrames once,
# after the loop, instead of growing a DataFrame with pd.concat per iteration.
rows_H_with_proteins = []
rows_H_drop_and_gain = []

for this_protein in tqdm(all_sources):
    s = this_protein[0]
    additional_source_nodes = [s]

    # Rebuild the transition matrix for this candidate, then apply the drug
    # disconnections with the candidate passed as the extra source node.
    netObj.construct_C(rho,h=input_bits)
    netObj.disconnect_nodes('ChemicalDrug',additional_source_nodes)
    netObj.disconnect_nodes('ProteinDrug',additional_source_nodes)

    # Start every node from the chosen single-node distribution.
    network_state = np.zeros(shape=(netObj.C_sparse.shape[0],1))

    for n in range(0,len(node_list)):
        network_state[code_length*n:code_length*(n+1),0] = initial_state

    # Clamp the viral-target nodes to low abundance.
    network_sources = list(sars_source_indices)

    for i in network_sources:
        network_state[netObj.code_length*i:netObj.code_length*(i+1),0] = low_state

    # Clamp the candidate protein to high abundance. This runs exactly once per
    # candidate, independent of how many viral proteins are listed.
    for n in additional_source_nodes:
        i = node_index.get(n)

        if i is None:
            # A candidate that is not part of the network cannot be clamped.
            continue

        network_state[netObj.code_length*i:netObj.code_length*(i+1),0] = high_state

        if i not in network_sources:
            network_sources.append(i)

    # Stationary state with the candidate clamped, its total relative entropy,
    # and the split of the change into drop and gain relative to the
    # virus-only state (final_state) and the reference state.
    this_state, steps = netObj.get_final_state(network_state,network_sources)
    H_with_proteins = netObj.state_entropy(this_state,network_sources)
    H_drop, H_gain = netObj.entropy_drop_and_rise(this_state,final_state,reference_final_state,network_sources)

    rows_H_with_proteins.append([this_protein[0],H_with_proteins])
    rows_H_drop_and_gain.append([this_protein[0],H_drop,H_gain])

df_H_with_proteins = pd.DataFrame(rows_H_with_proteins,columns=['Protein', 'Relative Entropy'])
df_H_drop_and_gain = pd.DataFrame(rows_H_drop_and_gain,columns=['Protein', 'Drop', 'Gain'])

0%|          | 0/1122 [00:00<?, ?it/s]

# Write all results into a dedicated sub-directory, creating it on first use.
# exist_ok=True makes the creation a no-op when the directory already exists,
# so a genuine chdir failure (e.g. missing permissions) surfaces as itself
# instead of being mistaken for a missing directory.
output_directory = './counter_entropic_shift'
os.makedirs(output_directory, exist_ok=True)
os.chdir(output_directory)

# Rank the candidate proteins by the network relative entropy they produce.
# Any reference row left over from an earlier run of this cell is removed
# first, so that re-running does not stack several 'Ref' rows.
df_H_with_proteins = df_H_with_proteins[df_H_with_proteins['Protein']!='Ref']
df_H_with_proteins = df_H_with_proteins.sort_values(by=['Relative Entropy'],ignore_index=True)

# Row 0 holds the virus-only reference value. The code that follows skips it
# with [1:] and reads it with [0]; without this row the top-ranked protein is
# dropped and the x positions come out one shorter than df_H_drop_and_gain.
df_temp = pd.DataFrame([['Ref',SARSCoV2_entropy]],columns=['Protein','Relative Entropy'])
df_H_with_proteins = pd.concat([df_temp,df_H_with_proteins],sort=False,ignore_index=True)

# Optional export of the ranked table (disabled).
#df_H_with_proteins.to_csv('high_all_protein_shifts-'+initial_state_type+'.csv',index=False)

# Rank the drop/gain split by its drop component and save it.
df_H_drop_and_gain = df_H_drop_and_gain.sort_values(by=['Drop'],ascending=True,ignore_index=True)
df_H_drop_and_gain.to_csv('split_all_high_protein_shifts-'+initial_state_type+'.csv',index=False)

# Change in network relative entropy of every candidate relative to the
# virus-only value (reference row excluded).
c_high = df_H_with_proteins['Relative Entropy'][1:] - SARSCoV2_entropy
r_min, r_max = np.min(c_high), np.max(c_high)
max_r = max(abs(r_min),abs(r_max))
# Symmetric colour range around zero.
# NOTE(review): the range is sized by |r_min| rather than max_r, so values
# above |r_min| saturate the colour map - confirm this is intended.
v_min, v_max = -abs(r_min), abs(r_min)

Immune system proteins ranked by \(\Delta H_{\mathrm{ref}}(X)\).

# NOTE(review): the [1:] slices and the [0] lookup below assume that row 0 of
# df_H_with_proteins is the 'Ref' (virus-only) entry and that the candidate
# proteins start at row 1 - verify that the reference row is present.
all_tick_names = df_H_with_proteins['Protein'].to_list()[1:]
data_size = len(all_tick_names)
# Rank positions 1..data_size, one per candidate protein.
x = np.linspace(1,data_size,data_size)

# Names with everything from the first ' [' onward removed.
tick_names = [n.split(' [')[0] for n in all_tick_names]

fig, ax = plt.subplots(figsize=(8,6))

# One point per candidate, coloured by its shift relative to the virus-only value.
plt.scatter(x,df_H_with_proteins['Relative Entropy'][1:],c=c_high,cmap=cm.seismic,vmin=v_min,vmax=v_max,marker='o',alpha=0.8,s=30)
# Horizontal line at the virus-only relative entropy.
plt.plot(x,SARSCoV2_entropy*np.ones(shape=x.shape),color='black',markersize=0,linewidth=4,alpha=1.0)
# Drawn at x=-10, outside the x-limits set below, so it only contributes the legend entry.
plt.plot(-10,df_H_with_proteins['Relative Entropy'][0],lw=0,ms=8,marker='o',label=r'Protein state: $\{1,0\}$',c='black')

plt.xlim(-3,data_size+5)

plt.ylabel(r'$H_{\mathrm{ref}}(X)$ (bits)',size=20)
plt.tick_params(axis='y',labelsize=20)
plt.tick_params(axis='x',labelsize=22)

# Raw string: '\D' and '\m' are not valid escape sequences in a normal string literal.
plt.xlabel(r'Immune system proteins sorted by $\Delta H_{\mathrm{ref}}(X)$',size=22,labelpad=10)

plt.legend(frameon=True,fontsize=20,handlelength=1.0,handletextpad=0.25,loc='upper left')

cbar = plt.colorbar(fraction=0.05,pad=0.01)

cbar.set_label(r'$\Delta H_{\mathrm{ref}}(X)$',fontsize=18,rotation=-90,labelpad=5)
cbar.ax.tick_params(labelsize=18)

plt.tight_layout()
plt.show()

Immune system proteins ranked by the drop component of \(\Delta H_{\mathrm{ref}}(X)\).

fig, ax = plt.subplots(figsize=(8,6))

# Bar positions 1..N are derived from the table that is actually plotted, so
# they always match the number of bars regardless of how any other table in
# the file is sized.
n_bars = len(df_H_drop_and_gain)
bar_x = np.linspace(1,n_bars,n_bars)

# Drop and gain share the same position for each protein.
plt.bar(bar_x,df_H_drop_and_gain['Drop'],color='Blue',label='Drop')
plt.bar(bar_x,df_H_drop_and_gain['Gain'],color='Red',label='Gain')
plt.xlim(-0.5,len(df_H_drop_and_gain['Protein'])+0.5)

plt.ylabel(r'drop/gain in $\Delta H_{\mathrm{ref}}(X)$ (bits)',size=20)
plt.tick_params(axis='x',labelsize=20)
plt.tick_params(axis='y',labelsize=20)
plt.xlabel(r'Immune system proteins sorted by $\Delta H_{\mathrm{ref}}(X)$ drop',size=22,labelpad=10)
# Annotation placed at fixed data coordinates (x=350, y=-25).
plt.text(350,-25,r'$\Delta H_{\mathrm{ref}}(X)=$ drop + gain',fontsize=20)
plt.legend(frameon=True,fontsize=20,handlelength=1.0,handletextpad=0.25,loc='upper left')

plt.tight_layout()
plt.show()