I have a C++ program that embeds Python: it loads a Python file and calls a function that evaluates a Graph Neural Network. The function is called inside an event loop, so it runs once per event. My problem is that the initialization takes quite long: the first time I evaluate the model in the Python code it takes ~8 seconds, but if I evaluate it again immediately afterwards it takes only 0.005 seconds, which is great. What can I do to speed up this initialization, since I need to loop over millions of events? :\
This is the C++ code:
//arachne
#include "ArachneWupperFlow.hh"
#include "pic.h"
//std/sys
#include <iostream>
#include <stdio.h>
#include <sys/stat.h>
//python
#define PY_SSIZE_T_CLEAN
#include <Python.h>
//root
#include "TSystem.h"
ArachneWupperFlow::ArachneWupperFlow()
{
runWupperFlow = false; //flag to run WupperFlow
runMode = "DNN"; //Possible run-modes are DNN or GNN
modelFilepath = ""; //path to ONNX model
num_inputs = -999; //number of input variables
nnoutbins = 100; //number of nnout-bins
nnout = -999; //nnout variable
nvar = -1; //nvar
GNNnvar = -1; //number of elements in the GNNarray per event
num_classes = 1;
name = "";
train_tree_name = "";
}
ArachneWupperFlow::~ArachneWupperFlow()
{
}
void ArachneWupperFlow::setUpWupperFlow()
{
if (runMode == "GNN") {
std::cout<<"Running in mode GNN. Preparing everything for interference of the GNN model."<<std::endl;
//Print Logo
print_WupperFlow("GNN Evaluator");
//Initialize the python objects
PyObject *pName, *pModule; //pFunc is a class member so evaluate() can reuse it
PyObject *pArgs, *pValue;
PyObject* GNNList = PyList_New(0);
//Initialize the interpreter
Py_Initialize();
//Load the python module
pName = PyUnicode_DecodeFSDefault("scripts.python.WupperFlowEvaluator");
//Import the function from the python module
pModule = PyImport_Import(pName);
Py_DECREF(pName);
if (pModule != NULL) {
//Obtain the function from the python module
//pFunc is a new reference
pFunc = PyObject_GetAttrString(pModule, "main");
if (pFunc && PyCallable_Check(pFunc)) {
//Retrieve the arguments to be passed to the function
pArgs = PyTuple_New(2);
//Convert the C++ std::vector with the GNN input-vars to a python list
int size = GNNnvar;
if (size == -1) {
std::cout<<"Please provide the number of elements in the GNNarray with the GNNnvar parameter!"<<std::endl;
} else {
//Fill the python list with the GNN input-vars
for (int i=0; i<size; i++) {
    PyObject* val = PyFloat_FromDouble(2.0); //dummy values for the warm-up call
    PyList_Append(GNNList, val); //PyList_Append does not steal the reference
    Py_DECREF(val);
}
}
//Fill the arguments for the python function, GNN input-list and event number
PyTuple_SetItem(pArgs, 0, GNNList);
PyTuple_SetItem(pArgs, 1, PyLong_FromLong(345654323));
//Call the function and get the output
pValue = PyObject_CallObject(pFunc, pArgs);
Py_DECREF(pArgs); //pArgs owns GNNList: PyTuple_SetItem stole the reference, so no separate Py_DECREF(GNNList)
if (pValue != NULL) {
//Get the output score of the GNN inference
nnout = PyFloat_AsDouble(pValue);
Py_DECREF(pValue);
} else {
    PyErr_Print();
    fprintf(stderr,"Call failed\n");
    //do not decref pFunc/pModule here: pModule is released below and pFunc is reused in evaluate()
}
}
else {
if (PyErr_Occurred()) {
PyErr_Print();
}
fprintf(stderr, "Cannot find function \"%s\"\n", "main");
}
//pFunc is kept alive on purpose: evaluate() reuses it for every event
Py_DECREF(pModule);
} else {
PyErr_Print();
fprintf(stderr, "Failed to load \"%s\"\n", "scripts.python.WupperFlowEvaluator");
}
}
}
double ArachneWupperFlow::evaluate(float* inputarray, ULong_t event_number)
{
if (runMode == "DNN") {
} else if (runMode == "GNN") {
PyObject *pArgs, *pValue;
PyObject* GNNList = PyList_New(0);
// Retrieve the arguments to be passed to the function
pArgs = PyTuple_New(2);
//Convert the C++ std::vector with the GNN input-vars to a python list
int size = GNNnvar;
if (size == -1) {
std::cout<<"Please provide the number of elements in the GNNarray with the GNNnvar parameter!"<<std::endl;
} else {
//Fill the python list with the GNN input-vars
for (int i=0; i<size; i++) {
    PyObject* val = PyFloat_FromDouble(inputarray[i]);
    PyList_Append(GNNList, val); //PyList_Append does not steal the reference
    Py_DECREF(val);
}
}
//Fill the arguments for the python function, GNN input-list and event number
PyTuple_SetItem(pArgs, 0, GNNList);
PyTuple_SetItem(pArgs, 1, PyLong_FromUnsignedLong(event_number)); //event_number is a ULong_t
//Call the function and get the output
pValue = PyObject_CallObject(pFunc, pArgs);
Py_DECREF(pArgs); //pArgs owns GNNList (reference stolen by PyTuple_SetItem)
if (pValue != NULL) {
//Get the output score of the GNN inference
nnout = PyFloat_AsDouble(pValue);
Py_DECREF(pValue);
} else {
PyErr_Print();
fprintf(stderr,"Call failed\n");
}
}
//Return the output score
std::cout<<nnout<<std::endl;
return nnout;
}
And this is the Python part:
import os
import numpy as np
import pandas as pd
import sonnet as snt
import tensorflow as tf
from graph_nets import utils_tf
from graph_nets import graphs
from graph_nets import modules
def calc_dphi_array(phi1,phi2):
...
def make_graph(event):
...
class MyMLP(snt.Module):
def __init__(self,latent_size,num_layers,dropout,activation):
super(MyMLP, self).__init__(name=None)
self.mlp = snt.nets.MLP([latent_size] * num_layers, activate_final=True, dropout_rate=dropout, w_init = None, b_init = None, activation = activation)
self.ln = snt.LayerNorm(axis=-1, create_scale=True, create_offset=False)
self.use_dropout = (dropout != 0)
@tf.function()
def __call__(self, inputs):
if self.use_dropout:
outputs = self.mlp(inputs, is_training=False)
else:
outputs = self.mlp(inputs)
outputs = self.ln(outputs)
return outputs
class OutputMLP(snt.Module):
def __init__(self, global_output_size = 1, latent_size=64, dropout=0.05, activation=tf.nn.leaky_relu):
super(OutputMLP, self).__init__(name=None)
self.mlp = snt.nets.MLP([latent_size, global_output_size],
name='global_output', dropout_rate = dropout, w_init = None, b_init = None, activation = activation)
self.use_dropout = (dropout != 0)
@tf.function()
def __call__(self, inputs):
if self.use_dropout:
outputs = self.mlp(inputs, is_training=False)
else:
outputs = self.mlp(inputs)
outputs = tf.sigmoid(outputs)
return outputs
def make_mlp_model(latent_size=64,num_layers=4,dropout=0.05,activation=tf.nn.leaky_relu):
return MyMLP(latent_size,num_layers,dropout,activation)
class MLPGraphIndependent(snt.Module):
"""GraphIndependent with MLP edge, node, and global models."""
def __init__(self):
super(MLPGraphIndependent, self).__init__(name="MLPGraphIndependent")
self._network = modules.GraphIndependent(
edge_model_fn=make_mlp_model,
node_model_fn=make_mlp_model,
global_model_fn=make_mlp_model)
@tf.function()
def __call__(self, inputs):
return self._network(inputs)
class OutputTransform(snt.Module):
def __init__(self):
super(OutputTransform, self).__init__(name="OutputTransform")
self._network = modules.GraphIndependent(
edge_model_fn = None,
node_model_fn = None,
global_model_fn = OutputMLP)
@tf.function()
def __call__(self, inputs):
return self._network(inputs)
class MLPGraphNetwork(snt.Module):
"""GraphIndependent with MLP edge, node, and global models."""
def __init__(self):
super(MLPGraphNetwork, self).__init__(name="MLPGraphNetwork")
self._network = modules.GraphNetwork(
edge_model_fn=make_mlp_model,
node_model_fn=make_mlp_model,
global_model_fn=make_mlp_model)
@tf.function()
def __call__(self, inputs):
return self._network(inputs)
class MLPAttentionNetwork(snt.Module):
"""SelfAttention with MLP edge, node, and global models."""
def __init__(self):
super(MLPAttentionNetwork, self).__init__(name="MLPAttentionNetwork")
self._attn = modules.SelfAttention()
@tf.function()
def __call__(self, inputs):
nodes = inputs.nodes
return self._attn(nodes,nodes,nodes,inputs)
class GeneralClassifier(snt.Module):
def __init__(self):
super(GeneralClassifier, self).__init__(name="GeneralClassifier")
self._encoder = MLPGraphIndependent()
self._core = MLPGraphNetwork()
self._decoder = MLPGraphIndependent()
# Transforms the outputs into appropriate shapes.
self._output_transform = OutputTransform()
@tf.function()
def __call__(self, input_op, num_processing_steps):
latent = self._encoder(input_op)
latent0 = latent
output_ops = []
for _ in range(num_processing_steps):
core_input = utils_tf.concat([latent0, latent], axis=1)
latent = self._core(core_input)
decoded_op = self._decoder(latent)
output_ops.append(self._output_transform(decoded_op))
return output_ops
class AttentionClassifier(snt.Module):
def __init__(self):
super(AttentionClassifier, self).__init__(name="AttentionClassifier")
self._encoder = MLPGraphIndependent()
self._attn = MLPAttentionNetwork()
self._core = MLPGraphNetwork()
self._decoder = MLPGraphIndependent()
# Transforms the outputs into appropriate shapes.
self._output_transform = OutputTransform()
@tf.function()
def __call__(self, input_op, num_processing_steps):
latent = self._encoder(input_op)
latent0 = latent
output_ops = []
for _ in range(num_processing_steps):
core_input = utils_tf.concat([latent0, latent], axis=1)
latent = self._core( self._attn(core_input))
decoded_op = self._decoder(latent)
output_ops.append(self._output_transform(decoded_op))
return output_ops
"""
Main executable for evaluating WupperFlow GNNs in the Arachne event-loop
"""
def main(input_list, event_number):
...
import time
start = time.time()
output = model(input_graphs_ntuple, nprocsteps)
end = time.time()
print("Eval time: "+str(end-start))
start = time.time()
output = model(input_graphs_ntuple, nprocsteps)
end = time.time()
print("Eval time: "+str(end-start))
return output[0][4].numpy()[0][0]
I have left out the non-problematic parts. The main problem is that the call output = model(input_graphs_ntuple, nprocsteps)
takes too long the first time because of the initialization. I call it twice here because that is how I noticed that the second execution is much faster, but in the end I only want to call it once per event.
I have tried some optimizations of the tf code, but they did not help enough; the main problem remains the initialization.