I want to run an analysis consisting of thousands of instances. Each instance solves several optimization models, including a final one that uses the results of the previous models. I want to parallelize all instances across multiple processors. I have a Gurobi WLS license, which should let me do this on a distributed system. I use joblib for the parallelization, with the multiprocessing backend, since Gurobi does not support multithreading. A minimal reproducible example is below:
import numpy as np
import joblib
from joblib import Parallel, delayed
from contextlib import contextmanager
from tqdm import tqdm
import gurobipy as grb


def instance_analysis():
    grb_env = grb.Env()
    n_players = np.random.randint(2, 5)
    player_values = dict()
    for player in range(n_players):
        num_extr = np.random.uniform(1, 2)
        with grb.Model(env=grb_env) as max_model:
            x_vars = max_model.addVar(vtype=grb.GRB.CONTINUOUS,
                                      lb=0,
                                      name="x")
            ### Constraints
            constraints = max_model.addLConstr(lhs=x_vars,
                                               sense=grb.GRB.LESS_EQUAL,
                                               rhs=num_extr,
                                               name="limit")
            ### Objective
            max_model.setObjective(x_vars, sense=grb.GRB.MAXIMIZE)
            max_model.setParam('OutputFlag', False)
            max_model.optimize()
            player_values[player] = max_model.objVal
    with grb.Model(env=grb_env) as min_player_model:
        p_var = min_player_model.addVar(vtype=grb.GRB.CONTINUOUS,
                                        lb=0,
                                        name="p_var")
        ### Constraints
        constraints = {ii: min_player_model.addLConstr(lhs=p_var,
                                                       sense=grb.GRB.LESS_EQUAL,
                                                       rhs=val,
                                                       name="limit")
                       for ii, val in player_values.items()}
        ### Objective
        min_player_model.setObjective(p_var, sense=grb.GRB.MAXIMIZE)
        min_player_model.setParam('OutputFlag', False)
        min_player_model.optimize()
        p_val = p_var.X
    grb_env.dispose()
    return p_val <= 1.5


@contextmanager
def tqdm_joblib(tqdm_object):
    """Context manager to patch joblib to report into the tqdm progress bar given as argument"""
    class TqdmBatchCompletionCallback(joblib.parallel.BatchCompletionCallBack):
        def __call__(self, *args, **kwargs):
            tqdm_object.update(n=self.batch_size)
            return super().__call__(*args, **kwargs)

    old_batch_callback = joblib.parallel.BatchCompletionCallBack
    joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
    try:
        yield tqdm_object
    finally:
        joblib.parallel.BatchCompletionCallBack = old_batch_callback
        tqdm_object.close()


def main():
    with tqdm_joblib(tqdm(desc="Progress", total=20)) as progress_bar:
        results_dict = Parallel(n_jobs=-1, verbose=0, backend='multiprocessing')(
            delayed(instance_analysis)() for iter in range(50000))
    print(f"{round(sum(results_dict) / len(results_dict) * 100, 2)}% of the instances have value less or equal than 1.5")


if __name__ == "__main__":
    main()
but I get the following error:
Progress: 335it [00:06, 51.31it/s]
Traceback (most recent call last):
File "/gpfs/home2/fmercurio/src/mrex.py", line 82, in <module>
main()
File "/gpfs/home2/fmercurio/src/mrex.py", line 77, in main
results_dict = Parallel(n_jobs=-1, verbose=0, backend='multiprocessing')(delayed(cpu_instance_analysis)() for iter in range(50000))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/sw/arch/RHEL8/EB_production/2023/software/Python-bundle-PyPI/2023.06-GCCcore-12.3.0/lib/python3.11/site-packages/joblib/parallel.py", line 1098, in __call__
self.retrieve()
File "/sw/arch/RHEL8/EB_production/2023/software/Python-bundle-PyPI/2023.06-GCCcore-12.3.0/lib/python3.11/site-packages/joblib/parallel.py", line 975, in retrieve
self._output.extend(job.get(timeout=self.timeout))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/sw/arch/RHEL8/EB_production/2023/software/Python/3.11.3-GCCcore-12.3.0/lib/python3.11/multiprocessing/pool.py", line 774, in get
raise self._value
multiprocessing.pool.MaybeEncodingError: Error sending result: '<multiprocessing.pool.ExceptionWithTraceback object at 0x1484674eb7d0>'. Reason: 'TypeError("cannot pickle 'PyCapsule' object")'
I think this happens because so many environments are created in a short time that the license gets saturated almost immediately. Is there a way to have one Gurobi environment per joblib job?
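To clarify what I mean, this is roughly the pattern I have in mind (a hypothetical sketch, not code I have working): each worker process lazily creates a single environment the first time it runs a task and then reuses it for all subsequent tasks; the helper name get_worker_env is just for illustration.

import gurobipy as grb

# Hypothetical sketch of "one environment per worker process" (not working code):
# each multiprocessing worker creates one grb.Env() on first use and reuses it.
_worker_env = None

def get_worker_env():
    global _worker_env
    if _worker_env is None:
        _worker_env = grb.Env()  # created once per worker process, reused afterwards
    return _worker_env

def instance_analysis():
    grb_env = get_worker_env()  # instead of grb.Env() on every call
    ...                         # same models as above, without grb_env.dispose()

Is something like this the right approach, or is there a built-in way to do it with joblib?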
Thanks!