SCS and MOSEK solvers keep running and never finish


My application had been using the ECOS solver for quite a long time. All of a sudden we started getting infeasible solutions and ended up with solver errors. Looking at a few stack traces and suggestions online, I saw recommendations for the MOSEK and SCS solvers.

I tried replacing ECOS with the SCS and MOSEK solvers, but now my runs never finish. A run usually completes in about 2 hours, but after the change it ran for around 8 hours without finishing. Please suggest what I should do.

Below are the solver params:

'solver': {'name': 'MOSEK', 'backup_name': 'SCS', 'verbose': True, 'max_iters': 3505}
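
For context, here is a minimal, self-contained sketch of how these params get applied: try the primary solver, and fall back to the backup on SolverError (the same pattern as model.py line 267 in the trace below). The toy least-squares problem and the MOSEK time-limit option (MSK_DPAR_OPTIMIZER_MAX_TIME) are placeholders added for illustration, not my actual model code.

import cvxpy as cp
import numpy as np
from cvxpy.error import SolverError

# Stand-in problem; the real model is much larger. This only illustrates the
# primary/backup solver pattern and how the options are forwarded.
np.random.seed(0)
A = np.random.randn(20, 5)
b = np.random.randn(20)
x = cp.Variable(5)
prob = cp.Problem(cp.Minimize(cp.sum_squares(A @ x - b)), [x >= 0])

solver_params = {'name': 'MOSEK', 'backup_name': 'SCS', 'verbose': True, 'max_iters': 3505}

def solve_with(prob, name, params):
    # Forward only the options each solver understands.
    if name == 'SCS':
        # SCS honours max_iters; without a cap it can iterate for a very long time.
        return prob.solve(solver=cp.SCS, verbose=params['verbose'],
                          max_iters=params['max_iters'])
    if name == 'MOSEK':
        # MOSEK ignores max_iters; a wall-clock limit (placeholder value of one hour)
        # keeps a single solve bounded.
        return prob.solve(solver=cp.MOSEK, verbose=params['verbose'],
                          mosek_params={'MSK_DPAR_OPTIMIZER_MAX_TIME': 3600.0})
    return prob.solve(solver=name, verbose=params['verbose'])

try:
    solve_with(prob, solver_params['name'], solver_params)
except SolverError:
    # Same fallback idea as model.py line 267: retry with the backup solver.
    solve_with(prob, solver_params['backup_name'], solver_params)

print(prob.status, prob.value)

The time limit and max_iters above are just guesses to keep a single solve bounded; the actual fit also passes feastol_inacc and other tolerances, as shown in the trace below.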

Kindly help

ERROR LOG:

Job aborted due to stage failure: Task 1934 in stage 6.0 failed 4 times, most recent failure: Lost task 1934.3 in stage 6.0 (TID 5028, ip-10-219-208-218.ec2.internal, executor 1): org.apache.spark.api.python.PythonException:
Traceback (most recent call last):
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/py_dependencies.zip/cat/tf/tf_model/model.py", line 262, in fit
    raise SolverError
cvxpy.error.SolverError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/cvxpy/expressions/constants/constant.py", line 243, in extremal_eig_near_ref
    ev = SA_eigsh(sigma)
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/cvxpy/expressions/constants/constant.py", line 238, in SA_eigsh
    return eigsh(A, k=1, sigma=sigma, return_eigenvectors=False)
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/scipy/sparse/linalg/eigen/arpack/arpack.py", line 1687, in eigsh
    params.iterate()
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/scipy/sparse/linalg/eigen/arpack/arpack.py", line 571, in iterate
    self._raise_no_convergence()
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/scipy/sparse/linalg/eigen/arpack/arpack.py", line 377, in _raise_no_convergence
    raise ArpackNoConvergence(msg % (num_iter, k_ok, self.k), ev, vec)
scipy.sparse.linalg.eigen.arpack.arpack.ArpackNoConvergence: ARPACK error -1: No convergence (361 iterations, 0/1 eigenvectors converged)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/pyspark.zip/pyspark/worker.py", line 377, in main
    process()
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/pyspark.zip/pyspark/worker.py", line 372, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/pyspark.zip/pyspark/serializers.py", line 400, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/pyspark.zip/pyspark/util.py", line 113, in wrapper
    return f(*args, **kwargs)
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000001/py_dependencies.zip/pyspark_scripts/spark_tf_pipeline.py", line 49, in
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/py_dependencies.zip/cat/tf/tf_model/tf_get_from_smu_records.py", line 38, in tf_get_from_smu_records
    data_points, current_date_string, params)
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/py_dependencies.zip/cat/tf/tf_model/tf_get_from_smu_records.py", line 24, in tf_get_outputs_from_smu_records
    model_output, _ = fit_model(ts_wrapper, params)
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/py_dependencies.zip/cat/tf/tf_model/fit_model.py", line 13, in fit_model
    machine_model.fit()
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/py_dependencies.zip/cat/tf/tf_model/machine_model.py", line 62, in fit
    self._fit()
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/py_dependencies.zip/cat/tf/tf_model/machine_model.py", line 120, in _fit
    self.model.fit()
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/py_dependencies.zip/cat/tf/tf_model/model.py", line 267, in fit
    self._fit(self.solver_params['backup_name'])
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/py_dependencies.zip/cat/tf/tf_model/model.py", line 245, in _fit
    feastol_inacc=tols['feastol_inacc'])
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/cvxpy/problems/problem.py", line 401, in solve
    return solve_func(self, *args, **kwargs)
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/cvxpy/problems/problem.py", line 818, in _solve
    self, data, warm_start, verbose, kwargs)
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/cvxpy/reductions/solvers/solving_chain.py", line 341, in solve_via_data
    solver_opts, problem._solver_cache)
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/cvxpy/reductions/solvers/conic_solvers/cvxopt_conif.py", line 162, in solve_via_data
    if self.remove_redundant_rows(data) == s.INFEASIBLE:
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/cvxpy/reductions/solvers/conic_solvers/cvxopt_conif.py", line 286, in remove_redundant_rows
    eig = extremal_eig_near_ref(gram, ref=TOL)
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/cvxpy/expressions/constants/constant.py", line 247, in extremal_eig_near_ref
    ev = SA_eigsh(sigma)
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/cvxpy/expressions/constants/constant.py", line 238, in SA_eigsh
    return eigsh(A, k=1, sigma=sigma, return_eigenvectors=False)
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/scipy/sparse/linalg/eigen/arpack/arpack.py", line 1687, in eigsh
    params.iterate()
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/scipy/sparse/linalg/eigen/arpack/arpack.py", line 571, in iterate
    self._raise_no_convergence()
  File "envpath/appcache/application_1618545751422_0044/container_1618545751422_0044_02_000002/miniconda/envs/project/lib/python3.6/site-packages/scipy/sparse/linalg/eigen/arpack/arpack.py", line 377, in _raise_no_convergence
    raise ArpackNoConvergence(msg % (num_iter, k_ok, self.k), ev, vec)
scipy.sparse.linalg.eigen.arpack.arpack.ArpackNoConvergence: ARPACK error -1: No convergence (361 iterations, 0/1 eigenvectors converged)

at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:456)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:592)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:575)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at org.apache.spark.sql.execution.UnsafeExternalRowSorter.sort(UnsafeExternalRowSorter.java:227)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$3.apply(ShuffleExchangeExec.scala:283)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$3.apply(ShuffleExchangeExec.scala:252)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1405)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
