My goal is to create a difference equation to fit a set of data. After establishing the difference equation, I need to find a set of parameters and initial conditions that make the calculated results as close as possible to the fitted data, with closeness measured by Euclidean distance. Essentially, this is a straightforward minimization problem. I have tried using algorithms like simulated annealing and genetic algorithms to solve it, but due to the large scale of the problem, my computer is unable to handle it.
My code:
import numpy as np
import pandas as pd
from skopt import gbrt_minimize
import numpy as np
import pandas as pd
from deap import base, creator, tools, algorithms
import random
import numpy as np
from scipy.optimize import minimize
inf = 1e10
# My data
ss = [57.0 , 34.66666667 , 35.83333333 , 62.33333333 , 44.16666667 , 49.5 ,
56.0 , 56.83333333 , 59.0 , 60.16666667 , 56.83333333, 55.66666667 ,
69.5 , 69.5 , 68.5 , 65.83333333 , 64.0 , 57.0 ,
67.16666667 , 53.83333333]
ee = [35.0 , 41.66666667 , 100.0 , 98.33333333 , 80.0 ,
136.66666667 , 51.66666667 , 68.33333333 , 128.33333333 , 125.0 ,
121.66666667 , 93.33333333 , 36.66666667 , 55.0 , 56.66666667 ,
130.0 , 101.66666667 , 95.0 , 80.0 , 81.66666667]
F = [0.0 for _ in range(29)]
M = [0.0 for _ in range(29)]
N = [0.0 for _ in range(29)]
E = [0.0 for _ in range(29)]
thelta_N = [0.0 for _ in range(29)]
thelta_F = [0.0 for _ in range(29)]
thelta_M = [0.0 for _ in range(29)]
def dist(k , ss , ee): # k is a 21-element array
for x in k:
if x <= 0:
return inf
M[0] = k[6]
F[0] = k[7]
N[0] = k[8]
E[0] = k[9] # 1 for 10,000,000
thelta_F[0] = k[10]
thelta_F[1] = k[11]
thelta_F[2] = k[12]
thelta_F[3] = k[13]
thelta_M[0] = k[14]
thelta_M[1] = k[15]
thelta_M[2] = k[16]
thelta_M[3] = k[17]
N[0] = k[18]
N[1] = k[19]
N[2] = k[20]
N[3] = k[0]
for t in range(1, 21):
if (N[t - 1] - F[t - 1] - M[t - 1] <= 0):
return inf
if k[4] * E[0] - N[0] <= 0:
return inf
if t >= 4 and N[t - 1] - F[t - 1] - M[t - 1] - thelta_N[t - 2] - thelta_N[t - 3] - thelta_N[t - 4] <= 0:
return inf
if t >= 3:
sE = E[t - 3] + E[t - 2] + E[t - 1]
thelta_M[t] = k[2] * sE * thelta_N[t - 3] / (sE + k[2]) ** 2
thelta_F[t] = sE ** 2 * thelta_N[t - 3] / (sE + k[2]) ** 2
N[t] = N[t - 1] + thelta_N[t - 1] - thelta_N[t - 4] + thelta_M[t - 1] + thelta_F[t - 1] - 2 * k[3] * min(
M[t - 1], F[t - 1])
F[t] = F[t - 1] + thelta_F[t - 1] - k[3] * min(M[t - 1], F[t - 1])
M[t] = M[t - 1] + thelta_M[t - 1] - k[3] * min(M[t - 1], F[t - 1])
E[t] = k[5] + E[t - 1] - N[t] / k[4]
thelta_N[t] = k[1] * k[3] * min(M[t], F[t]) * (k[4] * E[t] - N[t]) / (
k[4] * E[t] - N[t] + k[1] * k[3] * min(M[t], F[t]))
if np.isinf(thelta_N[t]):
return inf
d = 0
for t in range(0, 20):
d += (M[t] / (F[t] + M[t]) - ss[t]) ** 2 + (thelta_N[t] - ee[t]) ** 2
#calculate the error
return d / 40
#k0 = [98,4,1,0.3,150,1,50,50,121,1,1,9,2,7,2,9,3,7,121,121,113]
"""k0 = [100.28346158, 6.60466344, 5.69687238, 0.79787653, 186.81979229,
2.03047995, 57.72947785, 57.60329536, 121.0, 1.06726908,
3.93969287, 6.06958948, 13.3043024, 7.0, 11.19179567,
17.05771766, 2.59326457, 7.0, 121.0, 119.20370156,
103.00356794]"""
#k0 = [100.28346158,6.60466344,5.69687238,0.79787653,186.81979229,2.03047995,57.72947785,57.60329536,121.0,1.06726908,3.93969287,6.06958948,13.3043024,7.0,11.19179567,17.05771766,2.59326457,7.0,121.0,119.20370156,103.00356794]
#k0 = [100.28346158,6.60466344,5.69687238,0.79787653,186.81979229,2.03047995,57.72947785,57.60329536,121.0,1.06726908,3.93969287,6.06958948,13.3043024,7.0,11.19179567,17.05771766,2.59326457,7.0,121.0,119.20370156,103.00356794]
k0 = [100.28346158,6.60466344,5.69687238,0.79787653,186.81979229,2.03047995,57.72947785,57.60329536,121.0,1.06726908,3.93969287,6.06958948,13.3043024,7.0,11.19179567,17.05771766,2.59326457,7.0,121.0,119.20370156,103.00356794]
# Bounds for k
bounds = [(0, 250) for _ in range(21)]
# Constraints
cons = {'type': 'eq', 'fun': lambda k: dist(k, ss, ee)}
# Minimize the function
res = minimize(dist, k0, args=(ss, ee), bounds=bounds, constraints=cons)
# Print the result
print(f"The minimal value of dist(k) is {res.fun:.4f}.")
print(f"The value of k that minimizes dist(k) is.")
print ("[" , end = '')
for x in k0:
print(x , end = ',')
print("]" , end = '')
This problem is essentially a simple minimization problem. I have tried algorithms such as simulated annealing and genetic algorithms to solve it, but due to the large scale of this problem, my computer cannot handle it. I understand that I can optimize by limiting the solution space, but I haven't found a good way to limit the solution space to a convex polygon. I don't like using the method of penalty functions because it causes a large number of positions in my function to take the value of infinity (inf), leading the computer to mistakenly believe that the function is constantly inf as it only receives inf as the function value. Most of my parameters are essentially initial values, but since these initial values do not appear in the data, they must be given through calculation, which is not easy. This hinders me from solving the problem.