I want to generate say about 500 samples of 36 variables using Latin Hypercube sampling to ensure good coverage of the parameter spaces.
I also want to ensure each sample meets some constraint.
from scipy.stats import qmc
import numpy as np
def unpack(params: np.ndarray) -> tuple[
float, # p
np.ndarray, # variable means
np.ndarray, # standard errors
np.ndarray, # correlation coefficients
np.ndarray, # covariance
]:
(p,), means, dev_diag, X_triu = np.split(params, (1, 8, 15))
dev = np.diag(dev_diag)
n = dev_diag.size
X = np.zeros((n, n))
X[np.triu_indices(n=n, k=1)] = X_triu
X += X.T + np.eye(n)
cov = dev @ X @ dev
return p, means, dev, X, cov
def positive_definite(params: np.ndarray) -> np.ndarray:
p, means, dev, X, cov = unpack(params)
return np.real(np.linalg.eigvals(cov))
def init_pop_qml(bounds, popsize,multipler=10):
limits = np.array(bounds, dtype='float').T
parameter_count = len(bounds)
num_population_members = popsize * parameter_count
sampler = qmc.LatinHypercube(d=parameter_count,seed=123456)
sample = sampler.random(n=num_population_members*multipler)
l_bounds = limits[0]
u_bounds = limits[1]
samples = qmc.scale(sample, l_bounds, u_bounds)
checks = np.zeros((num_population_members*multipler, 7))
for row in range(num_population_members*multipler):
check = positive_definite(samples[row, :])
checks[row, :] = check
population = samples[(checks > 0).all(axis=1), :]
while population.shape[0] < num_population_members:
sample2 = sampler.random(n=num_population_members*multipler)
samples2 = qmc.scale(sample2, l_bounds, u_bounds)
checks2 = np.zeros((num_population_members*multipler, 7))
for row in range(num_population_members*multipler):
check2 = positive_definite(samples2[row, :])
checks2[row,:] = check2
population2 = samples2[(checks2 > 0).all(axis=1), :]
population = np.vstack((population, population2))
print(population.shape)
return population[0:num_population_members,:]
bounds = (((0.0, 1.0),) * 8 + ((0.0, 1.0),) + ((0.0, 0.5),) * 6 + ((-1.0,1.0),)*21)
init_pop=init_pop_qml(bounds, 15, 10)
print(init_pop)
First, this takes quite long time to get 500 samples.
Second, the samples in the end may not have good coverage of the parameter space because of the constraint, so using constraint may lose the purpose of using Latin Hypercube sampling.
How can I improve sampling here?
My question is related to this, but there is no answer.