I am using the below functions to call lbfgs using RcppEnsmallen to maximize a function defined by computeQval() :
double Qfunc::EvaluateWithGradient(const arma::mat& x, arma::mat& g)
{
// Initialize the function value and gradient vector
Eigen::Map<const Eigen::MatrixXd> eigen_X(x.memptr(), x.n_rows, x.n_cols);
Eigen::VectorXd x_vec = eigen_X.col(0);
double Q = 0.0;
std::vector<Eigen::MatrixXd> inputWeightArray = listMat(inputWeightArrayList);
Eigen::MatrixXd inputWeightArray_st = inputWeightArray[curr_state];
Eigen::MatrixXd inputWeightArray_st_clone(inputWeightArray_st);
inputWeightArray_st_clone.col(next_state) = x_vec;
//Compute function value for input x_vec
Q = computeQval(x_vec);
if(std::isinf(Q))
{
Q = -1000000.0;
}
//Compute the gradient for input x_vec
Eigen::VectorXd grad = derivateQfunc(x_vec);
Eigen::MatrixXd y = Eigen::Map<Eigen::MatrixXd>(grad.data(), grad.size(), 1);
g = arma::mat(y.data(), y.rows(), y.cols(), false, false);
// Return the function value as a numeric scalar
return (-Q);
}
// [[Rcpp::export]]
arma::mat optimizeQfunc_arma(Rcpp::NumericVector x, Rcpp::List& inputWeightArrayList,
const Rcpp::DataFrame& binned_spiketrain,
Rcpp::List& log_xi,
Rcpp::NumericMatrix log_gamma,
Rcpp::NumericMatrix probMatrix,
int curr_state, int next_state, int n_observations,int n_states)
{
Eigen::VectorXd eigen_x = (as<Eigen::Map<Eigen::VectorXd> >(x));
std::vector<Eigen::MatrixXd> inputWeightArray = listMat(inputWeightArrayList);
std::vector<Eigen::MatrixXd> eigen_log_xi = listMat(log_xi);
Eigen::Map<Eigen::MatrixXd> eigen_ProbMatrix(as<Eigen::Map<Eigen::MatrixXd> >(probMatrix));
Eigen::Map<Eigen::MatrixXd> eigen_log_gamma(as<Eigen::Map<Eigen::MatrixXd> >(log_gamma));
Qfunc q(inputWeightArrayList,
eigen_ProbMatrix,
binned_spiketrain,
eigen_log_xi,
eigen_log_gamma,
curr_state, next_state,n_observations, n_states);
Rcout << q.derivateQfunc(eigen_x) << std::endl;
Rcout << q.computeQval(eigen_x) << std::endl;
arma::vec vec_data = Rcpp::as<arma::vec>(x);
arma::mat x_mat = vec_data;
ens::L_BFGS optimizer;
optimizer.Optimize(q, x_mat);
return x_mat;
}
RcppEnsmallen optimizer evaluated the function and gradient, but does not do any update and returns the same initial values:
Optimization Report
--------------------------------------------------------------------------------
Initial coordinates:
2.3434 0.0127 -4.7116 ... -4.7017
Final coordinates:
2.3434 0.0127 -4.7116 ... -4.7017
iter loss loss change |gradient| total time
--------------------------------------------------------------------------------
Version:
ensmallen: 2.19.1 (Eight Ball Deluxe)
armadillo: 12.6.4 (Cortisol Retox)
Function:
Coordinates rows: 12
Coordinates columns: 1
Loss:
Initial 92.452
Final 92.452
Change 0.000
Optimizer:
Iterations: 0 (No steps taken! Did the optimization fail?)
Evaluate calls: 51
Gradient calls: 51
Time (in seconds): 14.024
However, R optim converges to a local minima:
Browse[4]> res<-optim(par = inputWeightArray[,n,m], Qfunc,derivateQfunc, method="L-BFGS-B",
+ inputWeightArray,binned_spiketrain,log_xi,log_gamma,curr_state=1,next_state=2,control = list(fnscale=-1))
Browse[4]> res
$par
[1] 4.8103809 0.9332788 -3.4781151 -7.8554820 9.4818493 10.0474750 -13.2142134 10.3501064 5.7930575
[10] 10.4973834 8.7745170 -7.5360977
$value
[1] -83.6604
$counts
function gradient
17 17
$convergence
[1] 0
$message
[1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
To test the if the function and gradient are computed correctly in Rcpp, I used the Rcpp functions QTest and derivTest to call computeQval and derivateQfunc respectively, which was used with RcppEnsmallen:
Browse[4]> res<-optim(par = inputWeightArray[,n,m],QTest,derivTest, method="L-BFGS-B",
+ inputWeightArray=inputWeightArrayList,binned_spiketrain=binned_spiketrain,probMatrix=probMatrix,log_xi=log_xi,log_gamma=log_gamma,curr_state=0,next_state=1,n_observations=n_observations,n_states=n_states,control = list(fnscale=-1))
Browse[4]> res
$par
[1] 4.8103809 0.9332788 -3.4781151 -7.8554820 9.4818493 10.0474750 -13.2142134 10.3501064 5.7930575
[10] 10.4973834 8.7745170 -7.5360977
$value
[1] -83.6604
$counts
function gradient
17 17
$convergence
[1] 0
$message
[1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH""
I don't understand why R optim is able to find a minima, whereas RcppEnsmallen is mostly searching around the intial parameter values (checked with print statements during optimization) ? Is it due to difference in the implementation of lbfgs in both libraries or am I doing some wrong configuration with RcppEnsmallen?