I am trying to compute the gradient of a Gaussian process posterior mean in MATLAB, where the training data set is an N-by-D matrix (N is the number of data points and D is the number of input dimensions). A Python approach for computing the gradient is given here: https://stats.stackexchange.com/questions/373446/computing-gradients-via-gaussian-process-regression/479474#479474?newreg=74fa203413cf486a84cb0a4470e6ebdc
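For reference, the quantity being computed (following the linked answer) is the gradient of the GP posterior mean. For an RBF kernel with length scale $\ell$ and weights $\alpha = K^{-1} y$, the gradient at a query point $x_*$ is

$$\nabla_{x_*} \hat{y}(x_*) = \sum_{i=1}^{N} \alpha_i \, \nabla_{x_*} k(x_i, x_*) = \sum_{i=1}^{N} \alpha_i \, k(x_i, x_*) \, \frac{x_i - x_*}{\ell^2}.$$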
I implemented the Python version for multi-dimensional inputs and it works well:
import numpy as np
from matplotlib import pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
#from sklearn.model_selection import train_test_split
from sklearnex import patch_sklearn
patch_sklearn()
#%matplotlib
N = 100
x1 = np.linspace(0, 10, N)
x2 = np.linspace(0, 5, N)
sigma_n = 1e-8
x = np.array((x1, x2))
X_train = x.T
y_train = np.sin(X_train[:,0]**2 + X_train[:,1]**2)
#y_train = 4*x1**3 + x2**3
#%
x1_test = np.linspace(0, 10, 200)
x2_test = np.linspace(0, 5, 200)
X_test = np.array((x1_test, x2_test)).T
y_1_prime_original = 2 * x1_test * np.cos(x1_test**2 + x2_test**2)
y_2_prime_original = 2 * x2_test * np.cos(x1_test**2 + x2_test**2)
#y_1_prime_original = 12*x1_test**2
#y_2_prime_original = 3* x2_test**2
#% GP MODEL
#kernel = 1.0 * Matern(length_scale=1.0, nu=1.5)
kernel = 1.0 * RBF(1)
#kernel = RBF(length_scale=0.1,length_scale_bounds=(1e-5,1e5))
gp_model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=100).fit(X_train, y_train)
k2_l = gp_model.kernel_.get_params()['k2__length_scale']
# not necessary to do predict, but now y_pred has correct shape
y_pred, sigma = gp_model.predict(X_test, return_std=True)
Nn = X_test.shape[0]  # number of points where the gradient is queried
y_pred_grad = np.zeros((Nn, 2))
alpha_p = gp_model.alpha_  # weights alpha = K^{-1} y from the fitted model
# loop over each point where a gradient is needed
for key, x_star in enumerate(X_test):
    # eval_gradient can't be True when the evaluation site doesn't match X;
    # this gives standard RBF kernel evaluations k(x_i, x_star), shape (N, 1)
    k_val = gp_model.kernel_(X_train, np.atleast_2d(x_star), eval_gradient=False)
    # (x_i - x_star) / l^2, shape (N, 2)
    x_diff_over_l_sq = (X_train - x_star) / np.power(k2_l, 2)
    # pair-wise multiply
    intermediate_result = np.multiply(k_val, x_diff_over_l_sq)
    # dot product of the alphas with the intermediate result
    final_result = np.dot(alpha_p.T, intermediate_result)
    final_result *= gp_model._y_train_std
    final_result = final_result.reshape(1, -1)
    # store gradient at this point
    y_pred_grad[key, :] = final_result
#% PLOT GRAPH
y_train = y_train.reshape(-1, 1)
plt.figure(1)
ax = plt.axes(projection='3d')
ax.scatter(X_train[:,0],X_train[:,1], y_train,marker='.', linewidth=2)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.title('Training data (original function)')
#% PLOT GRAPH
fig = plt.figure(2)
ax = plt.axes(projection='3d')
ax.plot3D(X_test[:,0], X_test[:,1],y_1_prime_original ,'green', linewidth=2)
ax.scatter(X_test[:,0], X_test[:,1], y_pred_grad[:,0],color='red',marker='x', linewidth=2)
ax.set_xlabel('x')
ax.set_ylabel('y')
#plt.grid()
#ax.set(xlim=(0, 3600))
#ylim=(0, 8), yticks=np.arange(1, 8))
plt.legend([ "original function", "y_pred_grad_1"], loc ="upper right")
plt.title('Gradient of y/x1')
#"""
#"""
fig = plt.figure(3)
ax = plt.axes(projection='3d')
ax.plot3D(X_test[:,0], X_test[:,1],y_2_prime_original ,'green', linewidth=2)
ax.scatter(X_test[:,0], X_test[:,1], y_pred_grad[:,1],color='red',marker='x', linewidth=2)
ax.set_xlabel('x')
ax.set_ylabel('y')
#plt.grid()
#ax.set(xlim=(0, 3600))
#ylim=(0, 8), yticks=np.arange(1, 8))
plt.legend([ "original function", "y_pred_grad_1"], loc ="upper right")
plt.title('Gradient of y/x2')
plt.show()
#"""
I have been trying to write the equivalent code in MATLAB, as below, without success:
N = 100;
x1 = linspace(0, 10, N);
x2 = linspace(0, 5, N);
sigma_n = 1e-8;
X_train = [x1; x2];
X_train = X_train';
y_train = sin(X_train(:, 1).^2 + X_train(:, 2).^2);
x1_test = linspace(0, 10, 200);
x2_test = linspace(0, 5, 200);
X_test = [x1_test; x2_test];
X_test = X_test';
y_1_prime_original = 2 * x1_test .* cos(x1_test.^2 + x2_test.^2);
y_2_prime_original = 2 * x2_test .* cos(x1_test.^2 + x2_test.^2);
%%
my_gpr = fitrgp(X_train,y_train);
theta = my_gpr.KernelInformation.KernelParameters;   % [sigma_l; sigma_f]
sigma_l = theta(1);   % length scale
sigma_f = theta(2);   % signal standard deviation
alpha_P = my_gpr.Alpha;
x_train = my_gpr.X ;
Nn = size(X_test, 1);   % number of test points
gpr_grad = zeros(Nn, 2);
%%
for idx = 1 : Nn
    gpr_grad(idx, :) = GPR_gradient(X_test(idx, :), my_gpr);
end
%%
figure;
plot3(X_test(:, 1), X_test(:, 2), y_1_prime_original, 'g', 'LineWidth', 2);
hold on;
scatter3(X_test(:, 1), X_test(:, 2), gpr_grad(:, 1), 'r', 'x', 'LineWidth', 2);
xlabel('x');
ylabel('y');
zlabel('z');
legend('Analytic gradient', 'GP gradient', 'Location', 'northwest');
title('Gradient dy/dx1');
%%
function K_x = squared_exponential_kernel(a, b, theta)
% Dense squared exponential kernel (unused alternative to mykernel below).
sigma_l = theta(1);
sigma_f = theta(2);
sqdist = sum(a.^2, 2) + sum(b.^2, 2).' - 2 * (a * b.');
K_x = sigma_f^2 * exp(-0.5 * (1/sigma_l^2) * sqdist);
end
function gpr_grad = GPR_gradient(x_star, my_gpr)
% Gradient of the GP posterior mean at the 1-by-D query point x_star.
theta   = my_gpr.KernelInformation.KernelParameters;
sigma_l = theta(1);                                   % length scale
alpha_P = my_gpr.Alpha;                               % alpha = K \ (y - H*beta)
x_train = my_gpr.X;
Kx_star = mykernel(x_star, x_train, theta);           % 1-by-N kernel row
% d/dx* k(x_i, x*) = k(x_i, x*) * (x_i - x*) / sigma_l^2.
% Note the sign: it must be (x_train - x_star), matching the Python code above.
diff_over_l_sq = (x_train - x_star) ./ sigma_l^2;     % N-by-D
gpr_grad = alpha_P' * (Kx_star' .* diff_over_l_sq);   % 1-by-D
end
function KMN = mykernel(XM, XN, theta)
% Isotropic squared exponential kernel, adapted from:
% https://www.mathworks.com/matlabcentral/answers/253605-kernel-custom-problem-firgp
% KMN(i,j) = sigma_f^2 * exp(-||XM(i,:) - XN(j,:)||^2 / (2*sigma_l^2)),
% where theta = [sigma_l; sigma_f] on the natural scale, as returned by
% my_gpr.KernelInformation.KernelParameters for fitrgp's default
% 'squaredexponential' kernel.
sigma_l = theta(1);
sigma_f = theta(2);
sqdist = pdist2(XM ./ sigma_l, XN ./ sigma_l).^2;   % scaled squared distances
KMN = (sigma_f^2) * exp(-0.5 * sqdist);
end
The result is not correct. Does anyone have working MATLAB code for computing the gradient of a Gaussian process?
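For debugging, a finite-difference check can help isolate whether the kernel or the gradient formula is at fault. Below is a minimal sketch (assuming my_gpr, X_test, and GPR_gradient from the code above are in the workspace); since predict returns the GP posterior mean, the two rows printed should agree closely:

h = 1e-5;                               % step size for central differences
idx = 50;                               % arbitrary test point
x0 = X_test(idx, :);
fd_grad = zeros(1, 2);
for d = 1:2
    xp = x0;  xp(d) = xp(d) + h;
    xm = x0;  xm(d) = xm(d) - h;
    fd_grad(d) = (predict(my_gpr, xp) - predict(my_gpr, xm)) / (2 * h);
end
disp([fd_grad; GPR_gradient(x0, my_gpr)]);   % rows should agree closely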