Gaussian Process gradient when the training data X is an N x D matrix


I am trying to compute the gradient of a Gaussian Process in MATLAB when the training data set is an N x D matrix, where N is the number of data points and D is the number of input columns. Python code for computing the gradient is given here: https://stats.stackexchange.com/questions/373446/computing-gradients-via-gaussian-process-regression/479474#479474?newreg=74fa203413cf486a84cb0a4470e6ebdc
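For reference, this is the formula (as I read the linked answer) that both code versions below try to evaluate. With an RBF kernel and dual coefficients $\alpha = K^{-1} y$, the posterior mean and its gradient at a test point $x_*$ are

$$ m(x_*) = \sum_{i=1}^{N} \alpha_i \, k(x_*, x_i), \qquad \nabla_{x_*} m(x_*) = \sum_{i=1}^{N} \alpha_i \, k(x_*, x_i) \, \frac{x_i - x_*}{\ell^2}, $$

where $k(x, x') = \sigma_f^2 \exp\!\left(-\lVert x - x' \rVert^2 / (2\ell^2)\right)$ and $\ell$ is the fitted length scale. Note the direction of the difference: it is $x_i - x_*$, not $x_* - x_i$.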

I implemented the Python code for multi-dimensional inputs (X with multiple columns), and it works well:

import numpy as np

from matplotlib import pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

# optional Intel acceleration; remove these two lines if sklearnex is not installed
from sklearnex import patch_sklearn
patch_sklearn()


N = 100  # number of training points

x1 = np.linspace(0, 10, N)
x2 = np.linspace(0, 5, N)

sigma_n = 1e-8  # noise level (defined but unused below)

# stack the two inputs into an N x 2 training matrix
X_train = np.column_stack((x1, x2))

y_train = np.sin(X_train[:, 0]**2 + X_train[:, 1]**2)


# test points and the analytic partial derivatives for comparison
x1_test = np.linspace(0, 10, 200)
x2_test = np.linspace(0, 5, 200)

X_test = np.column_stack((x1_test, x2_test))

y_1_prime_original = 2*x1_test*np.cos(x1_test**2 + x2_test**2)
y_2_prime_original = 2*x2_test*np.cos(x1_test**2 + x2_test**2)

# GP model
kernel = 1.0 * RBF(1)
gp_model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=100).fit(X_train, y_train)

# fitted RBF length scale (k2 is the RBF factor of the product kernel)
k2_l = gp_model.kernel_.get_params()['k2__length_scale']




# predict is not strictly necessary, but it gives y_pred the correct shape
y_pred, sigma = gp_model.predict(X_test, return_std=True)

Nn = X_test.shape[0]
y_pred_grad = np.zeros((Nn, 2))

# dual coefficients alpha = K^{-1} y
alpha_p = gp_model.alpha_

# loop over each point where the gradient is needed
for key, x_star in enumerate(X_test):
    # standard RBF kernel evaluations k(x_i, x_star)
    # (eval_gradient=True is only valid when the evaluation site equals X)
    k_val = gp_model.kernel_(X_train, np.atleast_2d(x_star), eval_gradient=False)

    # (x_i - x_star) / l^2, the factor produced by differentiating the RBF kernel
    x_diff_over_l_sq = (X_train - x_star) / np.power(k2_l, 2)

    # pair-wise multiply: k(x_i, x_star) * (x_i - x_star) / l^2
    intermediate_result = np.multiply(k_val, x_diff_over_l_sq)

    # contract with the alphas to get the gradient of the posterior mean
    final_result = np.dot(alpha_p.T, intermediate_result)

    # undo sklearn's internal target scaling (equals 1.0 when normalize_y=False)
    final_result *= gp_model._y_train_std

    # store the gradient at this point
    y_pred_grad[key, :] = final_result.ravel()


# plot the training data
y_train = y_train.reshape(-1, 1)
plt.figure(1)
ax = plt.axes(projection='3d')

ax.scatter(X_train[:, 0], X_train[:, 1], y_train, marker='.', linewidth=2)

ax.set_xlabel('x')
ax.set_ylabel('y')
plt.title('Training data')

# plot dy/dx1: analytic derivative vs GP estimate
fig = plt.figure(2)
ax = plt.axes(projection='3d')

ax.plot3D(X_test[:, 0], X_test[:, 1], y_1_prime_original, 'green', linewidth=2)
ax.scatter(X_test[:, 0], X_test[:, 1], y_pred_grad[:, 0], color='red', marker='x', linewidth=2)

ax.set_xlabel('x')
ax.set_ylabel('y')
plt.legend(["original function", "y_pred_grad_1"], loc="upper right")
plt.title('Gradient dy/dx1')

#"""
fig = plt.figure(3)
ax = plt.axes(projection='3d')

ax.plot3D(X_test[:,0], X_test[:,1],y_2_prime_original ,'green', linewidth=2)
ax.scatter(X_test[:,0], X_test[:,1], y_pred_grad[:,1],color='red',marker='x', linewidth=2)

ax.set_xlabel('x')
ax.set_ylabel('y')
#plt.grid()
#ax.set(xlim=(0, 3600))
       #ylim=(0, 8), yticks=np.arange(1, 8))
plt.legend([ "original function", "y_pred_grad_1"], loc ="upper right")
plt.title('Gradient of y/x2')
plt.show()
#"""


I have been trying to write the equivalent code in MATLAB, shown below, so far without success:

N = 100;

x1 = linspace(0, 10, N);
x2 = linspace(0, 5, N);

sigma_n = 1e-8;

% N x 2 training matrix
X_train = [x1; x2]';

y_train = sin(X_train(:, 1).^2 + X_train(:, 2).^2);

% test points and analytic partial derivatives for comparison
x1_test = linspace(0, 10, 200);
x2_test = linspace(0, 5, 200);

X_test = [x1_test; x2_test]';

y_1_prime_original = 2 * x1_test .* cos(x1_test.^2 + x2_test.^2);
y_2_prime_original = 2 * x2_test .* cos(x1_test.^2 + x2_test.^2);

%%
my_gpr = fitrgp(X_train, y_train);
theta = my_gpr.KernelInformation.KernelParameters;   % [sigma_l; sigma_f]
sigma_l = theta(1);
sigma_f = theta(2);

alpha_P = my_gpr.Alpha;
x_train = my_gpr.X;

Nn = size(X_test, 1);   % number of test points
gpr_grad = zeros(Nn, 2);

%%
for idx = 1 : Nn
    gpr_grad(idx, :) = GPR_gradient(X_test(idx, :), my_gpr);
end
%%
figure;
plot3(X_test(:, 1), X_test(:, 2), y_1_prime_original, 'g', 'LineWidth', 2);
hold on;
% column 1 of gpr_grad is the derivative with respect to x1
scatter3(X_test(:, 1), X_test(:, 2), gpr_grad(:, 1), 'Marker', 'x', 'MarkerEdgeColor', 'r', 'LineWidth', 2);
xlabel('x');
ylabel('y');
zlabel('z');
legend('Original function', 'GP gradient dy/dx1', 'Location', 'northwest');
title('Gradient dy/dx1');


%%
function K_x = squared_exponential_kernel(a, b, theta)
% Squared-exponential kernel between row-wise point sets a (M x D) and b (N x D).
% (Kept for reference; the gradient code below uses mykernel instead.)

    sigma_l = theta(1);
    sigma_f = theta(2);

    sqdist = sum(a.^2, 2) + sum(b.^2, 2).' - 2 * (a * b.');
    K_x = sigma_f^2 * exp(-0.5 * (1/sigma_l^2) * sqdist);
end

function gpr_grad = GPR_gradient(X_test, my_gpr)
% Gradient of the GP posterior mean at a single test point X_test (1 x D).

    theta = my_gpr.KernelInformation.KernelParameters;
    sigma_l = theta(1);

    alpha_P = my_gpr.Alpha;
    x_train = my_gpr.X;

    % k(x_star, x_i) for every training point
    Kx_star = mykernel(X_test, x_train, theta);

    % kernel-derivative factor (x_i - x_star) / l^2
    % note the sign: training point minus test point
    x_diff = (x_train - X_test) ./ sigma_l^2;

    % pair-wise multiply, then contract with the alphas
    intermediate_result = Kx_star' .* x_diff;

    gpr_grad = alpha_P' * intermediate_result;
end

function KMN = mykernel(XM, XN, theta)
% Squared-exponential kernel: KMN(i,j) = k(XM(i,:), XN(j,:)), where XM is
% M-by-D and XN is N-by-D. theta = [sigma_l; sigma_f] as returned by
% fitrgp's KernelInformation (isotropic, non-ARD case).
% Adapted from:
% https://www.mathworks.com/matlabcentral/answers/253605-kernel-custom-problem-firgp

    D = size(XM, 2);   % XM and XN must have the same number of columns

    sigma_l = theta(1);
    sigma_f = theta(2);

    % accumulate squared distances column by column, scaled by the length scale
    KMN = pdist2(XM(:, 1)/sigma_l, XN(:, 1)/sigma_l).^2;
    for r = 2:D
        KMN = KMN + pdist2(XM(:, r)/sigma_l, XN(:, r)/sigma_l).^2;
    end
    KMN = (sigma_f^2) * exp(-0.5 * KMN);
end

The result is not correct. Does anyone have working code for a GP gradient in MATLAB?
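To debug either version, a finite-difference check of the posterior mean is useful. Below is a minimal sketch, assuming the my_gpr model fitted above and the GPR_gradient function from my code; the query index 10 and the step size h = 1e-5 are arbitrary choices:

% finite-difference sanity check of the GP mean gradient at one test point
x_star = X_test(10, :);              % any single query point (1 x D)
h = 1e-5;                            % step size, assumed small enough here
fd_grad = zeros(1, numel(x_star));
for d = 1:numel(x_star)
    e = zeros(size(x_star));
    e(d) = h;
    % central difference of the posterior mean along dimension d
    fd_grad(d) = (predict(my_gpr, x_star + e) - predict(my_gpr, x_star - e)) / (2*h);
end
disp([GPR_gradient(x_star, my_gpr); fd_grad])   % the two rows should agree

If the two rows agree up to sign only, the kernel-derivative factor is using (x_star - x_i) where it should use (x_i - x_star).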
