I am attempting to write the code for PLS
from scratch without using the scikit
function. I obtain the following dimension problem that arises
from the coefficient computation ---> 56 self.coef_ = np.dot(self.x_weights_, self.y_weights_.T)
....
ValueError: shapes (2,3) and (2,2) not aligned: 3 (dim 1) != 2 (dim 0)
My complete code is as follows. Any ideas on why the dimension incompatibility might be arising from?
`import numpy as np
class PLSRegression:
def __init__(self, n_components=2):
self.n_components = n_components
self.x_scores_ = None
self.y_scores_ = None
self.x_weights_ = None
self.y_weights_ = None
self.x_loadings_ = None
self.y_loadings_ = None
self.coef_ = None
def fit(self, X, Y):
n = X.shape[0]
d = X.shape[1]
t = Y.shape[1]
self.x_scores_ = np.zeros((n, self.n_components))
self.y_scores_ = np.zeros((n, self.n_components))
self.x_weights_ = np.zeros((d, self.n_components))
self.y_weights_ = np.zeros((t, self.n_components))
self.x_loadings_ = np.zeros((d, self.n_components))
self.y_loadings_ = np.zeros((t, self.n_components))
Xk = X - X.mean(axis=0)
Yk = Y - Y.mean(axis=0)
for k in range(self.n_components):
# Step a)
C = np.dot(Xk.T, Yk)
U, S, Vt = np.linalg.svd(C, full_matrices=False)
uk = U[:, 0]
vk = Vt.T[:, 0]
# Step b)
x_scores = np.dot(Xk, uk)
y_scores = np.dot(Yk, vk)
self.x_scores_[:, k] = x_scores
self.y_scores_[:, k] = y_scores
# Step c)
x_loadings = np.dot(Xk.T, x_scores) / np.dot(x_scores.T, x_scores)
y_loadings = np.dot(Yk.T, y_scores) / np.dot(y_scores.T, y_scores)
self.x_loadings_[:, k] = x_loadings
self.y_loadings_[:, k] = y_loadings
# Step d)
Xk -= np.outer(x_scores, x_loadings)
Yk -= np.outer(y_scores, y_loadings)
# Calculate the weights matrix (for X)
self.x_weights_ = np.dot(np.linalg.pinv(np.dot(self.x_scores_.T, self.x_scores_)), np.dot(self.x_scores_.T, X))
# Calculate the weights matrix (for Y)
self.y_weights_ = np.dot(np.linalg.pinv(np.dot(self.y_scores_.T, self.y_scores_)), np.dot(self.y_scores_.T, Y))
# Calculate the coefficient matrix
self.coef_ = np.dot(self.x_weights_, self.y_weights_.T)
def predict(self, X):
X_centered = X - X.mean(axis=0)
return np.dot(X_centered, self.coef_)
# Example usage:
X = np.array([[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [3., 5., 4.]])
Y = np.array([[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]])
pls = PLSRegression(n_components=2)
pls.fit(X, Y)
print(pls.predict(X))`