PLS in Python from scratch - matrix dimension problem

46 views Asked by At

I am attempting to write the code for PLS from scratch without using the scikit function. I obtain the following dimension problem that arises

from the coefficient computation ---> 56 self.coef_ = np.dot(self.x_weights_, self.y_weights_.T)

....

ValueError: shapes (2,3) and (2,2) not aligned: 3 (dim 1) != 2 (dim 0)

My complete code is as follows. Any ideas on why the dimension incompatibility might be arising from?

`import numpy as np

class PLSRegression:
def __init__(self, n_components=2):
    self.n_components = n_components
    self.x_scores_ = None
    self.y_scores_ = None
    self.x_weights_ = None
    self.y_weights_ = None
    self.x_loadings_ = None
    self.y_loadings_ = None
    self.coef_ = None

def fit(self, X, Y):
    n = X.shape[0]
    d = X.shape[1]
    t = Y.shape[1]
    self.x_scores_ = np.zeros((n, self.n_components))
    self.y_scores_ = np.zeros((n, self.n_components))
    self.x_weights_ = np.zeros((d, self.n_components))
    self.y_weights_ = np.zeros((t, self.n_components))
    self.x_loadings_ = np.zeros((d, self.n_components))
    self.y_loadings_ = np.zeros((t, self.n_components))

    Xk = X - X.mean(axis=0)
    Yk = Y - Y.mean(axis=0)

    for k in range(self.n_components):
        # Step a)
        C = np.dot(Xk.T, Yk)
        U, S, Vt = np.linalg.svd(C, full_matrices=False)
        uk = U[:, 0]
        vk = Vt.T[:, 0]

        # Step b)
        x_scores = np.dot(Xk, uk)
        y_scores = np.dot(Yk, vk)
        self.x_scores_[:, k] = x_scores
        self.y_scores_[:, k] = y_scores

        # Step c)
        x_loadings = np.dot(Xk.T, x_scores) / np.dot(x_scores.T, x_scores)
        y_loadings = np.dot(Yk.T, y_scores) / np.dot(y_scores.T, y_scores)
        self.x_loadings_[:, k] = x_loadings
        self.y_loadings_[:, k] = y_loadings

        # Step d)
        Xk -= np.outer(x_scores, x_loadings)
        Yk -= np.outer(y_scores, y_loadings)

    # Calculate the weights matrix (for X)
    self.x_weights_ = np.dot(np.linalg.pinv(np.dot(self.x_scores_.T, self.x_scores_)), np.dot(self.x_scores_.T, X))
    # Calculate the weights matrix (for Y)
    self.y_weights_ = np.dot(np.linalg.pinv(np.dot(self.y_scores_.T, self.y_scores_)), np.dot(self.y_scores_.T, Y))
    # Calculate the coefficient matrix
    self.coef_ = np.dot(self.x_weights_, self.y_weights_.T)

def predict(self, X):
    X_centered = X - X.mean(axis=0)
    return np.dot(X_centered, self.coef_)

# Example usage:
X = np.array([[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [3., 5., 4.]])
Y = np.array([[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]])
pls = PLSRegression(n_components=2)
pls.fit(X, Y)
print(pls.predict(X))`
0

There are 0 answers