I am using the newest version of prince (0.7.1), but this seems to be an issue on previous versions as well.
I have a categorical dataframe named 'cat' for which I am trying to find an embedding.
mca = prince.MCA(n_components=2)
mca.fit(cat)
When I apply the learned embedding to the original dataset, everything works as it should, but when I apply it to a smaller dataset with fewer rows (for example, a 10-row sample of the same dataframe):
x = mca.transform(cat.sample(10))
I get the following error:
ValueError Traceback (most recent call last)
<ipython-input-28-faee8865f560> in <module>
----> 1 x = mca.transform(cat.sample(10))
2 print(x)
3 plot(np.array(x))
/usr/local/anaconda3/envs/upe-pipeline/lib/python3.8/site-packages/prince/mca.py in transform(self, X)
48 if self.check_input:
49 utils.check_array(X, dtype=[str, np.number])
---> 50 return self.row_coordinates(X)
51
52 def plot_coordinates(self, X, ax=None, figsize=(6, 6), x_component=0, y_component=1,
/usr/local/anaconda3/envs/upe-pipeline/lib/python3.8/site-packages/prince/mca.py in row_coordinates(self, X)
36 if not isinstance(X, pd.DataFrame):
37 X = pd.DataFrame(X)
---> 38 return super().row_coordinates(pd.get_dummies(X))
39
40 def column_coordinates(self, X):
/usr/local/anaconda3/envs/upe-pipeline/lib/python3.8/site-packages/prince/ca.py in row_coordinates(self, X)
132
133 return pd.DataFrame(
--> 134 data=X @ sparse.diags(self.col_masses_.to_numpy() ** -0.5) @ self.V_.T,
135 index=row_names
136 )
/usr/local/anaconda3/envs/upe-pipeline/lib/python3.8/site-packages/scipy/sparse/base.py in __rmatmul__(self, other)
564 raise ValueError("Scalar operands are not allowed, "
565 "use '*' instead")
--> 566 return self.__rmul__(other)
567
568 ####################
/usr/local/anaconda3/envs/upe-pipeline/lib/python3.8/site-packages/scipy/sparse/base.py in __rmul__(self, other)
548 except AttributeError:
549 tr = np.asarray(other).transpose()
--> 550 return (self.transpose() * tr).transpose()
551
552 #######################
/usr/local/anaconda3/envs/upe-pipeline/lib/python3.8/site-packages/scipy/sparse/base.py in __mul__(self, other)
514
515 if other.shape[0] != self.shape[1]:
--> 516 raise ValueError('dimension mismatch')
517
518 result = self._mul_multivector(np.asarray(other))
ValueError: dimension mismatch