Neighbors indexing error: self._check_indexing_error(key) KeyError: 8

42 views Asked by At

I am creating a clothes recommendation system, with NearestNeighbors, the data is coming from 2 datasets one with ratings.csv in this case 0 and 1 based on saved to wishlist or not and clothes.csv with all the clothes, I want to passed the ID of a garment and get a list of recommended items, but I am getting an index error. Here is the code:

user_ratings_df = pd.read_csv("ratings.csv")

user_ratings_df['IDGARMENT'] = user_ratings_df['IDGARMENT'].astype(int)

# read in data; use default pd.RangeIndex, i.e. 0, 1, 2, etc., as columns
clothes_desc = pd.read_csv("clothes.csv", on_bad_lines='skip')
clothes_metadata = clothes_desc[['IDGARMENT', 'DESCRIPTION','CATEGORY', 'BRAND', 'PRICE']]

clothes_metadata['IDGARMENT'] = clothes_metadata['IDGARMENT'].astype(int)
clothes_data = user_ratings_df.merge(clothes_metadata, on='IDGARMENT')

user_item_matrix = user_ratings_df.pivot(index=['USERID'], columns=['IDGARMENT'], values='RATING').fillna(0)
user_item_matrix

# Define a KNN model on cosine similarity
cf_knn_model= NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=10, n_jobs=-1)
#lr.fit(x.reshape(-1, 1), y)

# Fitting the model on our matrix
cf_knn_model.fit(user_item_matrix)


def clothes_recommender_engine(garment_id, matrix, cf_model, n_recs):
    # Fit model on matrix
    cf_knn_model.fit(matrix)
    
    # Calculate neighbour distances
    distances, indices = cf_model.kneighbors(matrix[garment_id], n_neighbors=n_recs)
    clothes_rec_ids = sorted(list(zip(indices.squeeze().tolist(),distances.squeeze().tolist())),key=lambda x: x[1])[:0:-1]
    
    # List to store recommendations
    cf_recs = []
    for i in clothes_rec_ids:
        cf_recs.append({'Desc':clothes_desc['DESCRIPTION'][i[0]],'Distance':i[1]})
    
    # Select top number of recommendations needed
    df = pd.DataFrame(cf_recs, index = range(1,n_recs))
    return df


n_recs = 10
clothes_recommender_engine(54448, user_item_matrix, cf_knn_model, n_recs)

And the error I am getting is:

> *keyError                                  Traceback (most recent call last) File
> ~/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:3802,
> in Index.get_loc(self, key, method, tolerance)    3801 try:
> -> 3802     return self._engine.get_loc(casted_key)    3803 except KeyError as err: File
> ~/anaconda3/lib/python3.11/site-packages/pandas/_libs/index.pyx:138,
> in pandas._libs.index.IndexEngine.get_loc() File
> ~/anaconda3/lib/python3.11/site-packages/pandas/_libs/index.pyx:165,
> in pandas._libs.index.IndexEngine.get_loc() File
> pandas/_libs/hashtable_class_helper.pxi:2263, in
> pandas._libs.hashtable.Int64HashTable.get_item() File
> pandas/_libs/hashtable_class_helper.pxi:2273, in
> pandas._libs.hashtable.Int64HashTable.get_item() KeyError: 54448 The
> above exception was the direct cause of the following exception:
> KeyError                                  Traceback (most recent call
> last) Cell In[4], line 64
>      59     return df
>      63 n_recs = 10
> ---> 64 clothes_recommender_engine(54448, user_item_matrix, cf_knn_model, n_recs) Cell In[4], line 48, in
> clothes_recommender_engine(garment_id, matrix, cf_model, n_recs)
>      42 cf_knn_model.fit(matrix)
>      44 # Extract input movie ID
>      45 #garment_id = process.extractOne(category, clothes_desc['CATEGORY'])[2]
>      46 
>      47 # Calculate neighbour distances
> ---> 48 distances, indices = cf_model.kneighbors(matrix[garment_id], n_neighbors=n_recs)
>      49 clothes_rec_ids = sorted(list(zip(indices.squeeze().tolist(),distances.squeeze().tolist())),key=lambda
> x: x[1])[:0:-1]
>      51 # List to store recommendations File ~/anaconda3/lib/python3.11/site-packages/pandas/core/frame.py:3807, in
> DataFrame.__getitem__(self, key)    3805 if self.columns.nlevels > 1: 
> 3806     return self._getitem_multilevel(key)
> -> 3807 indexer = self.columns.get_loc(key)    3808 if is_integer(indexer):    3809     indexer = [indexer] File
> ~/anaconda3/lib/python3.11/site-packages/pandas/core/indexes/base.py:3804,
> in Index.get_loc(self, key, method, tolerance)    3802     return
> self._engine.get_loc(casted_key)    3803 except KeyError as err:
> -> 3804     raise KeyError(key) from err    3805 except TypeError:    3806     # If we have a listlike key, _check_indexing_error will raise
> 3807     #  InvalidIndexError. Otherwise we fall through and re-raise 
> 3808     #  the TypeError.    3809     self._check_indexing_error(key)
> KeyError: 54448*

The error seems to be in this line:

distances, indices = cf_model.kneighbors(matrix[garment_id], n_neighbors=n_recs)

when passing matrix[garment_id], any idea how to solve it?

0

There are 0 answers