I am getting following error in Jupyter Notebook
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[2], line 39
37 query = input("Enter your query: ")
38 print("Recommended contacts:")
---> 39 for contact in search_contacts(query):
40 print(contact)
Cell In[2], line 33, in search_contacts(query)
31 scores = {}
32 for contact, embedding in embeddings.items():
---> 33 scores[contact] = 1 - cosine(query_embedding, embedding)
34 return sorted(scores, key=scores.get, reverse=True)[:5]
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\scipy\spatial\distance.py:668, in cosine(u, v, w)
626 """
627 Compute the Cosine distance between 1-D arrays.
628
(...)
663
664 """
665 # cosine distance is also referred to as 'uncentered correlation',
666 # or 'reflective correlation'
667 # clamp the result to 0-2
--> 668 return max(0, min(correlation(u, v, w=w, centered=False), 2.0))
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\scipy\spatial\distance.py:608, in correlation(u, v, w, centered)
575 def correlation(u, v, w=None, centered=True):
576 """
577 Compute the correlation distance between two 1-D arrays.
578
(...)
606
607 """
--> 608 u = _validate_vector(u)
609 v = _validate_vector(v)
610 if w is not None:
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\scipy\spatial\distance.py:301, in _validate_vector(u, dtype)
299 if u.ndim == 1:
300 return u
--> 301 raise ValueError("Input vector should be 1-D.")
ValueError: Input vector should be 1-D.
Here is my Code
import pandas as pd
import openai
import numpy as np
from scipy.spatial.distance import cosine
# Authenticate to OpenAI
openai.api_key = "API_KEY"
# Load the CSV file
contacts = pd.read_csv("c:/tmp/connect.csv")
# Generate embeddings for each contact using GPT-3
embeddings = {}
for index, row in contacts.iterrows():
combined = row["Combined"]
response = openai.Completion.create(
model="text-davinci-002",
prompt=f"generate embeddings for {combined}",
temperature=0.5,
)
embedding = response["choices"][0]["text"]
embeddings[combined] = embedding
# Search function to return recommended contacts based on a user's query
def search_contacts(query):
query_embedding = openai.Completion.create(
model="text-davinci-002",
prompt=f"generate embeddings for {query}",
temperature=0.5,
)["choices"][0]["text"]
scores = {}
for contact, embedding in embeddings.items():
scores[contact] = 1 - cosine(query_embedding, embedding)
return sorted(scores, key=scores.get, reverse=True)[:5]
# Example usage
query = input("Enter your query: ")
print("Recommended contacts:")
for contact in search_contacts(query):
print(contact)
My connect.csv file looks like this:
Combined |
---|
FullName: Alex Goodwill; Company: HyperCap; Position: Business Consultant |
FullName: Amy Power; Company: Hollywood; Position: Strategy & Operations - CEO's Office |
Need help with figuring out how to fix this error. I did google search but was not able to find anything which can help me understand how I am passing non-1D array to the Cosine similarity search.
You are trying to calculate cosine similarity of text instead of vectors. Embedding is a vector representation of text that has a semantic meaning. You do not create embeddings by giving a prompt to completions endpoint. You need to use Embeddings endpoint.
response will contain embeddings of every phrase. For example:
So you can take the embeddings from the response and calculate the cosine similarity.