I am using PGvector with Django. While I usually leverage the PGvector ORM functionality, this usecase requires a complex query which I am running in raw sql as shown below.
from .embed_utils import oaiembed
from .models import Embedding
from pgvector.django import HnswIndex
from django.db import connection
# TODO add performance metrics from time library
test_vector = [0.1] * 1536
def queryModel(k, query):
response = Embedding.objects.order_by(HnswIndex('embedding', query))[:k]
print('response: ', response)
return response
def djankQueryModel(qvector, k, project_id):
'''
Query the database for the k nearest neighbors to a given vector
Parameters
----------
ref_vector : list
The vector to query for nearest neighbors
k : int
The number of nearest neighbors to return
project_id : str
The project ID to filter the query by
Returns
-------
rows : list
A list of tuples containing the product data and embedding data for the k nearest neighbors
'''
print(type(qvector))
raw_query = '''
SELECT e.*, p.*
FROM embeddings e
JOIN products p ON e.product_id = p.product_id
WHERE e.embedding_id IN (
SELECT e1.embedding_id
FROM embeddings e1
JOIN (
SELECT product_id, MIN(embedding <#> %s) as min_distance
FROM embeddings
GROUP BY product_id
ORDER BY min_distance ASC
LIMIT %s
) as unique_products ON e1.product_id = unique_products.product_id
)
ORDER BY e.embedding <-> %s
LIMIT %s;
'''
with connection.cursor() as cursor:
cursor.execute(raw_query, [qvector, k, qvector, k])
rows = cursor.fetchall()
# Convert rows to your model instances or process as needed
return rows
def vectorQuery(k, query, project_id):
'''
Query the database for the k nearest neighbors to a given vector
Parameters
----------
k : int
The number of nearest neighbors to return
query : str
The query to embed and find nearest neighbors for
project_id : str
The project ID to filter the query by
Returns
-------
results : list
A list of tuples containing the product data and embedding data for the k nearest neighbors
'''
print("{'message':'Executing query transaction}")
embedding = oaiembed(query)
results = djankQueryModel(embedding, k, project_id)
print("{'message':'Query transaction complete'}")
return results
I get this output when running django, where pgvector does not seem to recognize a list. I have tried converting to a nparray, and explicitly defining a list.
I have tried converting the vector to a NP array, which is not recognized.
I have run the raw SQL query on my Postgres console successfully
Stack Supabase Postgres: 15.1.0.133 PGvector Postgres: 0.5.1: Running a HNSW index Django: 4.2.7 OpenAI text-embedding-ada-002: 1536 dim