PGvector Django SQL query not registering vector datatype

226 views Asked by At

I am using PGvector with Django. While I usually leverage the PGvector ORM functionality, this usecase requires a complex query which I am running in raw sql as shown below.

from .embed_utils import oaiembed
from .models import Embedding
from pgvector.django import HnswIndex
from django.db import connection

# TODO add performance metrics from time library
test_vector = [0.1] * 1536


def queryModel(k, query):
    response = Embedding.objects.order_by(HnswIndex('embedding', query))[:k]
    print('response: ', response)
    return response



def djankQueryModel(qvector, k, project_id):
    '''
    Query the database for the k nearest neighbors to a given vector

    Parameters
    ----------
    ref_vector : list
        The vector to query for nearest neighbors
    k : int
        The number of nearest neighbors to return
    project_id : str
        The project ID to filter the query by

    Returns
    -------
    rows : list
        A list of tuples containing the product data and embedding data for the k nearest neighbors
    '''


    print(type(qvector))

    raw_query = '''
    SELECT e.*, p.*
    FROM embeddings e
    JOIN products p ON e.product_id = p.product_id
    WHERE e.embedding_id IN (
        SELECT e1.embedding_id
        FROM embeddings e1
        JOIN (
            SELECT product_id, MIN(embedding <#> %s) as min_distance
            FROM embeddings
            GROUP BY product_id
            ORDER BY min_distance ASC
            LIMIT %s
        ) as unique_products ON e1.product_id = unique_products.product_id
    )
    ORDER BY e.embedding <-> %s
    LIMIT %s;
    '''

    with connection.cursor() as cursor:
        cursor.execute(raw_query, [qvector, k, qvector, k])
        rows = cursor.fetchall()

    # Convert rows to your model instances or process as needed
    return rows

def vectorQuery(k, query, project_id):
    '''
    Query the database for the k nearest neighbors to a given vector

    Parameters
    ----------
    k : int
        The number of nearest neighbors to return
    query : str
        The query to embed and find nearest neighbors for
    project_id : str
        The project ID to filter the query by

    Returns
    -------
    results : list
        A list of tuples containing the product data and embedding data for the k nearest neighbors
    '''
    print("{'message':'Executing query transaction}")
    embedding = oaiembed(query)
    results = djankQueryModel(embedding, k, project_id)
    print("{'message':'Query transaction complete'}")
    return results

I get this output when running django, where pgvector does not seem to recognize a list. I have tried converting to a nparray, and explicitly defining a list.

enter image description here

I have tried converting the vector to a NP array, which is not recognized.

I have run the raw SQL query on my Postgres console successfully

Stack Supabase Postgres: 15.1.0.133 PGvector Postgres: 0.5.1: Running a HNSW index Django: 4.2.7 OpenAI text-embedding-ada-002: 1536 dim

0

There are 0 answers