I'm trying to create an embedding vector database from some .txt documents in a local folder. In particular, I'm following this tutorial from the official LangChain documentation: LangChain - Azure Cognitive Search and Azure OpenAI. I have followed all the steps of the tutorial, and this is my Python script:

# From https://python.langchain.com/docs/integrations/vectorstores/azuresearch


import openai
import os
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch


# Azure OpenAI connection settings. They are placed in the process environment
# before the OpenAIEmbeddings object below is constructed.
os.environ.update(
    {
        "OPENAI_API_TYPE": "azure",
        "OPENAI_API_BASE": "https://xxxxxx.openai.azure.com",
        "OPENAI_API_KEY": "xxxxxxxxx",
        "OPENAI_API_VERSION": "2023-05-15",
    }
)

# Azure Cognitive Search endpoint, key, and the name of the index to use.
vector_store_address: str = "https://xxxxxxx.search.windows.net"
vector_store_password: str = "xxxxxxx"
index_name: str = "cognitive-search-openai-exercise-index"

# `model` is passed to OpenAIEmbeddings as its `deployment` argument.
model: str = "text-embedding-ada-002"
embeddings: OpenAIEmbeddings = OpenAIEmbeddings(deployment=model, chunk_size=1)

# Build the vector store. Per the traceback in this post, the constructor looks
# up `index_name` on the service and, when it is not found, tries to create it.
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=vector_store_address,
    azure_search_key=vector_store_password,
    index_name=index_name,
    embedding_function=embeddings.embed_query,
)


from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

# Read one local text file, decoding it as UTF-8.
loader = TextLoader("C:/Users/xxxxxxxx/azure_openai_cognitive_search_exercise/data/qna/a.txt", encoding="utf-8")

documents = loader.load()
# Split the loaded documents using chunk_size=1000 and no overlap between chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Add the resulting chunks to the vector store created earlier in the script.
vector_store.add_documents(documents=docs)




# Perform a similarity search (up to k=3 results) and print the content of the
# first result.
docs = vector_store.similarity_search(
    query="Who is Pippo Franco?",
    k=3,
    search_type="similarity",
)
# Guard against an empty result list: indexing docs[0] would raise IndexError
# when the search returns nothing.
if docs:
    print(docs[0].page_content)
else:
    print("No matching documents found.")

When I run the script, I get the following warnings and error:

vector_search_configuration is not a known attribute of class <class 'azure.search.documents.indexes.models._index.SearchField'> and will be ignored
algorithm_configurations is not a known attribute of class <class 'azure.search.documents.indexes._generated.models._models_py3.VectorSearch'> and will be ignored
Traceback (most recent call last):
  File "C:\Users\xxxxxxxxx\venv\Lib\site-packages\langchain\vectorstores\azuresearch.py", line 105, in _get_search_client
    index_client.get_index(name=index_name)
  File "C:\Users\xxxxxxx\venv\Lib\site-packages\azure\core\tracing\decorator.py", line 78, in wrapper_use_tracer
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\xxxxxxx\KYF\venv\Lib\site-packages\azure\search\documents\indexes\_search_index_client.py", line 145, in get_index
    result = self._client.indexes.get(name, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\xxxxxx\venv\Lib\site-packages\azure\core\tracing\decorator.py", line 78, in wrapper_use_tracer
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\xxxxxx\KYF\venv\Lib\site-packages\azure\search\documents\indexes\_generated\operations\_indexes_operations.py", 
line 864, in get
    map_error(status_code=response.status_code, response=response, error_map=error_map)
  File "C:\Users\xxxxxxxx\venv\Lib\site-packages\azure\core\exceptions.py", line 165, in map_error
    raise error
azure.core.exceptions.ResourceNotFoundError: () No index with the name 'cognitive-search-openai-exercise-index' was found in the service 'cognitive-search-openai-exercise'.
Code:
Message: No index with the name 'cognitive-search-openai-exercise-index' was found in the service 'cognitive-search-openai-exercise'.      

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\xxxxxxx\venv\azure_openai_cognitive_search_exercise\test.py", line 25, in <module>
    vector_store: AzureSearch = AzureSearch(
                                ^^^^^^^^^^^^
  File "C:\Users\xxxxxxx\venv\Lib\site-packages\langchain\vectorstores\azuresearch.py", line 237, in __init__
    self.client = _get_search_client(
                  ^^^^^^^^^^^^^^^^^^^
  File "C:\Users\xxxxxxxx\venv\Lib\site-packages\langchain\vectorstores\azuresearch.py", line 172, in _get_search_client       
    index_client.create_index(index)
  File "C:\Users\xxxxxxx\venv\Lib\site-packages\azure\core\tracing\decorator.py", line 78, in wrapper_use_tracer
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\xxxxxxx\venv\Lib\site-packages\azure\search\documents\indexes\_search_index_client.py", line 220, in create_index
    result = self._client.indexes.create(patched_index, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\xxxxxxx\venv\Lib\site-packages\azure\core\tracing\decorator.py", line 78, in wrapper_use_tracer
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\xxxxxx\venv\Lib\site-packages\azure\search\documents\indexes\_generated\operations\_indexes_operations.py", 
line 402, in create
    raise HttpResponseError(response=response, model=error)
azure.core.exceptions.HttpResponseError: (InvalidRequestParameter) The request is invalid. Details: definition : The vector field 'content_vector' must have the property 'vectorSearchConfiguration' set.
Code: InvalidRequestParameter
Message: The request is invalid. Details: definition : The vector field 'content_vector' must have the property 'vectorSearchConfiguration' set.
Exception Details:      (InvalidField) The vector field 'content_vector' must have the property 'vectorSearchConfiguration' set. Parameters: definition
        Code: InvalidField
        Message: The vector field 'content_vector' must have the property 'vectorSearchConfiguration' set. Parameters: definition

I have created an index manually from the Azure Cognitive Search Console, but I don't think this is the correct approach, as the script should automatically create a new index.

1

There is 1 answer

0
Farzzy - MSFT On BEST ANSWER

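The two warnings at the top of your output show that the installed Azure Cognitive Search SDK does not recognize the `vector_search_configuration` attribute that LangChain sets, so it is ignored; the service then rejects the index definition because the vector field 'content_vector' has no 'vectorSearchConfiguration'. This is a version mismatch between the SDK and LangChain. Please run `pip install azure-search-documents==11.4.0b8` to ensure you are using a version of the Azure Cognitive Search Python SDK that is compatible with LangChain.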