I am using LangChain and Pinecone to make the model answer questions based only on the uploaded reference documents. I get correct results when I ask sample questions in the code itself, but when I deploy it with Streamlit I get the error "Forbidden Exception 403 HTTP Response body: Project in subdomain didn't match API keys project".
# Installing required packages:
# pip install --upgrade langchain openai
# pip install unstructured
# pip install unstructured[local-inference]
# apt-get install poppler-utils
# pip install pinecone-client
# pip install tiktoken
# pip install streamlit
# Importing necessary libraries:
import os
import openai
import pinecone
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
import streamlit as st
# Make the OpenAI key available through the process environment.
# NOTE(review): the key is hard-coded in the source file; consider supplying
# it from outside the script instead.
os.environ["OPENAI_API_KEY"] = "xxxxx"
# Loading documents
# Folder whose contents are loaded by load_docs() below.
# NOTE(review): absolute local Windows path; presumably not available on a
# remote deployment host - confirm.
directory = r'D:\GenAI\Upload'
def load_docs(directory):
    """Load the documents found under ``directory``.

    Args:
        directory: Path handed to ``DirectoryLoader``.

    Returns:
        The result of calling ``load()`` on the ``DirectoryLoader``.
    """
    return DirectoryLoader(directory).load()
# Load everything from the configured directory at import/run time.
documents = load_docs(directory)
# Bare expression: the length is computed but neither stored nor printed.
len(documents)
# Splitting documents
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split ``documents`` into smaller chunks.

    Args:
        documents: Documents to split (as returned by ``load_docs``).
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter``.

    Returns:
        The result of ``split_documents`` on the configured splitter.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)
# Chunk the loaded documents with the default chunk_size / chunk_overlap.
docs = split_docs(documents)
# print(len(docs))
# Embedding documents with OpenAI
embeddings = OpenAIEmbeddings(model_name="ada")
# Embed a fixed sample string; the length below is computed but discarded.
query_result = embeddings.embed_query("Hello world")
len(query_result)
# Vector search with Pinecone
# NOTE(review): api_key and environment presumably must belong to the same
# Pinecone project that owns index_name - verify in the Pinecone console.
pinecone.init(
api_key="xxxxxx",
environment="gcp-starter"
)
index_name = "temp-dba"
# Build the vector store handle used by get_similar_docs().
# NOTE(review): these are module-level statements, so they execute every
# time the script runs; Streamlit re-executes the script on each user
# interaction, so this presumably re-embeds and re-uploads `docs` each
# time - confirm and consider doing the indexing once, outside the app.
index = Pinecone.from_documents(docs, embeddings, index_name=index_name)
# Finding similar documents
def get_similar_docs(query, k=2, score=False):
    """Search the module-level ``index`` for documents similar to ``query``.

    Args:
        query: Text to search for.
        k: Forwarded to the search call as ``k``.
        score: When truthy, use ``similarity_search_with_score``;
            otherwise use ``similarity_search``.

    Returns:
        Whatever the selected search method returns.
    """
    # Pick the bound search method once, then call it with the same arguments.
    search = index.similarity_search_with_score if score else index.similarity_search
    return search(query, k=k)
# Question answering using LangChain and OpenAI LLM
# Completion model passed to the OpenAI LLM wrapper below.
model_name = "text-davinci-003"
# model_name = "gpt-3.5-turbo"
llm = OpenAI(model_name=model_name)
# QA chain used by get_answer().
# NOTE(review): chain_type="stuff" presumably places all input documents
# into a single prompt - confirm against the LangChain docs.
chain = load_qa_chain(llm, chain_type="stuff")
def get_answer(query):
    """Answer ``query`` using documents retrieved for it.

    Args:
        query: The user's question.

    Returns:
        The result of running the module-level ``chain`` on the documents
        returned by ``get_similar_docs(query)`` together with the question.
    """
    matched = get_similar_docs(query)
    return chain.run(input_documents=matched, question=query)
### STREAMLIT ###
# Get the user query
def get_text():
    """Render the "You:" text input and return its current value."""
    return st.text_input("You:", key="input")
# Page chrome. set_page_config is kept as the first Streamlit call.
st.set_page_config(page_title="DBA Buddy", page_icon=":robot:")
st.header("Your DBA Buddy powered by GenAI :)")
query = get_text()
# Generate Result
submit = st.button('Generate')
if submit:
    # Only query the vector store / LLM once the user has asked for a
    # result. Previously get_answer(query) ran unconditionally on every
    # script run, including the initial page load with an empty query.
    if query and query.strip():
        answer = get_answer(query)
        st.subheader("Answer :")
        st.write(answer)
    else:
        # Nothing to search for; avoid sending an empty query upstream.
        st.warning("Please enter a question first.")
I tried changing the API keys, but that did not help. The API keys are valid, since they work fine for the example questions asked in the code itself. The error occurs only when generating the response for a query in Streamlit.