# Using FAISS Vectorstore + LangChain

Build a FAISS index over a folder of `.txt` files with Instructor embeddings,
persist it to Google Drive, then answer questions over it with a
`RetrievalQA` chain backed by OpenAI.

Install the dependencies (Colab):

```python
!pip install langchain
!pip install InstructorEmbedding
!pip install sentence_transformers
!pip install faiss-gpu
!pip install openai
!pip install tqdm
```

Mount Google Drive so the input texts, embedding cache, and FAISS index
persist across sessions:

```python
from google.colab import drive

drive.mount('/content/drive/')
```

Load the documents, split them into chunks, embed them, and save the index:

```python
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import MarkdownTextSplitter
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from tqdm import tqdm

# Drive paths: raw .txt inputs, HF model cache, and the saved FAISS index.
data_dirpath = '/content/drive/My Drive/inputtext'
embedding_cache_folder = '/content/drive/My Drive/embedding'
index_save_directory = '/content/drive/My Drive/faiss'

# Load every .txt file in the input directory.
loader = DirectoryLoader(path=data_dirpath, glob="*.txt", loader_cls=TextLoader)
essays = loader.load()

# Chunk the documents; overlap keeps context continuous across chunk borders.
text_splitter = MarkdownTextSplitter(chunk_size=1500, chunk_overlap=100)
documents = text_splitter.split_documents(essays)

# Wrap in tqdm so embedding progress is visible while FAISS consumes the chunks.
documents_with_progress = tqdm(documents, total=len(documents))
print(len(documents))

# Instructor embeddings on GPU; cache_folder avoids re-downloading the model.
embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
    model_kwargs={"device": "cuda"},
    cache_folder=embedding_cache_folder,
)

# Embed all chunks into a FAISS index and persist it to Drive.
vectorstore = FAISS.from_documents(documents_with_progress, embeddings)
vectorstore.save_local(index_save_directory)
```

Reload the saved index and query it with a RetrievalQA chain:

```python
import os

from langchain import PromptTemplate, OpenAI
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA

# NOTE: replace with your key; prefer an env var / secrets manager over
# hard-coding it in a notebook.
os.environ["OPENAI_API_KEY"] = 'YOUR-API-KEY'

embedding_cache_folder = '/content/drive/My Drive/embedding'
index_save_directory = '/content/drive/My Drive/faiss'

# Must match the embedding model used when the index was built.
embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
    model_kwargs={"device": "cuda"},
    cache_folder=embedding_cache_folder,
)
vectorstore = FAISS.load_local(index_save_directory, embeddings)

# temperature=0 for deterministic, grounded answers.
llm = OpenAI(temperature=0)

# Standard "stuff" QA prompt: retrieved chunks fill {context}, the user
# question fills {question}. (Fixed: the original had a stray "In" after
# {context} instead of a newline.)
prompt_template = PromptTemplate(
    input_variables=['context', 'question'],
    template=(
        "Use the following pieces of context to answer the question at the end. "
        "If you don’t know the answer, just say that you don’t know, "
        "don’t try to make up an answer.\n\n"
        "{context}\n"
        "Question: {question} \n"
        "Helpful Answer:"
    ),
)

qa_chain = RetrievalQA.from_llm(
    llm=llm,
    prompt=prompt_template,
    retriever=vectorstore.as_retriever(),
)

query = "Can you give me an example of how to write to a DWN?"
print(qa_chain(query, return_only_outputs=True))
```