*Using FAISS Vectorstore + LangChain*
```python
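# These installs assume a Colab GPU runtime; faiss-gpu (and the "cuda"
# device used below) will not work on a CPU-only runtime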
!pip install langchain
!pip install InstructorEmbedding
!pip install sentence_transformers
!pip install faiss-gpu
!pip install openai
!pip install tqdm
```
```python
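# Mount Google Drive so the corpus, embedding cache, and FAISS index
# persist across Colab sessions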
from google.colab import drive
drive.mount('/content/drive')
```
```python
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import MarkdownTextSplitter
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
data_dirpath = '/content/drive/My Drive/inputtext'
embedding_cache_folder = '/content/drive/My Drive/embedding'
index_save_directory = '/content/drive/My Drive/faiss'
# Load every .txt file in the corpus folder as a separate document
loader = DirectoryLoader(path=data_dirpath, glob="*.txt", loader_cls=TextLoader)
essays = loader.load()

# Split into overlapping chunks so each embedding covers a manageable span
text_splitter = MarkdownTextSplitter(chunk_size=1500, chunk_overlap=100)
documents = text_splitter.split_documents(essays)
print(f"{len(documents)} chunks to embed")
# Instructor embeddings on the GPU; the model weights are cached on Drive
embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
    model_kwargs={"device": "cuda"},
    cache_folder=embedding_cache_folder,
)

# Embed every chunk, build the FAISS index, and persist it to Drive
vectorstore = FAISS.from_documents(documents, embeddings)
vectorstore.save_local(index_save_directory)
```
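Before wiring the index into a QA chain, it can help to sanity-check retrieval directly. A minimal sketch, assuming the cells above have run and `vectorstore` is still in memory; the query string is just an illustrative placeholder:
```python
# Fetch the 3 chunks most similar to a test query and preview them
results = vectorstore.similarity_search("How do I write to a DWN?", k=3)
for doc in results:
    print(doc.metadata.get("source"), "->", doc.page_content[:200])
```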
```python
import os
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
os.environ["OPENAI_API_KEY"] = 'YOUR-API-KEY'
embedding_cache_folder = '/content/drive/My Drive/embedding'
index_save_directory = '/content/drive/My Drive/faiss'
# Must be the same embedding model that was used to build the index
embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
    model_kwargs={"device": "cuda"},
    cache_folder=embedding_cache_folder,
)
vectorstore = FAISS.load_local(index_save_directory, embeddings)

# Temperature 0 for deterministic answers
llm = OpenAI(temperature=0)
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "Use the following pieces of context to answer the question at the end. "
        "If you don't know the answer, just say that you don't know, "
        "don't try to make up an answer.\n\n"
        "{context}\n\n"
        "Question: {question}\n"
        "Helpful Answer:"
    ),
)
qa_chain = RetrievalQA.from_llm(
    llm=llm,
    prompt=prompt_template,
    retriever=vectorstore.as_retriever(),
)
query = "Can you give me an example how how to write to a DWN?"
print(qa_chain(query, return_only_outputs=True))
```
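To check which chunks an answer was grounded in, `RetrievalQA` can also return the retrieved documents. A sketch under the same setup, using the standard `return_source_documents` flag:
```python
# Variant of the chain that also returns the retrieved chunks
qa_chain_with_sources = RetrievalQA.from_llm(
    llm=llm,
    prompt=prompt_template,
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)
result = qa_chain_with_sources({"query": query})
print(result["result"])
for doc in result["source_documents"]:
    print("-", doc.metadata.get("source"))
```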