```
from azure.storage.blob import BlobServiceClient
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch
from langchain.document_loaders import AzureBlobStorageContainerLoader
from langchain.text_splitter import CharacterTextSplitter
# Function to upload data to Azure Blob Storage
def upload_to_blob_storage(connection_string, container_name, directory_path):
    """Upload every file under *directory_path* to an Azure Blob Storage container.

    The directory tree is walked recursively with ``os.walk``. Each file is
    uploaded under a blob name equal to its path relative to
    *directory_path*, and one confirmation line is printed per file.

    Args:
        connection_string: Connection string passed to
            ``BlobServiceClient.from_connection_string``.
        container_name: Name of the container that receives the blobs.
        directory_path: Local directory whose files are uploaded.

    Note:
        ``upload_blob`` is called with its default arguments. Per the
        azure-storage-blob documentation this does not overwrite, so
        re-running against blobs that already exist is expected to raise --
        confirm against the SDK version in use.
    """
    blob_service_client = BlobServiceClient.from_connection_string(connection_string)
    for root, _dirs, files in os.walk(directory_path):
        for file in files:
            file_path = os.path.join(root, file)
            # Blob names use "/" as their virtual-directory separator, so
            # normalise the OS-specific separator ("\\" on Windows) to keep
            # the same folder layout regardless of the uploading platform.
            blob_name = os.path.relpath(file_path, directory_path).replace(os.sep, "/")
            blob_client = blob_service_client.get_blob_client(
                container=container_name, blob=blob_name
            )
            # Binary mode: the file's bytes are sent unchanged.
            with open(file_path, "rb") as data:
                blob_client.upload_blob(data)
            print(f"File {file_path} successfully uploaded to {blob_name} in Blob Storage!")
# Function to create vectors from data in Azure Blob Storage
def create_vectors(
    connection_string,
    container_name,
    vector_store_address,
    model,
    index_name="langchain-vector-demo",
):
    """Embed the documents of a blob container and add them to an Azure Search index.

    Loads every document from the container, splits the documents into
    chunks with ``CharacterTextSplitter(chunk_size=150, chunk_overlap=20)``,
    and adds the chunks to an ``AzureSearch`` vector store.

    Args:
        connection_string: Azure Storage connection string handed to the
            blob container loader.
        container_name: Name of the container to load documents from.
        vector_store_address: Endpoint URL of the Azure Cognitive Search
            service.
        model: Passed to ``OpenAIEmbeddings`` as ``deployment``.
        index_name: Name of the search index to write to. Defaults to
            ``"langchain-vector-demo"``, the value that was previously
            hard-coded.

    The search API key is read from the ``AZURE_COGNITIVE_SEARCH_API_KEY``
    environment variable; if it is unset, ``None`` is passed through to
    ``AzureSearch``.
    """
    # NOTE(review): chunk_size=1 presumably limits each embedding request to a
    # single text -- confirm against the OpenAIEmbeddings documentation.
    embeddings = OpenAIEmbeddings(deployment=model, chunk_size=1)
    vector_store = AzureSearch(
        azure_search_endpoint=vector_store_address,
        azure_search_key=os.environ.get("AZURE_COGNITIVE_SEARCH_API_KEY"),
        index_name=index_name,
        embedding_function=embeddings.embed_query,
    )
    loader = AzureBlobStorageContainerLoader(
        conn_str=connection_string,
        container=container_name,
    )
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=150, chunk_overlap=20)
    docs = text_splitter.split_documents(documents)
    vector_store.add_documents(documents=docs)
    print("Data loaded into vector store successfully")
# Define Azure Blob Storage and Cognitive Search details
connection_string = "YourBlobConnectionString"
container_name = "YourBlobContainer"
directory_path = "YourDataDirectory"
# The search service name comes from the environment. Python f-strings
# interpolate with "{...}" alone; a "$" in front of the braces (JavaScript
# template syntax) would be kept as a literal character in the URL.
vector_store_address = (
    f"https://{os.environ.get('AZURE_COGNITIVE_SEARCH_SERVICE_NAME')}.search.windows.net"
)
model = "text-embedding-ada-002"
# Upload data to Azure Blob Storage
upload_to_blob_storage(connection_string, container_name, directory_path)
# Create vectors from the uploaded data
create_vectors(connection_string, container_name, vector_store_address, model)
```