Spaces:
Sleeping
Sleeping
# Build a persistent Chroma vector store from the Amazon SageMaker FAQ text.
#
# Running this script loads the FAQ file, splits it into overlapping chunks,
# and indexes those chunks into the 'sagemaker-chroma' collection stored
# under ./chroma_db, using the `embeddings` object for vectorisation.
from langchain_community.document_loaders.text import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

# NOTE(review): `embeddings` (used below) is not defined in this file, so it
# presumably comes from this star import — confirm, and prefer importing it
# by name.
from setup import *

# Relative path: resolved against the current working directory, so the FAQ
# file must be present in the directory the script is launched from.
file = "Amazon_sagemaker_Faq.txt"
loader = TextLoader(file_path=file)

# Read the file once. `pages` is kept as its own list so that code importing
# it from this module keeps working, but it no longer triggers a second load.
docs = loader.load()
pages = list(docs)

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    add_start_index=True,
    # Separators are tried in order, coarsest first. Listing "\n" before
    # "\n\n" made the paragraph separator unreachable, because any text that
    # contains "\n\n" also contains "\n".
    separators=["\n\n", "\n"],
)
all_splits = text_splitter.split_documents(docs)
print(f"Split blog post into {len(all_splits)} sub-documents.")

# Relative persistence directory (instead of a machine-specific absolute
# path) so the script also works when deployed; it is resolved against the
# current working directory.
# NOTE(review): re-running the script against an existing ./chroma_db
# presumably appends the same chunks again — confirm whether the collection
# should be cleared or reused first.
persist_directory = "./chroma_db"
vector_store = Chroma.from_documents(
    documents=all_splits,
    collection_name='sagemaker-chroma',
    persist_directory=persist_directory,
    embedding=embeddings,
)