Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Memory optimization and database initialization script for Render deployment. | |
| """ | |
| import logging | |
| import os | |
| import sys | |
| from src.utils.memory_utils import clean_memory, log_memory_usage | |
| # Add src to path | |
| sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src")) | |
def initialize_vector_store():
    """Ensure the vector store holds embeddings of the configured dimension.

    Opens the vector database at ``VECTOR_DB_PERSIST_PATH`` /
    ``COLLECTION_NAME`` and asks it whether its embeddings are valid for
    ``EMBEDDING_DIMENSION``. When they are not, an ``IngestionPipeline`` is
    run over ``CORPUS_DIRECTORY`` with ``store_embeddings=True``. Memory
    usage is logged at the start and end, and ``clean_memory`` is called
    immediately before and after a successful ingestion.

    Returns:
        bool: ``True`` when the store was already valid or ingestion returned
        a non-empty result; ``False`` when ingestion returned nothing or any
        exception was raised inside the ``try`` block (the exception is
        logged with its traceback, not propagated).
    """
    # Project imports are kept function-local, as in the original.
    # NOTE(review): presumably this defers loading heavy modules until after
    # the startup memory cleanup in main() - confirm before hoisting them.
    from src.config import (
        COLLECTION_NAME,
        CORPUS_DIRECTORY,
        DEFAULT_CHUNK_SIZE,
        DEFAULT_OVERLAP,
        EMBEDDING_DIMENSION,
        RANDOM_SEED,
        VECTOR_DB_PERSIST_PATH,
    )
    from src.ingestion.ingestion_pipeline import IngestionPipeline
    from src.vector_store.vector_db import VectorDatabase

    log_memory_usage("Vector store initialization start")

    try:
        # Open the database first so its current state decides what to do.
        vector_db = VectorDatabase(VECTOR_DB_PERSIST_PATH, COLLECTION_NAME)

        if vector_db.has_valid_embeddings(EMBEDDING_DIMENSION):
            logging.info(
                f"Vector store is valid with {vector_db.get_count()} embeddings "
                f"of dimension {vector_db.get_embedding_dimension()}"
            )
        else:
            logging.info("Vector store needs initialization - running ingestion")

            # Free what we can before the ingestion run.
            clean_memory("Before ingestion")

            ingestion_pipeline = IngestionPipeline(
                chunk_size=DEFAULT_CHUNK_SIZE,
                overlap=DEFAULT_OVERLAP,
                seed=RANDOM_SEED,
                store_embeddings=True,
            )
            results = ingestion_pipeline.process_directory(CORPUS_DIRECTORY)

            # An empty or missing result means nothing was ingested.
            if not results:
                logging.error("Ingestion failed or processed 0 chunks")
                return False

            logging.info(f"Ingestion completed: {len(results)} chunks processed")
            clean_memory("After ingestion")

        log_memory_usage("Vector store initialization complete")
        return True
    except Exception as e:
        # Top-level boundary for this step: report failure via the return
        # value, but keep the traceback in the log so the cause is visible.
        logging.exception("Vector store initialization failed: %s", e)
        return False
def main():
    """Configure logging, clean memory, and initialize the vector store.

    Returns:
        int: process exit status - ``0`` when ``initialize_vector_store()``
        reports success, ``1`` otherwise.
    """
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO,
    )

    log_memory_usage("Script start")

    # Start from as clean a memory state as possible.
    clean_memory("Script startup")

    # Guard clause: bail out with a non-zero status on failure.
    if not initialize_vector_store():
        logging.error("Initialization failed")
        return 1

    logging.info("Memory optimization and initialization completed successfully")
    log_memory_usage("Script end")
    return 0
# Script entry point: exit the process with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())