"""Shared pytest configuration and fixtures for the test suite."""

import os
import sys
from contextlib import ExitStack

# Ensure project root and src are on sys.path for tests
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
SRC_PATH = os.path.join(PROJECT_ROOT, "src")
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)
if SRC_PATH not in sys.path:
    sys.path.insert(0, SRC_PATH)

# Set environment variables to disable ChromaDB telemetry.
# NOTE(review): these are set before the imports below (hence the E402
# suppressions), presumably so they are already in place when `app` is
# imported -- confirm before reordering.
os.environ["ANONYMIZED_TELEMETRY"] = "False"
os.environ["CHROMA_TELEMETRY"] = "False"

from typing import List, Optional  # noqa: E402
from unittest.mock import MagicMock, patch  # noqa: E402

import pytest  # noqa: E402

from app import app as flask_app  # noqa: E402


@pytest.fixture(scope="session", autouse=True)
def disable_chromadb_telemetry():
    """Disable ChromaDB telemetry to avoid errors in tests.

    The telemetry hooks are patched once for the whole session. The patches
    are entered as context managers (via an ``ExitStack``) rather than with
    ``patch.start()``: the per-test ``reset_mock_state`` fixture calls
    ``patch.stopall()``, which stops every patch started with ``start()`` and
    would otherwise undo these session-wide patches after the first test.

    Patching stops at the first target that cannot be resolved
    (``ImportError`` / ``AttributeError``); patches entered before that point
    stay active and the session continues.
    """
    telemetry_patches = [
        patch(
            "chromadb.telemetry.product.posthog.capture",
            return_value=None,
        ),
        patch(
            "chromadb.telemetry.product.posthog.Posthog.capture",
            return_value=None,
        ),
        patch(
            "chromadb.telemetry.product.posthog.Posthog",
            return_value=MagicMock(),
        ),
        patch("chromadb.configure", return_value=None),
    ]

    stack = ExitStack()
    try:
        try:
            for telemetry_patch in telemetry_patches:
                stack.enter_context(telemetry_patch)
        except (ImportError, AttributeError):
            # If modules don't exist, continue without the remaining patches
            pass
        yield
    finally:
        # Only patches that were actually entered are registered on the
        # stack, so nothing unstarted is ever stopped. Teardown stays
        # best-effort: a failure to restore a target must not fail the run.
        try:
            stack.close()
        except Exception:
            pass


@pytest.fixture
def app():
    """Flask application fixture.

    Resets the cached service entries in ``flask_app.config`` and clears any
    ``_instances`` cache found on public attributes of the already-imported
    service modules, then yields the shared Flask application object.
    """
    # Clear any cached services before each test to prevent state contamination
    for config_key in ("RAG_PIPELINE", "INGESTION_PIPELINE", "SEARCH_SERVICE"):
        flask_app.config[config_key] = None

    # Also clear any module-level caches that might exist
    modules_to_clear = (
        "src.rag.rag_pipeline",
        "src.llm.llm_service",
        "src.search.search_service",
        "src.embedding.embedding_service",
        "src.vector_store.vector_db",
    )
    for module_name in modules_to_clear:
        module = sys.modules.get(module_name)
        if module is None:
            # Module was never imported, so there is nothing to reset.
            continue
        for attr_name in dir(module):
            if attr_name.startswith("_"):
                continue
            attr = getattr(module, attr_name)
            # Clear instance dictionaries that might contain cached data
            if hasattr(attr, "__dict__") and hasattr(attr, "_instances"):
                attr._instances = {}

    yield flask_app


@pytest.fixture
def client(app):
    """Flask test client fixture built from the ``app`` fixture."""
    return app.test_client()


@pytest.fixture(autouse=True)
def reset_mock_state():
    """Reset any global mock state between tests.

    After each test, stops every patch that was started with
    ``patch.start()`` and left running. Patches entered as context managers
    are not affected by ``patch.stopall()``.
    """
    yield
    # Clear any patches that might have been left hanging
    patch.stopall()


class FakeEmbeddingService:
    """A mock embedding service that returns dummy data without loading a real model."""

    def __init__(
        self,
        model_name: Optional[str] = None,
        device: Optional[str] = None,
        batch_size: Optional[int] = None,
    ):
        """Initialize the fake service.

        No model is loaded. Falsy arguments fall back to the defaults
        ``"all-MiniLM-L6-v2"``, ``"cpu"`` and ``32``.
        """
        self.model_name = model_name or "all-MiniLM-L6-v2"
        self.device = device or "cpu"
        self.batch_size = batch_size or 32
        self.dim = 384  # Standard dimension for the model we are faking

    def embed_text(self, text: str) -> List[float]:
        """Return a dummy embedding (``self.dim`` values of 0.1) for a single text."""
        return [0.1] * self.dim

    def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Return one dummy embedding per entry in *texts*."""
        return [[0.1] * self.dim for _ in texts]

    def get_embedding_dimension(self) -> int:
        """Return the fixed dimension of the dummy embeddings."""
        return self.dim


@pytest.fixture(autouse=True)
def mock_embedding_service(monkeypatch):
    """
    Automatically replace the real EmbeddingService with the fake one.

    This fixture is used for all tests and speeds them up by avoiding
    loading a real model. The name is replaced in the defining module and in
    each module that holds its own reference to it.
    """
    embedding_service_targets = (
        "src.embedding.embedding_service.EmbeddingService",
        "src.ingestion.ingestion_pipeline.EmbeddingService",
        "src.search.search_service.EmbeddingService",
    )
    for target in embedding_service_targets:
        monkeypatch.setattr(target, FakeEmbeddingService)