Seth McKnight
Add memory diagnostics endpoints and logging enhancements (#80)
0a7f9b4
import os
import sys
# Make the repository root and its ``src`` directory importable from tests.
PROJECT_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir)
)
SRC_PATH = os.path.join(PROJECT_ROOT, "src")


def _prepend_to_sys_path(directory):
    """Put *directory* at the front of ``sys.path`` unless it is already listed."""
    if directory in sys.path:
        return
    sys.path.insert(0, directory)


# Order matters: SRC_PATH is inserted last, so it ends up ahead of PROJECT_ROOT
# when both are newly added.
_prepend_to_sys_path(PROJECT_ROOT)
_prepend_to_sys_path(SRC_PATH)

# Switch off ChromaDB telemetry through the environment; this runs before the
# application import that follows this setup.
os.environ.update(
    ANONYMIZED_TELEMETRY="False",
    CHROMA_TELEMETRY="False",
)
from typing import List, Optional # noqa: E402
from unittest.mock import MagicMock, patch # noqa: E402
import pytest # noqa: E402
from app import app as flask_app # noqa: E402
@pytest.fixture(scope="session", autouse=True)
def disable_chromadb_telemetry():
    """Best-effort: replace ChromaDB telemetry hooks with mocks for the session.

    Each target below is patched independently. A target whose module cannot
    be imported, or whose attribute does not exist, is skipped without
    affecting the remaining targets (previously the first failure silently
    skipped every later patch).

    The patchers are entered as context managers through an ``ExitStack``
    rather than with ``patcher.start()``. ``unittest.mock.patch.stopall()``
    only undoes patchers activated via ``start()``, and ``reset_mock_state``
    calls it after every test; entering them this way keeps these
    session-scoped patches active until the session ends.

    Yields:
        None. The fixture exists only for its patching side effect.
    """
    # Function-scope import so this block does not depend on file-level imports
    # beyond the ones it already used (``patch`` and ``MagicMock``).
    from contextlib import ExitStack, suppress

    telemetry_patchers = (
        patch(
            "chromadb.telemetry.product.posthog.capture",
            return_value=None,
        ),
        patch(
            "chromadb.telemetry.product.posthog.Posthog.capture",
            return_value=None,
        ),
        patch(
            "chromadb.telemetry.product.posthog.Posthog",
            return_value=MagicMock(),
        ),
        patch("chromadb.configure", return_value=None),
    )

    active = ExitStack()
    for patcher in telemetry_patchers:
        try:
            # Only patchers that were entered successfully are registered for
            # teardown, so nothing unstarted is ever stopped.
            active.enter_context(patcher)
        except (ImportError, AttributeError):
            # Target missing in the installed ChromaDB (or ChromaDB absent):
            # carry on with the other targets.
            continue

    try:
        yield
    finally:
        # Teardown stays best-effort, as before: a failure while restoring one
        # attribute must not fail the test session. ExitStack still unwinds
        # every registered patcher (in reverse order) before re-raising.
        with suppress(Exception):
            active.close()
@pytest.fixture
def app():
    """Yield the shared Flask app after resetting cached services.

    Before the app is handed to a test:

    * the ``RAG_PIPELINE``, ``INGESTION_PIPELINE`` and ``SEARCH_SERVICE``
      entries of ``flask_app.config`` are set to ``None``;
    * for each listed ``src.*`` module that is already imported, every public
      member that has both a ``__dict__`` and an ``_instances`` attribute gets
      ``_instances`` replaced by an empty dict.

    Nothing is reset after the test; the cleanup happens on the next request
    for this fixture.
    """
    # Forget services that an earlier test may have stored on the app config.
    for config_key in ("RAG_PIPELINE", "INGESTION_PIPELINE", "SEARCH_SERVICE"):
        flask_app.config[config_key] = None

    # Modules whose members might hold an ``_instances`` cache.
    cache_bearing_modules = (
        "src.rag.rag_pipeline",
        "src.llm.llm_service",
        "src.search.search_service",
        "src.embedding.embedding_service",
        "src.vector_store.vector_db",
    )
    for dotted_name in cache_bearing_modules:
        if dotted_name not in sys.modules:
            # Never imported in this process, so there is nothing to clear.
            continue
        loaded_module = sys.modules[dotted_name]
        for member_name in dir(loaded_module):
            member = getattr(loaded_module, member_name)
            if not hasattr(member, "__dict__") or member_name.startswith("_"):
                continue
            if hasattr(member, "_instances"):
                member._instances = {}

    yield flask_app
@pytest.fixture
def client(app):
    """Return a test client created from the ``app`` fixture."""
    test_client = app.test_client()
    return test_client
@pytest.fixture(autouse=True)
def reset_mock_state():
    """After every test, stop any mock patchers that are still active.

    The test body runs at the ``yield``; the cleanup below runs afterwards.
    Note that ``patch.stopall()`` affects every patcher activated through
    ``.start()``, no matter which test or fixture started it.
    """
    yield

    # Teardown: undo patches that a test started but never stopped.
    from unittest import mock as mock_lib

    mock_lib.patch.stopall()
class FakeEmbeddingService:
    """Stand-in embedding service that hands back constant vectors.

    No model is loaded; every embedding is a list of ``dim`` copies of ``0.1``.
    """

    def __init__(
        self,
        model_name: Optional[str] = None,
        device: Optional[str] = None,
        batch_size: Optional[int] = None,
    ):
        """Store the given settings, substituting defaults for falsy values.

        Args:
            model_name: Model label to record; defaults to "all-MiniLM-L6-v2".
            device: Device label to record; defaults to "cpu".
            batch_size: Batch size to record; defaults to 32.
        """
        self.model_name = model_name if model_name else "all-MiniLM-L6-v2"
        self.device = device if device else "cpu"
        self.batch_size = batch_size if batch_size else 32
        # Length of every vector this fake returns.
        self.dim = 384

    def _dummy_vector(self):
        """Build a fresh constant vector of length ``self.dim``."""
        return [0.1] * self.dim

    def embed_text(self, text: str):
        """Return one dummy embedding; the content of *text* is ignored."""
        return self._dummy_vector()

    def embed_texts(self, texts: List[str]):
        """Return one dummy embedding (a separate list) per item in *texts*."""
        vectors = []
        for _unused in texts:
            vectors.append(self._dummy_vector())
        return vectors

    def get_embedding_dimension(self):
        """Return the length of the vectors produced by this fake."""
        return self.dim
@pytest.fixture(autouse=True)
def mock_embedding_service(monkeypatch):
    """Swap ``EmbeddingService`` for ``FakeEmbeddingService`` in every test.

    The name is replaced in the defining module and in the two modules listed
    below that hold their own reference to it, so no real embedding model is
    loaded. The replacement goes through pytest's ``monkeypatch`` fixture.
    """
    patched_locations = (
        "src.embedding.embedding_service.EmbeddingService",
        "src.ingestion.ingestion_pipeline.EmbeddingService",
        "src.search.search_service.EmbeddingService",
    )
    for dotted_target in patched_locations:
        monkeypatch.setattr(dotted_target, FakeEmbeddingService)