File size: 6,652 Bytes
500761d
 
e3e3a84
c4b28eb
e3e3a84
 
0a7f9b4
 
 
 
 
 
c4b28eb
e3e3a84
 
 
 
c4b28eb
e3e3a84
 
 
 
c4b28eb
e3e3a84
 
 
 
 
 
32e4125
 
 
 
 
 
 
 
 
 
 
 
 
e3e3a84
c4b28eb
0a7f9b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3e3a84
 
 
 
 
 
ffa0f3d
 
 
 
 
7793bb6
ffa0f3d
7793bb6
ffa0f3d
 
500761d
 
 
 
 
 
 
 
 
159faf0
500761d
 
 
 
 
 
 
 
 
 
 
 
 
 
159faf0
500761d
 
 
 
 
 
 
 
 
 
 
159faf0
500761d
 
 
 
 
 
 
 
 
 
 
159faf0
500761d
 
 
 
 
 
 
 
 
 
 
159faf0
500761d
 
 
 
 
 
 
 
 
 
 
159faf0
500761d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159faf0
500761d
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import json

import pytest

from app import app as flask_app

# NOTE: The skip marker below is commented out, so every test in this module currently runs.
# Known risk: memory monitoring endpoints may behave differently in the CI environment;
# restore the marker if these tests prove unstable there.
# pytestmark = pytest.mark.skip(
#     reason="Memory monitoring endpoints disabled in CI until stabilized"
# )


@pytest.fixture
def app():
    """Provide the imported application object (``flask_app``) to fixtures and tests."""
    application = flask_app
    yield application


@pytest.fixture
def client(app):
    """Build a test client from the ``app`` fixture for issuing requests in tests."""
    test_client = app.test_client()
    return test_client


def test_health_endpoint(client):
    """Verify that GET /health answers 200 with a well-formed status payload."""
    health = client.get("/health")
    assert health.status_code == 200

    payload = health.json

    # Every required top-level field must be present before values are inspected.
    for field in ("status", "memory_mb", "timestamp"):
        assert field in payload

    # The service must report itself as healthy.
    assert payload["status"] == "ok"

    # Memory usage has to be numeric and non-negative.
    memory_mb = payload["memory_mb"]
    assert isinstance(memory_mb, (int, float))
    assert memory_mb >= 0


def test_memory_diagnostics_endpoint(client):
    """Verify GET /memory/diagnostics reports success with ``rss_mb`` in its memory summary."""
    response = client.get("/memory/diagnostics")
    assert response.status_code == 200

    body = response.get_json()
    assert body["status"] == "success"

    # Descend one level at a time so a failure pinpoints the missing key.
    assert "memory" in body
    memory_section = body["memory"]
    assert "summary" in memory_section
    assert "rss_mb" in memory_section["summary"]


def test_memory_diagnostics_with_top(client):
    """Verify /memory/diagnostics still succeeds when ``include_top`` and ``limit`` are passed."""
    response = client.get("/memory/diagnostics?include_top=1&limit=3")
    assert response.status_code == 200

    body = response.get_json()
    assert body["status"] == "success"

    # Whether top_allocations appears depends on the tracemalloc flag, so only
    # the always-present "memory" section is checked here.
    assert "memory" in body


def test_memory_force_clean_endpoint(client):
    """Verify POST /memory/force-clean echoes the label and returns a memory summary."""
    response = client.post("/memory/force-clean", json={"label": "test"})
    assert response.status_code == 200

    body = response.get_json()
    assert body["status"] == "success"
    assert body["label"] == "test"
    assert "summary" in body

    # rss_mb is accepted either directly in the summary or one level down
    # under a nested "summary" key.
    summary = body["summary"]
    assert "rss_mb" in summary or "rss_mb" in summary.get("summary", {})


def test_index_endpoint(client):
    """Verify that GET / responds with HTTP 200."""
    index_response = client.get("/")
    assert index_response.status_code == 200


def test_ingest_endpoint_exists(client):
    """Verify that the /ingest route is registered.

    Uses the shared ``client`` fixture, like the other tests in this module,
    instead of re-importing the application and building a separate client.

    The request is sent without a body, so the endpoint may legitimately
    reject it; any status other than 404 (not found) counts as a pass.
    """
    response = client.post("/ingest")
    # Should not be 404 (not found)
    assert response.status_code != 404


class TestSearchEndpoint:
    """Exercise POST /search: successful queries, input validation, and result shape."""

    @staticmethod
    def _post_search(client, payload):
        """Send ``payload`` to /search as a JSON-encoded body and return the response."""
        return client.post("/search", data=json.dumps(payload), content_type="application/json")

    @staticmethod
    def _assert_rejected(response, message_fragment):
        """Assert a 400 error response whose message contains ``message_fragment``."""
        assert response.status_code == 400
        body = response.get_json()

        assert body["status"] == "error"
        assert message_fragment in body["message"]

    def test_search_endpoint_valid_request(self, client):
        """A fully specified request succeeds and echoes the query back."""
        payload = {"query": "remote work policy", "top_k": 3, "threshold": 0.3}
        response = self._post_search(client, payload)

        assert response.status_code == 200
        body = response.get_json()

        assert body["status"] == "success"
        assert body["query"] == "remote work policy"
        for key in ("results_count", "results"):
            assert key in body
        assert isinstance(body["results"], list)

    def test_search_endpoint_minimal_request(self, client):
        """A request carrying only ``query`` succeeds."""
        response = self._post_search(client, {"query": "employee benefits"})

        assert response.status_code == 200
        body = response.get_json()

        assert body["status"] == "success"
        assert body["query"] == "employee benefits"

    def test_search_endpoint_missing_query(self, client):
        """Omitting ``query`` is rejected with a 400 error."""
        response = self._post_search(client, {"top_k": 5})
        self._assert_rejected(response, "Query parameter is required")

    def test_search_endpoint_empty_query(self, client):
        """An empty ``query`` string is rejected with a 400 error."""
        response = self._post_search(client, {"query": ""})
        self._assert_rejected(response, "non-empty string")

    def test_search_endpoint_invalid_top_k(self, client):
        """A negative ``top_k`` is rejected with a 400 error."""
        response = self._post_search(client, {"query": "test query", "top_k": -1})
        self._assert_rejected(response, "positive integer")

    def test_search_endpoint_invalid_threshold(self, client):
        """A ``threshold`` of 1.5 is rejected with a 400 error."""
        response = self._post_search(client, {"query": "test query", "threshold": 1.5})
        self._assert_rejected(response, "between 0 and 1")

    def test_search_endpoint_non_json_request(self, client):
        """A text/plain body is rejected with a 400 error mentioning application/json."""
        # Posted directly rather than via _post_search: the content type must not be JSON.
        response = client.post("/search", data="not json", content_type="text/plain")
        self._assert_rejected(response, "application/json")

    def test_search_endpoint_result_structure(self, client):
        """Each returned result exposes the expected keys and a numeric similarity score."""
        response = self._post_search(client, {"query": "policy"})

        assert response.status_code == 200
        body = response.get_json()

        # Nothing to inspect when the search reports no results.
        if not body["results_count"] > 0:
            return

        first_result = body["results"][0]
        for key in ("chunk_id", "content", "similarity_score", "metadata"):
            assert key in first_result
        assert isinstance(first_result["similarity_score"], (int, float))