#!/usr/bin/env python3
"""
Mock LLM function calls for testing CLI topic extraction without API costs.

This module patches requests.post to intercept HTTP calls to inference servers
and return mock responses instead.
"""

import json
import os

# Saved reference to the original requests.post, used for pass-through calls
# and for restoring the patch later
_original_requests = None


def _generate_mock_response(prompt: str, system_prompt: str) -> str:
    """
    Generate a mock response that satisfies validation requirements.

    The response must:
    - Be longer than 120 characters
    - Contain a markdown table (with | characters)

    Args:
        prompt: The user prompt (accepted for interface parity; not used)
        system_prompt: The system prompt (accepted for interface parity; not used)

    Returns:
        A mock markdown table response
    """
    # Generate a simple markdown table that satisfies the validation
    # This mimics a topic extraction table response
    mock_table = """| Reference | General Topic | Sub-topic | Sentiment |
|-----------|---------------|-----------|-----------|
| 1 | Test Topic | Test Subtopic | Positive |
| 2 | Another Topic | Another Subtopic | Neutral |
| 3 | Third Topic | Third Subtopic | Negative |

This is a mock response from the test inference server. The actual content would be generated by a real LLM model, but for testing purposes, this dummy response allows us to verify that the CLI commands work correctly without incurring API costs."""

    return mock_table


def _estimate_tokens(text: str) -> int:
    """Estimate token count (rough approximation: ~4 characters per token)."""
    return max(1, len(text) // 4)


def mock_requests_post(url, **kwargs):
    """
    Mock version of requests.post that intercepts inference-server calls.

    Returns a mock response object that mimics the real requests.Response.
    """
    # Only mock inference-server URLs
    if "/v1/chat/completions" not in url:
        # For non-inference-server URLs, fall through to the real requests.post.
        # Use the saved original: once apply_mock_patches() has run, requests.post
        # is this function, so calling it here would recurse forever.
        if _original_requests is not None:
            return _original_requests(url, **kwargs)
        import requests

        return requests.post(url, **kwargs)

    # Extract payload
    payload = kwargs.get("json", {})
    messages = payload.get("messages", [])

    # Extract prompts
    system_prompt = ""
    user_prompt = ""
    for msg in messages:
        role = msg.get("role", "")
        content = msg.get("content", "")
        if role == "system":
            system_prompt = content
        elif role == "user":
            user_prompt = content

    # Generate mock response
    response_text = _generate_mock_response(user_prompt, system_prompt)

    # Estimate tokens
    input_tokens = _estimate_tokens(system_prompt + "\n" + user_prompt)
    output_tokens = _estimate_tokens(response_text)

    # Check if streaming is requested
    stream = payload.get("stream", False)

    if stream:
        # For streaming, create a mock response with iter_lines
        class MockStreamResponse:
            def __init__(self, text):
                self.text = text
                self.status_code = 200
                self.lines = []
                # Simulate streaming: split the text into OpenAI-style SSE
                # "data: {...}" chunks, terminated by a "data: [DONE]" sentinel
                chunk_size = 20
                for i in range(0, len(text), chunk_size):
                    chunk = text[i : i + chunk_size]
                    chunk_data = {
                        "choices": [
                            {
                                "delta": {"content": chunk},
                                "index": 0,
                                "finish_reason": None,
                            }
                        ]
                    }
                    self.lines.append(f"data: {json.dumps(chunk_data)}\n\n".encode())
                self.lines.append(b"data: [DONE]\n\n")

            def raise_for_status(self):
                pass

            def iter_lines(self):
                for line in self.lines:
                    yield line

        return MockStreamResponse(response_text)
    else:
        # For non-streaming, create a simple mock response
        class MockResponse:
            def __init__(self, text, input_tokens, output_tokens):
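                # Shape mirrors an OpenAI-style /v1/chat/completions response:
                # one "choices" entry plus a "usage" block with token counts.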
                self._json_data = {
                    "choices": [
                        {
                            "index": 0,
                            "finish_reason": "stop",
                            "message": {
                                "role": "assistant",
                                "content": text,
                            },
                        }
                    ],
                    "usage": {
                        "prompt_tokens": input_tokens,
                        "completion_tokens": output_tokens,
                        "total_tokens": input_tokens + output_tokens,
                    },
                }
                self.status_code = 200

            def raise_for_status(self):
                pass

            def json(self):
                return self._json_data

        return MockResponse(response_text, input_tokens, output_tokens)


def apply_mock_patches():
    """
    Apply patches to mock HTTP requests.
    Call this before client code sends any requests, and before any module
    binds requests.post directly (e.g. ``from requests import post``).
    """
    global _original_requests

    try:
        import requests

        # Guard against double-patching, which would lose the saved original
        if requests.post is mock_requests_post:
            return
        _original_requests = requests.post
        requests.post = mock_requests_post
        print("[Mock] Patched requests.post for inference-server calls")
    except ImportError:
        # requests is not installed, so there is nothing to patch
        pass


def restore_original():
    """Restore original requests.post if it was patched."""
    global _original_requests
    if _original_requests:
        try:
            import requests

            requests.post = _original_requests
            _original_requests = None
            print("[Mock] Restored original requests.post")
        except ImportError:
            pass


# Auto-apply patches if TEST_MODE environment variable is set
if os.environ.get("TEST_MODE") == "1" or os.environ.get("USE_MOCK_LLM") == "1":
    apply_mock_patches()
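

# Minimal self-check (a sketch): running this file directly prints the mocked
# non-streaming response. No real HTTP request is sent, because the URL below
# (a hypothetical local endpoint) matches the intercepted "/v1/chat/completions"
# path and is handled entirely by mock_requests_post.
if __name__ == "__main__":
    apply_mock_patches()
    import requests

    resp = requests.post(
        "http://localhost:8000/v1/chat/completions",
        json={
            "messages": [
                {"role": "system", "content": "You extract topics from feedback."},
                {"role": "user", "content": "Summarise the feedback as a table."},
            ],
            "stream": False,
        },
    )
    data = resp.json()
    print(data["choices"][0]["message"]["content"])
    print(data["usage"])
    restore_original()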