#!/usr/bin/env python3
"""
Mock LLM function calls for testing CLI topic extraction without API costs.
This module patches requests.post to intercept HTTP calls to inference servers
and return mock responses instead.
"""
import json
import os
# Stores the original requests.post so it can be restored after patching
_original_requests = None
def _generate_mock_response(prompt: str, system_prompt: str) -> str:
"""
Generate a mock response that satisfies validation requirements.
The response must:
- Be longer than 120 characters
- Contain a markdown table (with | characters)
Args:
prompt: The user prompt
system_prompt: The system prompt
Returns:
A mock markdown table response
"""
# Generate a simple markdown table that satisfies the validation
# This mimics a topic extraction table response
mock_table = """| Reference | General Topic | Sub-topic | Sentiment |
|-----------|---------------|-----------|-----------|
| 1 | Test Topic | Test Subtopic | Positive |
| 2 | Another Topic | Another Subtopic | Neutral |
| 3 | Third Topic | Third Subtopic | Negative |

This is a mock response from the test inference server. The actual content would be generated by a real LLM model, but for testing purposes, this dummy response allows us to verify that the CLI commands work correctly without incurring API costs."""
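    # Note: the canned text above is comfortably longer than the 120-character
    # minimum and contains "|" characters, so it passes both validation rules
    # from the docstring regardless of the prompts that were passed in.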
return mock_table
def _estimate_tokens(text: str) -> int:
"""Estimate token count (rough approximation: ~4 characters per token)."""
return max(1, len(text) // 4)
def mock_requests_post(url, **kwargs):
"""
Mock version of requests.post that intercepts inference-server calls.
Returns a mock response object that mimics the real requests.Response.
"""
    # Only mock inference-server URLs
    if "/v1/chat/completions" not in url:
        # For non-inference-server URLs, fall back to the real requests.post.
        # Call the saved original when the patch is active; otherwise
        # requests.post has not been replaced and is safe to call directly.
        # This avoids infinite recursion once requests.post points at this function.
        if _original_requests is not None:
            return _original_requests(url, **kwargs)
        import requests
        return requests.post(url, **kwargs)
# Extract payload
payload = kwargs.get("json", {})
messages = payload.get("messages", [])
# Extract prompts
system_prompt = ""
user_prompt = ""
for msg in messages:
role = msg.get("role", "")
content = msg.get("content", "")
if role == "system":
system_prompt = content
elif role == "user":
user_prompt = content
# Generate mock response
response_text = _generate_mock_response(user_prompt, system_prompt)
# Estimate tokens
input_tokens = _estimate_tokens(system_prompt + "\n" + user_prompt)
output_tokens = _estimate_tokens(response_text)
# Check if streaming is requested
stream = payload.get("stream", False)
if stream:
# For streaming, create a mock response with iter_lines
class MockStreamResponse:
def __init__(self, text):
self.text = text
self.status_code = 200
self.lines = []
# Simulate streaming chunks
chunk_size = 20
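                # Each appended line mimics one server-sent-events chunk from an
                # OpenAI-compatible /v1/chat/completions stream, e.g.
                #   b'data: {"choices": [{"delta": {"content": "..."}, "index": 0, "finish_reason": null}]}\n\n'
                # and the stream is terminated with b"data: [DONE]\n\n" below.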
for i in range(0, len(text), chunk_size):
chunk = text[i : i + chunk_size]
chunk_data = {
"choices": [
{
"delta": {"content": chunk},
"index": 0,
"finish_reason": None,
}
]
}
self.lines.append(f"data: {json.dumps(chunk_data)}\n\n".encode())
self.lines.append(b"data: [DONE]\n\n")
self._line_index = 0
def raise_for_status(self):
pass
def iter_lines(self):
for line in self.lines:
yield line
return MockStreamResponse(response_text)
else:
# For non-streaming, create a simple mock response
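        # The dictionary below mirrors the minimal OpenAI-style chat-completions
        # response shape (choices[0].message.content plus a usage block) that the
        # calling code is expected to read; token counts are rough estimates only.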
class MockResponse:
def __init__(self, text, input_tokens, output_tokens):
self._json_data = {
"choices": [
{
"index": 0,
"finish_reason": "stop",
"message": {
"role": "assistant",
"content": text,
},
}
],
"usage": {
"prompt_tokens": input_tokens,
"completion_tokens": output_tokens,
"total_tokens": input_tokens + output_tokens,
},
}
self.status_code = 200
def raise_for_status(self):
pass
def json(self):
return self._json_data
return MockResponse(response_text, input_tokens, output_tokens)
def apply_mock_patches():
"""
Apply patches to mock HTTP requests.
This should be called before importing modules that use requests.
"""
global _original_requests
try:
import requests
_original_requests = requests.post
requests.post = mock_requests_post
print("[Mock] Patched requests.post for inference-server calls")
    except ImportError:
        # requests is not installed in this environment; nothing to patch
        pass
def restore_original():
"""Restore original requests.post if it was patched."""
global _original_requests
if _original_requests:
try:
import requests
requests.post = _original_requests
_original_requests = None
print("[Mock] Restored original requests.post")
except ImportError:
pass
# Auto-apply patches if TEST_MODE environment variable is set
if os.environ.get("TEST_MODE") == "1" or os.environ.get("USE_MOCK_LLM") == "1":
apply_mock_patches()
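

if __name__ == "__main__":
    # Minimal smoke test (illustrative only): call the mock directly and print
    # what a CLI client would see. The host/port in the URL is an assumption;
    # only the "/v1/chat/completions" path matters to the mock.
    demo_payload = {
        "model": "dummy-model",
        "messages": [
            {"role": "system", "content": "Extract topics as a markdown table."},
            {"role": "user", "content": "Please summarise these survey comments."},
        ],
        "stream": False,
    }
    demo_response = mock_requests_post(
        "http://localhost:8000/v1/chat/completions", json=demo_payload
    )
    demo_response.raise_for_status()
    demo_body = demo_response.json()
    print(demo_body["choices"][0]["message"]["content"][:120])
    print("usage:", demo_body["usage"])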