Arif committed
Commit · bf03cdf
1 parent: 39bf6ca

Still working with docker model runner
Files changed:
- backend/app/main.py +1 -0
- backend/app/services/llm_service.py +39 -33
- backend/requirements.txt +1 -0
- docker-compose.yml +7 -0
- frontend/.env.example +1 -1
- frontend/utils/api_client.py +1 -0
backend/app/main.py CHANGED

@@ -44,6 +44,7 @@ async def lifespan(app: FastAPI):
 
     llm_service = get_llm_service(
         debug=settings.debug,
+        settings=settings,
         mlx_config=mlx_config,
         docker_config=docker_config
     )
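For context, here is a minimal sketch of how the updated call sits inside the FastAPI lifespan. Only the get_llm_service(...) call itself comes from this diff; the import paths, the mlx_config/docker_config placeholders, and the app.state storage are assumptions for illustration.

# Hedged sketch of the lifespan wiring around the changed call; module paths,
# config placeholders, and app.state usage are assumptions, not repository code.
from contextlib import asynccontextmanager

from fastapi import FastAPI

from app.core.config import settings                   # assumed module path
from app.services.llm_service import get_llm_service   # path implied by the diff

@asynccontextmanager
async def lifespan(app: FastAPI):
    mlx_config = None       # assumed: built from settings elsewhere in main.py
    docker_config = None    # assumed: built from settings elsewhere in main.py
    llm_service = get_llm_service(
        debug=settings.debug,
        settings=settings,   # new argument introduced by this commit
        mlx_config=mlx_config,
        docker_config=docker_config,
    )
    app.state.llm_service = llm_service  # assumed storage location
    yield

app = FastAPI(lifespan=lifespan)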
backend/app/services/llm_service.py CHANGED

@@ -154,7 +154,7 @@ class LLMServiceDockerModelRunner(BaseLLMService):
         self.client = httpx.AsyncClient(timeout=self.timeout)
 
         # Test connection with health check
-        response = await self.client.get(f"{self.docker_url}/
+        response = await self.client.get(f"{self.docker_url}/api/tags")
 
         if response.status_code == 200:
             self.is_loaded = True
@@ -181,13 +181,13 @@ class LLMServiceDockerModelRunner(BaseLLMService):
         }
 
         response = await self.client.post(
-            f"{self.docker_url}/chat/completions",
+            f"{self.docker_url}/api/chat/completions",
             json=payload
         )
 
         if response.status_code == 200:
             result = response.json()
-            return result["choices"]["message"]["content"]
+            return result["choices"][0]["message"]["content"]
         else:
             self.logger.error(f"❌ Docker Model Runner error: {response.text}")
             raise RuntimeError(f"Model Runner error: {response.status_code}")
@@ -236,22 +236,15 @@ class LLMServiceMock(BaseLLMService):
         return f"Mock response: I processed your prompt about '{prompt[:40]}...' - please note I'm in mock mode with no real LLM."
 
 
-def get_llm_service(debug: bool, mlx_config: dict = None, docker_config: dict = None) -> BaseLLMService:
+def get_llm_service(debug: bool, mlx_config: dict = None, docker_config: dict = None, settings=None) -> BaseLLMService:
     """
     Factory function to get appropriate LLM service
-
-    Args:
-        debug: If True, use MLX; if False, use Docker Model Runner
-        mlx_config: Config dict for MLX (model_name, max_tokens, temperature, device)
-        docker_config: Config dict for Docker Model Runner (model_name, max_tokens, temperature, url, timeout)
-
-    Returns:
-        Appropriate LLM service instance
+    Fallback chain: MLX → Docker Model Runner → Mock
     """
 
-
-
-
+    # Try MLX first
+    if debug and HAS_MLX:
+        try:
             config = mlx_config or {
                 "model_name": "mlx-community/Llama-3.2-3B-Instruct-4bit",
                 "max_tokens": 512,
@@ -260,21 +253,34 @@ def get_llm_service(debug: bool, mlx_config: dict = None, docker_config: dict =
             }
             logger.info("Mode: MLX (DEBUG=true)")
             return LLMServiceMLX(**config)
-
-    logger.warning("⚠️ MLX
-    … (remaining removed lines not captured in the diff view)
+        except Exception as e:
+            logger.warning(f"⚠️ MLX failed: {e}")
+
+    # Try Docker Model Runner
+    docker_url = None
+    if docker_config:
+        docker_url = docker_config.get("docker_url")
+    elif settings:
+        docker_url = settings.docker_model_runner_url
+
+    if docker_url:
+        try:
+            config = docker_config or {
+                "model_name": settings.llm_model_name_docker if settings else "llama2",
+                "max_tokens": settings.llm_max_tokens if settings else 512,
+                "temperature": settings.llm_temperature if settings else 0.7,
+                "docker_url": docker_url,
+                "timeout": settings.docker_timeout if settings else 300
+            }
+            logger.info(f"Mode: Docker Model Runner at {docker_url}")
+            return LLMServiceDockerModelRunner(**config)
+        except Exception as e:
+            logger.warning(f"⚠️ Docker Model Runner failed: {e}")
+
+    # Fallback to mock
+    logger.warning("⚠️ Using MOCK mode (no LLM available)")
+    return LLMServiceMock(
+        model_name="mock",
+        max_tokens=512,
+        temperature=0.7
+    )
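The rewritten factory reads several attributes from the new settings parameter: docker_model_runner_url, llm_model_name_docker, llm_max_tokens, llm_temperature and docker_timeout. A minimal pydantic-settings sketch of a Settings class exposing those fields — the attribute names come from the diff, while the defaults and the env_file are illustrative assumptions:

# Hypothetical Settings sketch; field names match what get_llm_service() reads,
# defaults and the env_file are illustrative assumptions only.
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env.local", extra="ignore")

    debug: bool = False
    docker_model_runner_url: str = ""          # Docker Model Runner endpoint (assumed default)
    llm_model_name_docker: str = "llama2"      # default mirrored from the factory fallback
    llm_max_tokens: int = 512
    llm_temperature: float = 0.7
    docker_timeout: int = 300

settings = Settings()  # values can be overridden via .env.local or the environment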
backend/requirements.txt CHANGED

@@ -7,3 +7,4 @@ pydantic==2.5.0
 pydantic-settings==2.1.0
 python-dotenv==1.0.0
 aiofiles==23.2.1
+httpx==0.25.1
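httpx is pinned here for the async client used by LLMServiceDockerModelRunner. A small self-contained sketch of the same health-check pattern; the /api/tags path comes from the diff above, while the default URL is only an example:

# Minimal async httpx sketch of the health-check pattern used by the service;
# the default URL is an assumption, the /api/tags path comes from the diff.
import asyncio

import httpx

async def runner_is_up(docker_url: str = "http://localhost:12434") -> bool:
    async with httpx.AsyncClient(timeout=30.0) as client:
        try:
            response = await client.get(f"{docker_url}/api/tags")
        except httpx.HTTPError:
            return False
        return response.status_code == 200

if __name__ == "__main__":
    print(asyncio.run(runner_is_up()))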
docker-compose.yml CHANGED

@@ -5,12 +5,15 @@ services:
       dockerfile: backend/Dockerfile
     ports:
       - "8000:8000"
+    env_file:
+      - .env.local
     environment:
       - PYTHONUNBUFFERED=1
     volumes:
       - ./backend:/app
     networks:
       - llm-network
+    hostname: backend
 
   frontend:
     build:
@@ -18,14 +21,18 @@ services:
       dockerfile: frontend/Dockerfile
     ports:
       - "8501:8501"
+    env_file:
+      - .env.local
     environment:
       - PYTHONUNBUFFERED=1
+      - BACKEND_URL=http://backend:8000
     depends_on:
       - backend
     volumes:
       - ./frontend:/app
     networks:
       - llm-network
+    hostname: frontend
 
 networks:
   llm-network:
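With both containers on llm-network and explicit hostnames, the frontend can reach the backend by service name. A quick connectivity sketch from inside the frontend container — BACKEND_URL matches the compose environment entry, while the /health endpoint is an assumption:

# Connectivity sketch from the frontend container: BACKEND_URL is set in
# docker-compose.yml; the /health endpoint is assumed for illustration.
import os

import httpx

backend_url = os.getenv("BACKEND_URL", "http://backend:8000")
response = httpx.get(f"{backend_url}/health", timeout=10.0)
print(backend_url, response.status_code)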
frontend/.env.example CHANGED

@@ -1,5 +1,5 @@
 # Backend Configuration
-BACKEND_URL=http://
+BACKEND_URL=http://backend:8000
 
 # Optional: Streamlit specific
 STREAMLIT_SERVER_HEADLESS=false
frontend/utils/api_client.py CHANGED

@@ -19,6 +19,7 @@ class APIClient:
     """Client for backend API communication"""
 
     def __init__(self):
+        backend_url = BACKEND_URL
         self.base_url = API_BASE_URL
         self.timeout = TIMEOUT_LONG
 
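In this commit the new backend_url local is read from BACKEND_URL, while base_url is still assigned from API_BASE_URL. A hedged sketch of a constructor that resolves its base URL from the BACKEND_URL environment variable instead; apart from that variable name, the fallback value and structure below are assumptions, not the repository's code:

# Hedged APIClient sketch that takes its base URL from BACKEND_URL;
# the localhost fallback and timeout default are assumptions.
import os

class APIClient:
    """Client for backend API communication (illustrative sketch only)."""

    def __init__(self, timeout: float = 60.0):
        # BACKEND_URL matches docker-compose.yml and frontend/.env.example
        self.base_url = os.getenv("BACKEND_URL", "http://localhost:8000")
        self.timeout = timeout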