Arif committed
Commit bf03cdf · Parent: 39bf6ca

Still working with Docker Model Runner

backend/app/main.py CHANGED
@@ -44,6 +44,7 @@ async def lifespan(app: FastAPI):
 
     llm_service = get_llm_service(
         debug=settings.debug,
+        settings=settings,
         mlx_config=mlx_config,
         docker_config=docker_config
     )
backend/app/services/llm_service.py CHANGED
@@ -154,7 +154,7 @@ class LLMServiceDockerModelRunner(BaseLLMService):
         self.client = httpx.AsyncClient(timeout=self.timeout)
 
         # Test connection with health check
-        response = await self.client.get(f"{self.docker_url}/models")
+        response = await self.client.get(f"{self.docker_url}/api/tags")
 
         if response.status_code == 200:
             self.is_loaded = True
@@ -181,13 +181,13 @@ class LLMServiceDockerModelRunner(BaseLLMService):
         }
 
         response = await self.client.post(
-            f"{self.docker_url}/chat/completions",
+            f"{self.docker_url}/api/chat/completions",
             json=payload
         )
 
         if response.status_code == 200:
             result = response.json()
-            return result["choices"]["message"]["content"]
+            return result["choices"][0]["message"]["content"]
         else:
             self.logger.error(f"❌ Docker Model Runner error: {response.text}")
             raise RuntimeError(f"Model Runner error: {response.status_code}")
@@ -236,22 +236,15 @@ class LLMServiceMock(BaseLLMService):
         return f"Mock response: I processed your prompt about '{prompt[:40]}...' - please note I'm in mock mode with no real LLM."
 
 
-def get_llm_service(debug: bool, mlx_config: dict = None, docker_config: dict = None) -> BaseLLMService:
+def get_llm_service(debug: bool, mlx_config: dict = None, docker_config: dict = None, settings=None) -> BaseLLMService:
     """
     Factory function to get appropriate LLM service
-
-    Args:
-        debug: If True, use MLX; if False, use Docker Model Runner
-        mlx_config: Config dict for MLX (model_name, max_tokens, temperature, device)
-        docker_config: Config dict for Docker Model Runner (model_name, max_tokens, temperature, url, timeout)
-
-    Returns:
-        Appropriate LLM service instance
+    Fallback chain: MLX → Docker Model Runner → Mock
     """
 
-    if debug:
-        # Try MLX first
-        if HAS_MLX:
+    # Try MLX first
+    if debug and HAS_MLX:
+        try:
             config = mlx_config or {
                 "model_name": "mlx-community/Llama-3.2-3B-Instruct-4bit",
                 "max_tokens": 512,
@@ -260,21 +253,34 @@ def get_llm_service(debug: bool, mlx_config: dict = None, docker_config: dict =
             }
             logger.info("📌 Mode: MLX (DEBUG=true)")
             return LLMServiceMLX(**config)
-        else:
-            logger.warning("⚠️ MLX not available, falling back to mock")
-            return LLMServiceMock(
-                model_name="mock-mlx",
-                max_tokens=512,
-                temperature=0.7
-            )
-    else:
-        # Use Docker Model Runner
-        config = docker_config or {
-            "model_name": "Llama-3.2-3B-Instruct",
-            "max_tokens": 512,
-            "temperature": 0.7,
-            "docker_url": "http://model-runner.docker.internal/engines/llama.cpp/v1",
-            "timeout": 300
-        }
-        logger.info("📌 Mode: Docker Model Runner (DEBUG=false)")
-        return LLMServiceDockerModelRunner(**config)
+        except Exception as e:
+            logger.warning(f"⚠️ MLX failed: {e}")
+
+    # Try Docker Model Runner
+    docker_url = None
+    if docker_config:
+        docker_url = docker_config.get("docker_url")
+    elif settings:
+        docker_url = settings.docker_model_runner_url
+
+    if docker_url:
+        try:
+            config = docker_config or {
+                "model_name": settings.llm_model_name_docker if settings else "llama2",
+                "max_tokens": settings.llm_max_tokens if settings else 512,
+                "temperature": settings.llm_temperature if settings else 0.7,
+                "docker_url": docker_url,
+                "timeout": settings.docker_timeout if settings else 300
+            }
+            logger.info(f"📌 Mode: Docker Model Runner at {docker_url}")
+            return LLMServiceDockerModelRunner(**config)
+        except Exception as e:
+            logger.warning(f"⚠️ Docker Model Runner failed: {e}")
+
+    # Fallback to mock
+    logger.warning("⚠️ Using MOCK mode (no LLM available)")
+    return LLMServiceMock(
+        model_name="mock",
+        max_tokens=512,
+        temperature=0.7
+    )
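The reworked factory reads its Docker Model Runner configuration off the new `settings` argument (`docker_model_runner_url`, `llm_model_name_docker`, `llm_max_tokens`, `llm_temperature`, `docker_timeout`). The application's `Settings` class is not part of this commit, so the block below is only a minimal sketch of what those fields could look like with pydantic-settings (already pinned in requirements.txt); the env-file wiring and default values are assumptions, not taken from the repository.

# Hypothetical sketch of the backend Settings class -- not part of this commit.
# Field names mirror the attributes the factory accesses; defaults and the
# env_file location are assumptions.
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env.local", extra="ignore")

    debug: bool = False
    # The old hard-coded default endpoint from the removed branch:
    docker_model_runner_url: str = "http://model-runner.docker.internal/engines/llama.cpp/v1"
    llm_model_name_docker: str = "Llama-3.2-3B-Instruct"
    llm_max_tokens: int = 512
    llm_temperature: float = 0.7
    docker_timeout: int = 300


settings = Settings()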
backend/requirements.txt CHANGED
@@ -7,3 +7,4 @@ pydantic==2.5.0
 pydantic-settings==2.1.0
 python-dotenv==1.0.0
 aiofiles==23.2.1
+httpx==0.25.1
docker-compose.yml CHANGED
@@ -5,12 +5,15 @@ services:
       dockerfile: backend/Dockerfile
     ports:
       - "8000:8000"
+    env_file:
+      - .env.local
     environment:
       - PYTHONUNBUFFERED=1
     volumes:
       - ./backend:/app
     networks:
       - llm-network
+    hostname: backend
 
   frontend:
     build:
@@ -18,14 +21,18 @@ services:
       dockerfile: frontend/Dockerfile
     ports:
       - "8501:8501"
+    env_file:
+      - .env.local
     environment:
       - PYTHONUNBUFFERED=1
+      - BACKEND_URL=http://backend:8000
     depends_on:
       - backend
     volumes:
       - ./frontend:/app
     networks:
       - llm-network
+    hostname: frontend
 
 networks:
   llm-network:
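Both services now load `.env.local`, which is not checked into the repository. Assuming the pydantic-settings fields sketched above, the file might look roughly like this; the values are placeholders, not taken from the project:

# .env.local (example values only; this file is not part of the commit)
DEBUG=false
DOCKER_MODEL_RUNNER_URL=http://model-runner.docker.internal/engines/llama.cpp/v1
LLM_MODEL_NAME_DOCKER=Llama-3.2-3B-Instruct
LLM_MAX_TOKENS=512
LLM_TEMPERATURE=0.7
DOCKER_TIMEOUT=300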
frontend/.env.example CHANGED
@@ -1,5 +1,5 @@
 # Backend Configuration
-BACKEND_URL=http://localhost:8000
+BACKEND_URL=http://backend:8000
 
 # Optional: Streamlit specific
 STREAMLIT_SERVER_HEADLESS=false
frontend/utils/api_client.py CHANGED
@@ -19,6 +19,7 @@ class APIClient:
     """Client for backend API communication"""
 
     def __init__(self):
+        backend_url = BACKEND_URL
         self.base_url = API_BASE_URL
         self.timeout = TIMEOUT_LONG
 
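`BACKEND_URL`, `API_BASE_URL`, and `TIMEOUT_LONG` come from the frontend's config module, which is not shown in this diff; as committed, the new `backend_url` local is assigned but not yet used, since `self.base_url` still comes from `API_BASE_URL`. For the compose-provided `BACKEND_URL=http://backend:8000` to take effect, that module presumably reads the variable from the environment. A minimal sketch, with the module path, API prefix, and timeout value assumed:

# Hypothetical frontend config module (e.g. frontend/utils/config.py) --
# not part of this commit; names follow the imports used by APIClient.
import os

# Resolved from the environment so docker-compose can point the frontend at
# the backend service by hostname; falls back to localhost for local runs.
BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:8000")
API_BASE_URL = f"{BACKEND_URL}/api"  # assumed API prefix
TIMEOUT_LONG = 300  # seconds; assumed default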