| """ | |
| LLM Service for RAG Application | |
| This module provides integration with Large Language Models through multiple | |
| providers including OpenRouter and Groq, with fallback capabilities and | |
| comprehensive error handling. | |
| """ | |
import logging
import os
import time
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import requests

from src.llm.llm_configuration_error import LLMConfigurationError

logger = logging.getLogger(__name__)


@dataclass
class LLMConfig:
    """Configuration for a single LLM provider."""

    provider: str  # "openrouter" or "groq"
    api_key: str
    model_name: str
    base_url: str
    max_tokens: int = 1000
    temperature: float = 0.1
    timeout: int = 30


@dataclass
class LLMResponse:
    """Standardized response from LLM providers."""

    content: str
    provider: str
    model: str
    usage: Dict[str, Any]
    response_time: float
    success: bool
    error_message: Optional[str] = None


class LLMService:
    """
    Service for interacting with Large Language Models.

    Supports multiple providers with automatic fallback and retry logic.
    Designed for corporate policy Q&A with appropriate guardrails.
    """

    def __init__(self, configs: List[LLMConfig]):
        """
        Initialize LLMService with provider configurations.

        Args:
            configs: List of LLMConfig objects for different providers

        Raises:
            ValueError: If no valid configurations are provided
        """
        if not configs:
            raise ValueError("At least one LLM configuration must be provided")

        self.configs = configs
        self.current_config_index = 0
        logger.info(f"LLMService initialized with {len(configs)} provider(s)")

    @classmethod
    def from_environment(cls) -> "LLMService":
        """
        Create an LLMService instance from environment variables.

        Expected environment variables:
            - OPENROUTER_API_KEY: API key for OpenRouter
            - GROQ_API_KEY: API key for Groq

        Returns:
            LLMService instance with all available providers

        Raises:
            LLMConfigurationError: If no API keys are found in the environment
        """
        configs = []

        # OpenRouter configuration
        openrouter_key = os.getenv("OPENROUTER_API_KEY")
        if openrouter_key:
            configs.append(
                LLMConfig(
                    provider="openrouter",
                    api_key=openrouter_key,
                    model_name="microsoft/wizardlm-2-8x22b",  # Free tier model
                    base_url="https://openrouter.ai/api/v1",
                    max_tokens=1000,
                    temperature=0.1,
                )
            )

        # Groq configuration
        groq_key = os.getenv("GROQ_API_KEY")
        if groq_key:
            configs.append(
                LLMConfig(
                    provider="groq",
                    api_key=groq_key,
                    model_name="llama3-8b-8192",  # Free tier model
                    base_url="https://api.groq.com/openai/v1",
                    max_tokens=1000,
                    temperature=0.1,
                )
            )

        if not configs:
            raise LLMConfigurationError(
                "No LLM API keys found in environment. "
                "Please set OPENROUTER_API_KEY or GROQ_API_KEY"
            )

        return cls(configs)

    def generate_response(self, prompt: str, max_retries: int = 2) -> LLMResponse:
        """
        Generate response from LLM with fallback support.

        Args:
            prompt: Input prompt for the LLM
            max_retries: Maximum retry attempts per provider

        Returns:
            LLMResponse with generated content or error information
        """
        last_error = None

        # Try each provider configuration
        for _ in range(len(self.configs)):
            config = self.configs[self.current_config_index]
            try:
                logger.debug(f"Attempting generation with {config.provider}")
                response = self._call_provider(config, prompt, max_retries)
                if response.success:
                    logger.info(f"Successfully generated response using {config.provider}")
                    return response
                last_error = response.error_message
                logger.warning(f"Provider {config.provider} failed: {last_error}")
            except Exception as e:
                last_error = str(e)
                logger.error(f"Error with provider {config.provider}: {last_error}")

            # Move to next provider
            self.current_config_index = (self.current_config_index + 1) % len(self.configs)

        # All providers failed
        logger.error("All LLM providers failed")
        return LLMResponse(
            content="",
            provider="none",
            model="none",
            usage={},
            response_time=0.0,
            success=False,
            error_message=f"All providers failed. Last error: {last_error}",
        )

    def _call_provider(self, config: LLMConfig, prompt: str, max_retries: int) -> LLMResponse:
        """
        Make an API call to a specific provider with retry logic.

        Args:
            config: Provider configuration
            prompt: Input prompt
            max_retries: Maximum retry attempts

        Returns:
            LLMResponse from the provider
        """
        start_time = time.time()

        for attempt in range(max_retries + 1):
            try:
                headers = {
                    "Authorization": f"Bearer {config.api_key}",
                    "Content-Type": "application/json",
                }

                # Add provider-specific headers
                if config.provider == "openrouter":
                    referer_url = "https://github.com/sethmcknight/msse-ai-engineering"
                    headers["HTTP-Referer"] = referer_url
                    headers["X-Title"] = "MSSE RAG Application"

                payload = {
                    "model": config.model_name,
                    "messages": [{"role": "user", "content": prompt}],
                    "max_tokens": config.max_tokens,
                    "temperature": config.temperature,
                }

                response = requests.post(
                    f"{config.base_url}/chat/completions",
                    headers=headers,
                    json=payload,
                    timeout=config.timeout,
                )
                response.raise_for_status()
                data = response.json()

                # Extract response content
                content = data["choices"][0]["message"]["content"]
                usage = data.get("usage", {})
                response_time = time.time() - start_time

                return LLMResponse(
                    content=content,
                    provider=config.provider,
                    model=config.model_name,
                    usage=usage,
                    response_time=response_time,
                    success=True,
                )

            except requests.exceptions.RequestException as e:
                logger.warning(f"Request failed for {config.provider} (attempt {attempt + 1}): {e}")
                if attempt < max_retries:
                    time.sleep(2**attempt)  # Exponential backoff
                    continue
                return LLMResponse(
                    content="",
                    provider=config.provider,
                    model=config.model_name,
                    usage={},
                    response_time=time.time() - start_time,
                    success=False,
                    error_message=str(e),
                )
            except Exception as e:
                logger.error(f"Unexpected error with {config.provider}: {e}")
                return LLMResponse(
                    content="",
                    provider=config.provider,
                    model=config.model_name,
                    usage={},
                    response_time=time.time() - start_time,
                    success=False,
                    error_message=str(e),
                )

    def health_check(self) -> Dict[str, Any]:
        """
        Check health status of all configured providers.

        Returns:
            Dictionary with provider health status
        """
        health_status = {}

        for config in self.configs:
            try:
                # Simple test prompt
                test_response = self._call_provider(
                    config,
                    "Hello, this is a test. Please respond with 'OK'.",
                    max_retries=1,
                )
                health_status[config.provider] = {
                    "status": "healthy" if test_response.success else "unhealthy",
                    "model": config.model_name,
                    "response_time": test_response.response_time,
                    "error": test_response.error_message,
                }
            except Exception as e:
                health_status[config.provider] = {
                    "status": "unhealthy",
                    "model": config.model_name,
                    "response_time": 0.0,
                    "error": str(e),
                }

        return health_status

    def get_available_providers(self) -> List[str]:
        """Get list of available provider names."""
        return [config.provider for config in self.configs]
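

# Minimal usage sketch (illustrative only, not part of the service API):
# assumes OPENROUTER_API_KEY and/or GROQ_API_KEY are exported in the
# environment. Running the module directly smoke-tests the configured
# providers end to end.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    service = LLMService.from_environment()
    print(f"Configured providers: {service.get_available_providers()}")

    # Probe each provider; health_check() reports status, model, and timing
    for provider, status in service.health_check().items():
        print(f"{provider}: {status['status']} ({status['model']})")

    # Single generation with automatic fallback across providers; the prompt
    # below is a placeholder for a real policy question
    result = service.generate_response("Summarize the PTO policy in one sentence.")
    if result.success:
        print(f"[{result.provider}/{result.model}] {result.content}")
    else:
        print(f"Generation failed: {result.error_message}")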