| """ | |
| LLM Service for RAG Application | |
| This module provides integration with Large Language Models through multiple | |
| providers including OpenRouter and Groq, with fallback capabilities and | |
| comprehensive error handling. | |
| """ | |
import logging
import os
import time
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import requests

from src.llm.llm_configuration_error import LLMConfigurationError

logger = logging.getLogger(__name__)


@dataclass
class LLMConfig:
    """Configuration for a single LLM provider."""

    provider: str  # "openrouter" or "groq"
    api_key: str
    model_name: str
    base_url: str
    max_tokens: int = 1000
    temperature: float = 0.1
    timeout: int = 30


@dataclass
class LLMResponse:
    """Standardized response from LLM providers."""

    content: str
    provider: str
    model: str
    usage: Dict[str, Any]
    response_time: float
    success: bool
    error_message: Optional[str] = None


class LLMService:
    """
    Service for interacting with Large Language Models.

    Supports multiple providers with automatic fallback and retry logic.
    Designed for corporate policy Q&A with appropriate guardrails.
    """

    def __init__(self, configs: List[LLMConfig]):
        """
        Initialize LLMService with provider configurations.

        Args:
            configs: List of LLMConfig objects for different providers

        Raises:
            ValueError: If no valid configurations are provided
        """
        if not configs:
            raise ValueError("At least one LLM configuration must be provided")

        self.configs = configs
        self.current_config_index = 0
        logger.info(f"LLMService initialized with {len(configs)} provider(s)")

    @classmethod
    def from_environment(cls) -> "LLMService":
        """
        Create an LLMService instance from environment variables.

        Expected environment variables:
            - OPENROUTER_API_KEY: API key for OpenRouter
            - GROQ_API_KEY: API key for Groq

        Returns:
            LLMService instance with all available providers

        Raises:
            LLMConfigurationError: If no API keys are found in the environment
        """
        configs = []

        # OpenRouter configuration
        openrouter_key = os.getenv("OPENROUTER_API_KEY")
        if openrouter_key:
            configs.append(
                LLMConfig(
                    provider="openrouter",
                    api_key=openrouter_key,
                    model_name="microsoft/wizardlm-2-8x22b",  # Free tier model
                    base_url="https://openrouter.ai/api/v1",
                    max_tokens=1000,
                    temperature=0.1,
                )
            )

        # Groq configuration
        groq_key = os.getenv("GROQ_API_KEY")
        if groq_key:
            configs.append(
                LLMConfig(
                    provider="groq",
                    api_key=groq_key,
                    model_name="llama3-8b-8192",  # Free tier model
                    base_url="https://api.groq.com/openai/v1",
                    max_tokens=1000,
                    temperature=0.1,
                )
            )

        if not configs:
            raise LLMConfigurationError(
                "No LLM API keys found in environment. "
                "Please set OPENROUTER_API_KEY or GROQ_API_KEY"
            )

        return cls(configs)

    def generate_response(self, prompt: str, max_retries: int = 2) -> LLMResponse:
        """
        Generate response from LLM with fallback support.

        Args:
            prompt: Input prompt for the LLM
            max_retries: Maximum retry attempts per provider

        Returns:
            LLMResponse with generated content or error information
        """
        last_error = None

        # Try each provider configuration
        for _ in range(len(self.configs)):
            config = self.configs[self.current_config_index]
            try:
                logger.debug(f"Attempting generation with {config.provider}")
                response = self._call_provider(config, prompt, max_retries)
                if response.success:
                    logger.info(f"Successfully generated response using {config.provider}")
                    return response
                last_error = response.error_message
                logger.warning(f"Provider {config.provider} failed: {last_error}")
            except Exception as e:
                last_error = str(e)
                logger.error(f"Error with provider {config.provider}: {last_error}")

            # Move to next provider
            self.current_config_index = (self.current_config_index + 1) % len(self.configs)

        # All providers failed
        logger.error("All LLM providers failed")
        return LLMResponse(
            content="",
            provider="none",
            model="none",
            usage={},
            response_time=0.0,
            success=False,
            error_message=f"All providers failed. Last error: {last_error}",
        )

    def _call_provider(self, config: LLMConfig, prompt: str, max_retries: int) -> LLMResponse:
        """
        Make an API call to a specific provider with retry logic.

        Args:
            config: Provider configuration
            prompt: Input prompt
            max_retries: Maximum retry attempts

        Returns:
            LLMResponse from the provider
        """
        start_time = time.time()

        for attempt in range(max_retries + 1):
            try:
                headers = {
                    "Authorization": f"Bearer {config.api_key}",
                    "Content-Type": "application/json",
                }

                # Add provider-specific headers
                if config.provider == "openrouter":
                    referer_url = "https://github.com/sethmcknight/msse-ai-engineering"
                    headers["HTTP-Referer"] = referer_url
                    headers["X-Title"] = "MSSE RAG Application"

                payload = {
                    "model": config.model_name,
                    "messages": [{"role": "user", "content": prompt}],
                    "max_tokens": config.max_tokens,
                    "temperature": config.temperature,
                }

                response = requests.post(
                    f"{config.base_url}/chat/completions",
                    headers=headers,
                    json=payload,
                    timeout=config.timeout,
                )
                response.raise_for_status()
                data = response.json()

                # Extract response content
                content = data["choices"][0]["message"]["content"]
                usage = data.get("usage", {})
                response_time = time.time() - start_time

                return LLMResponse(
                    content=content,
                    provider=config.provider,
                    model=config.model_name,
                    usage=usage,
                    response_time=response_time,
                    success=True,
                )

            except requests.exceptions.RequestException as e:
                logger.warning(f"Request failed for {config.provider} (attempt {attempt + 1}): {e}")
                if attempt < max_retries:
                    time.sleep(2**attempt)  # Exponential backoff
                    continue
                return LLMResponse(
                    content="",
                    provider=config.provider,
                    model=config.model_name,
                    usage={},
                    response_time=time.time() - start_time,
                    success=False,
                    error_message=str(e),
                )
            except Exception as e:
                logger.error(f"Unexpected error with {config.provider}: {e}")
                return LLMResponse(
                    content="",
                    provider=config.provider,
                    model=config.model_name,
                    usage={},
                    response_time=time.time() - start_time,
                    success=False,
                    error_message=str(e),
                )

    def health_check(self) -> Dict[str, Any]:
        """
        Check health status of all configured providers.

        Returns:
            Dictionary with provider health status
        """
        health_status = {}

        for config in self.configs:
            try:
                # Simple test prompt
                test_response = self._call_provider(
                    config,
                    "Hello, this is a test. Please respond with 'OK'.",
                    max_retries=1,
                )
                health_status[config.provider] = {
                    "status": "healthy" if test_response.success else "unhealthy",
                    "model": config.model_name,
                    "response_time": test_response.response_time,
                    "error": test_response.error_message,
                }
            except Exception as e:
                health_status[config.provider] = {
                    "status": "unhealthy",
                    "model": config.model_name,
                    "response_time": 0.0,
                    "error": str(e),
                }

        return health_status

    def get_available_providers(self) -> List[str]:
        """Get list of available provider names."""
        return [config.provider for config in self.configs]
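

# Minimal usage sketch (illustrative only, not part of the service API):
# assumes OPENROUTER_API_KEY and/or GROQ_API_KEY are exported in the
# environment. Running the module directly smoke-tests the configured
# providers end to end.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    service = LLMService.from_environment()
    print(f"Configured providers: {service.get_available_providers()}")

    # Probe each provider; health_check() reports status, model, and timing
    for provider, status in service.health_check().items():
        print(f"{provider}: {status['status']} ({status['model']})")

    # Single generation with automatic fallback across providers; the prompt
    # below is a placeholder for a real policy question
    result = service.generate_response("Summarize the PTO policy in one sentence.")
    if result.success:
        print(f"[{result.provider}/{result.model}] {result.content}")
    else:
        print(f"Generation failed: {result.error_message}")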