msse-ai-engineering / src/llm/prompt_templates.py
"""
Prompt Templates for Corporate Policy Q&A
This module contains predefined prompt templates optimized for
corporate policy question-answering with proper citation requirements.
"""
from dataclasses import dataclass
from typing import Dict, List
@dataclass
class PromptTemplate:
"""Template for generating prompts with context and citations."""
system_prompt: str
user_template: str
citation_format: str
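
# Example (illustrative, not part of the original module): a PromptTemplate simply
# bundles the three strings needed to build a prompt; the PromptTemplates class
# below provides ready-made instances.
#
#     custom_template = PromptTemplate(
#         system_prompt="You are a helpful assistant.",
#         user_template="Answer this question: {question}",
#         citation_format="[Source: {filename}]",
#     )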


class PromptTemplates:
    """
    Collection of prompt templates for different types of policy questions.

    Templates are designed to ensure:
    - Accurate responses based on provided context
    - Proper citation of source documents
    - Adherence to corporate policy scope
    - Consistent formatting and tone
    """

    # System prompt for the corporate policy assistant
    SYSTEM_PROMPT = """You are a helpful corporate policy assistant. Your job is to answer questions about company policies based ONLY on the provided context documents.

IMPORTANT GUIDELINES:
1. Answer questions using ONLY the information provided in the context
2. If the context doesn't contain enough information to answer the question, say so explicitly
3. Always cite your sources using the format: [Source: filename.md]
4. Be accurate, concise, and professional
5. If asked about topics not covered in the policies, politely redirect to HR or the appropriate department
6. Do not make assumptions or provide information not explicitly stated in the context

Your responses should be helpful while staying strictly within the scope of the provided corporate policies."""

    @classmethod
    def get_policy_qa_template(cls) -> PromptTemplate:
        """
        Get the standard template for policy question-answering.

        Returns:
            PromptTemplate configured for corporate policy Q&A
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template="""Based on the following corporate policy documents, please answer this question: {question}

CONTEXT DOCUMENTS:
{context}

Please provide a clear, accurate answer based on the information above. Include citations for all information using the format [Source: filename.md].""",
            citation_format="[Source: {filename}]",
        )
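
    # Example (illustrative, not part of the original module): filling the Q&A
    # template. The question text and `search_results` variable are assumptions;
    # format_context() below produces the {context} string.
    #
    #     template = PromptTemplates.get_policy_qa_template()
    #     prompt = template.user_template.format(
    #         question="How many PTO days do new employees receive?",
    #         context=PromptTemplates.format_context(search_results),
    #     )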

    @classmethod
    def get_clarification_template(cls) -> PromptTemplate:
        """
        Get the template used when clarification is needed.

        Returns:
            PromptTemplate for clarification requests
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template="""The user asked: {question}

CONTEXT DOCUMENTS:
{context}

The provided context documents don't contain sufficient information to fully answer this question. Please provide a helpful response that:
1. Acknowledges what information is available (if any)
2. Clearly states what information is missing
3. Suggests appropriate next steps (contact HR, check other resources, etc.)
4. Cites any relevant sources using the [Source: filename.md] format""",
            citation_format="[Source: {filename}]",
        )

    @classmethod
    def get_off_topic_template(cls) -> PromptTemplate:
        """
        Get the template for off-topic questions.

        Returns:
            PromptTemplate for redirecting off-topic questions
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template="""The user asked: {question}

This question appears to be outside the scope of our corporate policies. Please provide a polite response that:
1. Acknowledges the question
2. Explains that this falls outside corporate policy documentation
3. Suggests appropriate resources (HR, IT, management, etc.)
4. Offers to help with any policy-related questions instead""",
            citation_format="",
        )
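
    # Note (illustrative): unlike the Q&A and clarification templates, the off-topic
    # user_template has only a {question} placeholder, so it is formatted without any
    # retrieved context. The question text below is made up.
    #
    #     off_topic = PromptTemplates.get_off_topic_template()
    #     prompt = off_topic.user_template.format(question="What's on the lunch menu?")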

    @staticmethod
    def format_context(search_results: List[Dict]) -> str:
        """
        Format search results into context for the prompt.

        Args:
            search_results: List of search results from SearchService

        Returns:
            Formatted context string for the prompt
        """
        if not search_results:
            return "No relevant policy documents found."

        context_parts = []
        for i, result in enumerate(search_results[:5], 1):  # Limit to top 5 results
            filename = result.get("metadata", {}).get("filename", "unknown")
            content = result.get("content", "").strip()
            similarity = result.get("similarity_score", 0.0)
            context_parts.append(
                f"Document {i}: {filename} (relevance: {similarity:.2f})\n"
                f"Content: {content}\n"
            )

        return "\n---\n".join(context_parts)
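
    # Example (illustrative): a single search result in the dict shape this method
    # expects (the keys mirror the .get() calls above; the values are made up).
    #
    #     PromptTemplates.format_context([
    #         {
    #             "content": "Employees accrue 1.5 PTO days per month.",
    #             "metadata": {"filename": "pto_policy.md"},
    #             "similarity_score": 0.87,
    #         }
    #     ])
    #     # -> "Document 1: pto_policy.md (relevance: 0.87)\nContent: Employees accrue ...\n"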

    @staticmethod
    def extract_citations(response: str) -> List[str]:
        """
        Extract citations from an LLM response.

        Args:
            response: Generated response text

        Returns:
            List of filenames extracted from citations
        """
        import re

        # Pattern to match the [Source: filename.md] format
        citation_pattern = r"\[Source:\s*([^\]]+)\]"
        matches = re.findall(citation_pattern, response)

        # Clean up filenames, dropping duplicates while preserving order
        citations = []
        for match in matches:
            filename = match.strip()
            if filename and filename not in citations:
                citations.append(filename)

        return citations
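
    # Example (illustrative; the filename is made up):
    #
    #     PromptTemplates.extract_citations(
    #         "Remote work is allowed three days per week [Source: remote_work.md]."
    #     )
    #     # -> ["remote_work.md"]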

    @staticmethod
    def validate_citations(
        response: str, available_sources: List[str]
    ) -> Dict[str, bool]:
        """
        Validate that all citations in the response refer to available sources.

        Args:
            response: Generated response text
            available_sources: List of available source filenames

        Returns:
            Dictionary mapping citations to their validity
        """
        citations = PromptTemplates.extract_citations(response)

        validation = {}
        for citation in citations:
            # Check if the citation matches any available source
            valid = any(
                citation in source or source in citation
                for source in available_sources
            )
            validation[citation] = valid

        return validation
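
    # Example (illustrative; filenames are made up):
    #
    #     PromptTemplates.validate_citations(
    #         "See [Source: pto_policy.md]", ["pto_policy.md", "remote_work.md"]
    #     )
    #     # -> {"pto_policy.md": True}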

    @staticmethod
    def add_fallback_citations(response: str, search_results: List[Dict]) -> str:
        """
        Add citations to the response if the LLM did not provide any.

        Args:
            response: Generated response text
            search_results: Original search results used for context

        Returns:
            Response with appended citations if needed
        """
        existing_citations = PromptTemplates.extract_citations(response)
        if existing_citations:
            return response  # Already has citations

        if not search_results:
            return response  # No sources to cite

        # Add citations from the top search results
        top_sources = []
        for result in search_results[:3]:  # Top 3 sources
            filename = result.get("metadata", {}).get("filename", "")
            if filename and filename not in top_sources:
                top_sources.append(filename)

        if top_sources:
            citation_text = " [Sources: " + ", ".join(top_sources) + "]"
            return response + citation_text

        return response
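

# Minimal usage sketch (illustrative, not part of the original module). The search
# result shape and filenames are assumptions for demonstration purposes only.
if __name__ == "__main__":
    sample_results = [
        {
            "content": "Employees may work remotely up to three days per week.",
            "metadata": {"filename": "remote_work.md"},
            "similarity_score": 0.91,
        }
    ]

    # Build a policy Q&A prompt from the sample search result
    template = PromptTemplates.get_policy_qa_template()
    prompt = template.user_template.format(
        question="How many days per week can I work remotely?",
        context=PromptTemplates.format_context(sample_results),
    )
    print(prompt)

    # Append fallback citations to an answer that has none
    answer = "You may work remotely up to three days per week."
    print(PromptTemplates.add_fallback_citations(answer, sample_results))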