msse-ai-engineering / src/llm/prompt_templates.py
"""
Prompt Templates for Corporate Policy Q&A
This module contains predefined prompt templates optimized for
corporate policy question-answering with proper citation requirements.
"""
from dataclasses import dataclass
from typing import Dict, List
@dataclass
class PromptTemplate:
"""Template for generating prompts with context and citations."""
system_prompt: str
user_template: str
citation_format: str
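
# Example (illustrative, not part of the original module): a PromptTemplate simply
# bundles the three strings needed to build a prompt; the PromptTemplates class
# below provides ready-made instances.
#
#     custom_template = PromptTemplate(
#         system_prompt="You are a helpful assistant.",
#         user_template="Answer this question: {question}",
#         citation_format="[Source: {filename}]",
#     )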


class PromptTemplates:
    """
    Collection of prompt templates for different types of policy questions.

    Templates are designed to ensure:
    - Accurate responses based on provided context
    - Proper citation of source documents
    - Adherence to corporate policy scope
    - Consistent formatting and tone
    """

    # System prompt for the corporate policy assistant
    SYSTEM_PROMPT = """You are a helpful corporate policy assistant. Your job is to answer questions about company policies based ONLY on the provided context documents.

IMPORTANT GUIDELINES:
1. Answer questions using ONLY the information provided in the context
2. If the context doesn't contain enough information to answer the question, say so explicitly
3. Always cite your sources using the format: [Source: filename.md]
4. Be accurate, concise, and professional
5. If asked about topics not covered in the policies, politely redirect to HR or the appropriate department
6. Do not make assumptions or provide information not explicitly stated in the context

Your responses should be helpful while staying strictly within the scope of the provided corporate policies."""

    @classmethod
    def get_policy_qa_template(cls) -> PromptTemplate:
        """
        Get the standard template for policy question-answering.

        Returns:
            PromptTemplate configured for corporate policy Q&A
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template="""Based on the following corporate policy documents, please answer this question: {question}

CONTEXT DOCUMENTS:
{context}

Please provide a clear, accurate answer based on the information above. Include citations for all information using the format [Source: filename.md].""",
            citation_format="[Source: {filename}]",
        )
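
    # Example (illustrative, not part of the original module): filling the Q&A
    # template. The question text and `search_results` variable are assumptions;
    # format_context() below produces the {context} string.
    #
    #     template = PromptTemplates.get_policy_qa_template()
    #     prompt = template.user_template.format(
    #         question="How many PTO days do new employees receive?",
    #         context=PromptTemplates.format_context(search_results),
    #     )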

    @classmethod
    def get_clarification_template(cls) -> PromptTemplate:
        """
        Get the template used when clarification is needed.

        Returns:
            PromptTemplate for clarification requests
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template="""The user asked: {question}

CONTEXT DOCUMENTS:
{context}

The provided context documents don't contain sufficient information to fully answer this question. Please provide a helpful response that:
1. Acknowledges what information is available (if any)
2. Clearly states what information is missing
3. Suggests appropriate next steps (contact HR, check other resources, etc.)
4. Cites any relevant sources using the [Source: filename.md] format""",
            citation_format="[Source: {filename}]",
        )

    @classmethod
    def get_off_topic_template(cls) -> PromptTemplate:
        """
        Get the template for off-topic questions.

        Returns:
            PromptTemplate for redirecting off-topic questions
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template="""The user asked: {question}

This question appears to be outside the scope of our corporate policies. Please provide a polite response that:
1. Acknowledges the question
2. Explains that this falls outside corporate policy documentation
3. Suggests appropriate resources (HR, IT, management, etc.)
4. Offers to help with any policy-related questions instead""",
            citation_format="",
        )
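
    # Note (illustrative): unlike the Q&A and clarification templates, the off-topic
    # user_template has only a {question} placeholder, so it is formatted without any
    # retrieved context. The question text below is made up.
    #
    #     off_topic = PromptTemplates.get_off_topic_template()
    #     prompt = off_topic.user_template.format(question="What's on the lunch menu?")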

    @staticmethod
    def format_context(search_results: List[Dict]) -> str:
        """
        Format search results into context for the prompt.

        Args:
            search_results: List of search results from SearchService

        Returns:
            Formatted context string for the prompt
        """
        if not search_results:
            return "No relevant policy documents found."

        context_parts = []
        for i, result in enumerate(search_results[:5], 1):  # Limit to top 5 results
            filename = result.get("metadata", {}).get("filename", "unknown")
            content = result.get("content", "").strip()
            similarity = result.get("similarity_score", 0.0)
            context_parts.append(
                f"Document {i}: {filename} (relevance: {similarity:.2f})\n"
                f"Content: {content}\n"
            )

        return "\n---\n".join(context_parts)
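
    # Example (illustrative): a single search result in the dict shape this method
    # expects (the keys mirror the .get() calls above; the values are made up).
    #
    #     PromptTemplates.format_context([
    #         {
    #             "content": "Employees accrue 1.5 PTO days per month.",
    #             "metadata": {"filename": "pto_policy.md"},
    #             "similarity_score": 0.87,
    #         }
    #     ])
    #     # -> "Document 1: pto_policy.md (relevance: 0.87)\nContent: Employees accrue ...\n"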

    @staticmethod
    def extract_citations(response: str) -> List[str]:
        """
        Extract citations from an LLM response.

        Args:
            response: Generated response text

        Returns:
            List of filenames extracted from citations
        """
        import re

        # Pattern to match the [Source: filename.md] format
        citation_pattern = r"\[Source:\s*([^\]]+)\]"
        matches = re.findall(citation_pattern, response)

        # Clean up filenames, dropping duplicates while preserving order
        citations = []
        for match in matches:
            filename = match.strip()
            if filename and filename not in citations:
                citations.append(filename)

        return citations
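
    # Example (illustrative; the filename is made up):
    #
    #     PromptTemplates.extract_citations(
    #         "Remote work is allowed three days per week [Source: remote_work.md]."
    #     )
    #     # -> ["remote_work.md"]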

    @staticmethod
    def validate_citations(
        response: str, available_sources: List[str]
    ) -> Dict[str, bool]:
        """
        Validate that all citations in the response refer to available sources.

        Args:
            response: Generated response text
            available_sources: List of available source filenames

        Returns:
            Dictionary mapping citations to their validity
        """
        citations = PromptTemplates.extract_citations(response)

        validation = {}
        for citation in citations:
            # Check if the citation matches any available source
            valid = any(
                citation in source or source in citation
                for source in available_sources
            )
            validation[citation] = valid

        return validation
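
    # Example (illustrative; filenames are made up):
    #
    #     PromptTemplates.validate_citations(
    #         "See [Source: pto_policy.md]", ["pto_policy.md", "remote_work.md"]
    #     )
    #     # -> {"pto_policy.md": True}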

    @staticmethod
    def add_fallback_citations(response: str, search_results: List[Dict]) -> str:
        """
        Add citations to the response if the LLM did not provide any.

        Args:
            response: Generated response text
            search_results: Original search results used for context

        Returns:
            Response with appended citations if needed
        """
        existing_citations = PromptTemplates.extract_citations(response)
        if existing_citations:
            return response  # Already has citations

        if not search_results:
            return response  # No sources to cite

        # Add citations from the top search results
        top_sources = []
        for result in search_results[:3]:  # Top 3 sources
            filename = result.get("metadata", {}).get("filename", "")
            if filename and filename not in top_sources:
                top_sources.append(filename)

        if top_sources:
            citation_text = " [Sources: " + ", ".join(top_sources) + "]"
            return response + citation_text

        return response
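

# Minimal usage sketch (illustrative, not part of the original module). The search
# result shape and filenames are assumptions for demonstration purposes only.
if __name__ == "__main__":
    sample_results = [
        {
            "content": "Employees may work remotely up to three days per week.",
            "metadata": {"filename": "remote_work.md"},
            "similarity_score": 0.91,
        }
    ]

    # Build a policy Q&A prompt from the sample search result
    template = PromptTemplates.get_policy_qa_template()
    prompt = template.user_template.format(
        question="How many days per week can I work remotely?",
        context=PromptTemplates.format_context(sample_results),
    )
    print(prompt)

    # Append fallback citations to an answer that has none
    answer = "You may work remotely up to three days per week."
    print(PromptTemplates.add_fallback_citations(answer, sample_results))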