Spaces:
Sleeping
Sleeping
"""
Prompt Templates for Corporate Policy Q&A

This module contains predefined prompt templates optimized for
corporate policy question-answering with proper citation requirements.
"""
import re
from dataclasses import dataclass
from typing import Dict, List
@dataclass
class PromptTemplate:
    """Template for generating prompts with context and citations.

    Instances are built with keyword arguments by the ``PromptTemplates``
    factory methods, so this must be a dataclass (a plain class with bare
    annotations would reject those arguments).

    Attributes:
        system_prompt: Instructions describing the assistant's role and rules.
        user_template: User-message text containing a ``{question}``
            placeholder and, for templates that embed retrieved documents,
            a ``{context}`` placeholder.
        citation_format: Pattern for a single citation with a ``{filename}``
            placeholder, or an empty string when no citation is expected.
    """

    system_prompt: str
    user_template: str
    citation_format: str
class PromptTemplates:
    """
    Collection of prompt templates for different types of policy questions.

    Templates are designed to ensure:
    - Accurate responses based on provided context
    - Proper citation of source documents
    - Adherence to corporate policy scope
    - Consistent formatting and tone

    The class is a namespace only: the template getters are classmethods and
    the formatting / citation helpers are staticmethods, so nothing here
    needs an instance.
    """

    # Citation marker for a single source, shared by the templates that cite.
    _CITATION_FORMAT = "[Source: {filename}]"

    # Matches both "[Source: a.md]" (what the prompts ask the model for) and
    # "[Sources: a.md, b.md]" (what add_fallback_citations appends), so that
    # fallback citations are recognised by extract/validate as well.
    _CITATION_RE = re.compile(r"\[Sources?:\s*([^\]]+)\]")

    # System prompt for corporate policy assistant.
    # Built from adjacent literals so that no lint pragma has to live inside
    # the string (and therefore inside the text sent to the model).
    SYSTEM_PROMPT = (
        "You are a helpful corporate policy assistant. Your job is to answer "
        "questions about company policies based ONLY on the provided context "
        "documents.\n"
        "IMPORTANT GUIDELINES:\n"
        "1. Answer questions using ONLY the information provided in the "
        "context\n"
        "2. If the context doesn't contain enough information to answer the "
        "question, say so explicitly\n"
        "3. Always cite your sources using the format: [Source: filename.md]\n"
        "4. Be accurate, concise, and professional\n"
        "5. If asked about topics not covered in the policies, politely "
        "redirect to HR or appropriate department\n"
        "6. Do not make assumptions or provide information not explicitly "
        "stated in the context\n"
        "Your responses should be helpful while staying strictly within the "
        "scope of the provided corporate policies."
    )

    @classmethod
    def get_policy_qa_template(cls) -> "PromptTemplate":
        """
        Get the standard template for policy question-answering.

        Returns:
            PromptTemplate configured for corporate policy Q&A. Its user
            template has ``{question}`` and ``{context}`` placeholders.
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template=(
                "Based on the following corporate policy documents, please "
                "answer this question: {question}\n"
                "CONTEXT DOCUMENTS:\n"
                "{context}\n"
                "Please provide a clear, accurate answer based on the "
                "information above. Include citations for all information "
                "using the format [Source: filename.md]."
            ),
            citation_format=cls._CITATION_FORMAT,
        )

    @classmethod
    def get_clarification_template(cls) -> "PromptTemplate":
        """
        Get template for when clarification is needed.

        Returns:
            PromptTemplate for clarification requests. Its user template has
            ``{question}`` and ``{context}`` placeholders.
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template=(
                "The user asked: {question}\n"
                "CONTEXT DOCUMENTS:\n"
                "{context}\n"
                "The provided context documents don't contain sufficient "
                "information to fully answer this question. Please provide a "
                "helpful response that:\n"
                "1. Acknowledges what information is available (if any)\n"
                "2. Clearly states what information is missing\n"
                "3. Suggests appropriate next steps (contact HR, check other "
                "resources, etc.)\n"
                "4. Cites any relevant sources using [Source: filename.md] "
                "format"
            ),
            citation_format=cls._CITATION_FORMAT,
        )

    @classmethod
    def get_off_topic_template(cls) -> "PromptTemplate":
        """
        Get template for off-topic questions.

        Returns:
            PromptTemplate for redirecting off-topic questions. Its user
            template only has a ``{question}`` placeholder and its citation
            format is empty because no documents are cited.
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template=(
                "The user asked: {question}\n"
                "This question appears to be outside the scope of our "
                "corporate policies. Please provide a polite response that:\n"
                "1. Acknowledges the question\n"
                "2. Explains that this falls outside corporate policy "
                "documentation\n"
                "3. Suggests appropriate resources (HR, IT, management, etc.)\n"
                "4. Offers to help with any policy-related questions instead"
            ),
            citation_format="",
        )

    @staticmethod
    def format_context(search_results: List[Dict], max_results: int = 5) -> str:
        """
        Format search results into context for the prompt.

        Args:
            search_results: List of search-result dicts. The keys read are
                ``metadata`` (a dict with ``filename``), ``content`` and
                ``similarity_score``; missing or ``None`` values fall back to
                ``"unknown"``, ``""`` and ``0.0`` respectively.
            max_results: Maximum number of leading results to include
                (defaults to 5).

        Returns:
            Formatted context string for the prompt, or a fixed
            "no documents" message when there is nothing to include.
        """
        # Clamp so a negative limit cannot turn into a "drop from the end" slice.
        selected = (search_results or [])[: max(max_results, 0)]
        if not selected:
            return "No relevant policy documents found."

        context_parts = []
        for i, result in enumerate(selected, 1):
            # `or` (rather than a .get default) also covers keys present
            # with a None value.
            metadata = result.get("metadata") or {}
            filename = metadata.get("filename") or "unknown"
            content = (result.get("content") or "").strip()
            similarity = result.get("similarity_score") or 0.0
            context_parts.append(
                f"Document {i}: {filename} (relevance: {similarity:.2f})\n"
                f"Content: {content}\n"
            )
        return "\n---\n".join(context_parts)

    @staticmethod
    def extract_citations(response: str) -> List[str]:
        """
        Extract citations from LLM response.

        Recognises ``[Source: name]`` as well as the plural
        ``[Sources: a, b]`` form; comma-separated names inside one marker are
        returned as separate citations.

        Args:
            response: Generated response text

        Returns:
            List of extracted filenames from citations, whitespace-stripped,
            de-duplicated, in order of first appearance
        """
        names = (
            part.strip()
            for group in PromptTemplates._CITATION_RE.findall(response or "")
            for part in group.split(",")
        )
        # dict.fromkeys drops duplicates while keeping first-seen order.
        return list(dict.fromkeys(name for name in names if name))

    @staticmethod
    def validate_citations(
        response: str, available_sources: List[str]
    ) -> Dict[str, bool]:
        """
        Validate that all citations in response refer to available sources.

        A citation counts as valid when it is a substring of an available
        source or vice versa (so ``a.md`` matches ``policies/a.md``).

        Args:
            response: Generated response text
            available_sources: List of available source filenames

        Returns:
            Dictionary mapping citations to their validity
        """
        # An empty source is a substring of every citation and would mark
        # everything valid, so it is ignored.
        sources = [source for source in (available_sources or []) if source]
        return {
            citation: any(
                citation in source or source in citation for source in sources
            )
            for citation in PromptTemplates.extract_citations(response)
        }

    @staticmethod
    def add_fallback_citations(
        response: str, search_results: List[Dict], max_sources: int = 3
    ) -> str:
        """
        Add citations to response if none were provided by LLM.

        Args:
            response: Generated response text
            search_results: Original search results used for context
            max_sources: Number of leading search results whose filenames may
                be cited (defaults to 3); duplicates among them are collapsed.

        Returns:
            Response with `` [Sources: a, b]`` appended, or the response
            unchanged when it already has citations or there is nothing
            to cite
        """
        if not search_results:
            return response  # No sources to cite
        if PromptTemplates.extract_citations(response):
            return response  # Already has citations (including a prior fallback)

        top_sources: List[str] = []
        for result in search_results[: max(max_sources, 0)]:
            filename = (result.get("metadata") or {}).get("filename") or ""
            if filename and filename not in top_sources:
                top_sources.append(filename)

        if not top_sources:
            return response
        return response + " [Sources: " + ", ".join(top_sources) + "]"