File size: 7,398 Bytes
c280a92
 
 
508a7e5
c280a92
 
 
 
508a7e5
c280a92
 
 
 
 
508a7e5
c280a92
 
 
 
 
 
 
 
508a7e5
c280a92
 
 
 
 
 
 
 
508a7e5
c280a92
 
 
508a7e5
c280a92
 
508a7e5
 
c280a92
 
 
 
 
 
 
508a7e5
c280a92
 
 
 
 
508a7e5
c280a92
 
 
 
508a7e5
 
c280a92
 
 
 
 
 
508a7e5
c280a92
 
 
 
 
 
 
 
 
 
508a7e5
c280a92
 
 
 
508a7e5
c280a92
 
 
 
 
 
508a7e5
c280a92
 
 
 
 
 
 
508a7e5
c280a92
 
 
 
508a7e5
c280a92
 
 
 
 
 
508a7e5
c280a92
 
508a7e5
c280a92
 
 
 
 
508a7e5
c280a92
 
 
 
 
508a7e5
159faf0
508a7e5
c280a92
 
 
 
 
 
508a7e5
c280a92
 
508a7e5
c280a92
 
 
 
508a7e5
c280a92
508a7e5
c280a92
508a7e5
c280a92
 
 
 
 
 
508a7e5
c280a92
 
 
159faf0
c280a92
 
508a7e5
c280a92
 
 
508a7e5
c280a92
 
 
 
 
508a7e5
c280a92
 
159faf0
c280a92
508a7e5
c280a92
 
 
508a7e5
c280a92
 
508a7e5
c280a92
 
 
508a7e5
c280a92
 
 
 
508a7e5
c280a92
 
508a7e5
c280a92
 
508a7e5
c280a92
 
 
 
 
 
508a7e5
c280a92
 
 
508a7e5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
"""
Prompt Templates for Corporate Policy Q&A

This module contains predefined prompt templates optimized for
corporate policy question-answering with proper citation requirements.
"""

from dataclasses import dataclass
from typing import Dict, List


@dataclass
class PromptTemplate:
    """Bundle of the text pieces that make up one prompt configuration.

    Instances are plain value holders; the three fields are positional in
    the order declared below.
    """

    # Instruction text supplied as the system prompt.
    system_prompt: str
    # User-message text containing ``str.format``-style placeholders
    # (the templates in this module use ``{question}`` and ``{context}``).
    user_template: str
    # Pattern describing how a citation is written, e.g.
    # "[Source: {filename}]"; may be empty when no citation is expected.
    citation_format: str


class PromptTemplates:
    """
    Collection of prompt templates for different types of policy questions.

    Templates are designed to ensure:
    - Accurate responses based on provided context
    - Proper citation of source documents
    - Adherence to corporate policy scope
    - Consistent formatting and tone

    The class also offers static helpers to render search results as prompt
    context and to extract, validate, and back-fill citations in a response.
    """

    # NOTE: linter suppressions for the long prompt lines are placed AFTER the
    # closing quotes of each literal. A "# noqa" written inside the literal
    # would become part of the text sent to the model.

    # System prompt for corporate policy assistant
    SYSTEM_PROMPT = """You are a helpful corporate policy assistant. Your job is to answer questions about company policies based ONLY on the provided context documents.

IMPORTANT GUIDELINES:
1. Answer questions using ONLY the information provided in the context
2. If the context doesn't contain enough information to answer the question, say so explicitly
3. Always cite your sources using the format: [Source: filename.md]
4. Be accurate, concise, and professional
5. If asked about topics not covered in the policies, politely redirect to HR or appropriate department
6. Do not make assumptions or provide information not explicitly stated in the context

Your responses should be helpful while staying strictly within the scope of the provided corporate policies."""  # noqa: E501

    # Citation pattern shared by the templates that expect citations.
    _CITATION_FORMAT = "[Source: {filename}]"

    # User message for the standard Q&A flow ({question}, {context}).
    _POLICY_QA_USER_TEMPLATE = """Based on the following corporate policy documents, please answer this question: {question}

CONTEXT DOCUMENTS:
{context}

Please provide a clear, accurate answer based on the information above. Include citations for all information using the format [Source: filename.md]."""  # noqa: E501

    # User message for insufficient-context answers ({question}, {context}).
    _CLARIFICATION_USER_TEMPLATE = """The user asked: {question}

CONTEXT DOCUMENTS:
{context}

The provided context documents don't contain sufficient information to fully answer this question. Please provide a helpful response that:
1. Acknowledges what information is available (if any)
2. Clearly states what information is missing
3. Suggests appropriate next steps (contact HR, check other resources, etc.)
4. Cites any relevant sources using [Source: filename.md] format"""  # noqa: E501

    # User message for out-of-scope questions ({question} only).
    _OFF_TOPIC_USER_TEMPLATE = """The user asked: {question}

This question appears to be outside the scope of our corporate policies. Please provide a polite response that:
1. Acknowledges the question
2. Explains that this falls outside corporate policy documentation
3. Suggests appropriate resources (HR, IT, management, etc.)
4. Offers to help with any policy-related questions instead"""  # noqa: E501

    @classmethod
    def get_policy_qa_template(cls) -> "PromptTemplate":
        """
        Get the standard template for policy question-answering.

        Returns:
            PromptTemplate configured for corporate policy Q&A
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template=cls._POLICY_QA_USER_TEMPLATE,
            citation_format=cls._CITATION_FORMAT,
        )

    @classmethod
    def get_clarification_template(cls) -> "PromptTemplate":
        """
        Get template for when clarification is needed.

        Returns:
            PromptTemplate for clarification requests
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template=cls._CLARIFICATION_USER_TEMPLATE,
            citation_format=cls._CITATION_FORMAT,
        )

    @classmethod
    def get_off_topic_template(cls) -> "PromptTemplate":
        """
        Get template for off-topic questions.

        Returns:
            PromptTemplate for redirecting off-topic questions; its
            citation format is empty because no sources are cited
        """
        return PromptTemplate(
            system_prompt=cls.SYSTEM_PROMPT,
            user_template=cls._OFF_TOPIC_USER_TEMPLATE,
            citation_format="",
        )

    @staticmethod
    def format_context(search_results: List[Dict]) -> str:
        """
        Format search results into context for the prompt.

        Only the first five results are rendered. Missing or ``None`` values
        for ``metadata``, ``content`` or ``similarity_score`` fall back to
        ``"unknown"``, an empty string and ``0.0`` respectively.

        Args:
            search_results: List of result dicts; each is read for the keys
                ``metadata`` (a dict with ``filename``), ``content`` and
                ``similarity_score``

        Returns:
            Formatted context string for the prompt, or a fixed
            "no documents" message when the list is empty
        """
        if not search_results:
            return "No relevant policy documents found."

        context_parts = []
        for i, result in enumerate(search_results[:5], 1):  # Limit to top 5 results
            # `or` fallbacks also cover keys that are present but set to None.
            metadata = result.get("metadata") or {}
            filename = metadata.get("filename") or "unknown"
            content = (result.get("content") or "").strip()
            similarity = result.get("similarity_score") or 0.0

            context_parts.append(f"Document {i}: {filename} (relevance: {similarity:.2f})\n" f"Content: {content}\n")

        return "\n---\n".join(context_parts)

    @staticmethod
    def extract_citations(response: str) -> List[str]:
        """
        Extract citations from LLM response.

        Recognises both ``[Source: a.md]`` and the plural, comma-separated
        ``[Sources: a.md, b.md]`` form that ``add_fallback_citations``
        appends, so fallback citations can be extracted and validated too.

        Args:
            response: Generated response text

        Returns:
            List of unique filenames in order of first appearance
        """
        import re

        # Optional plural "s"; the captured group may hold several
        # comma-separated filenames.
        citation_pattern = r"\[Sources?:\s*([^\]]+)\]"

        citations: List[str] = []
        for match in re.findall(citation_pattern, response or ""):
            for part in match.split(","):
                filename = part.strip()
                if filename and filename not in citations:
                    citations.append(filename)

        return citations

    @staticmethod
    def validate_citations(response: str, available_sources: List[str]) -> Dict[str, bool]:
        """
        Validate that all citations in response refer to available sources.

        A citation is valid when it is a substring of some available source
        or some available source is a substring of it (so ``pto.md`` matches
        ``policies/pto.md``).

        Args:
            response: Generated response text
            available_sources: List of available source filenames

        Returns:
            Dictionary mapping citations to their validity
        """
        # Drop empty entries: "" is a substring of every string and would
        # otherwise mark every citation as valid.
        known_sources = [source for source in (available_sources or []) if source]

        return {
            citation: any(citation in source or source in citation for source in known_sources)
            for citation in PromptTemplates.extract_citations(response)
        }

    @staticmethod
    def add_fallback_citations(response: str, search_results: List[Dict]) -> str:
        """
        Add citations to response if none were provided by LLM.

        Unique filenames from the first three search results are appended as
        ``" [Sources: a.md, b.md]"``. The response is returned unchanged when
        it already contains a citation (including a previously appended
        fallback) or when no filename is available.

        Args:
            response: Generated response text
            search_results: Original search results used for context

        Returns:
            Response with added citations if needed
        """
        if not search_results:
            return response  # No sources to cite

        if PromptTemplates.extract_citations(response):
            return response  # Already has citations

        # Add citations from top search results
        top_sources: List[str] = []
        for result in search_results[:3]:  # Top 3 sources
            filename = (result.get("metadata") or {}).get("filename") or ""
            if filename and filename not in top_sources:
                top_sources.append(filename)

        if not top_sources:
            return response

        return response + " [Sources: " + ", ".join(top_sources) + "]"