Spaces:
Runtime error
Runtime error
Sync from GitHub (main)
Browse files
- configs/output_format/assessment.yaml +4 -4
- prompts/instruction/assessment.md +1 -1
- scripts/benchmark_llm.py +695 -0
- src/sentinel/conversation.py +6 -2
- src/sentinel/models.py +14 -0
- src/sentinel/probability_aggregation.py +230 -0
- src/sentinel/reporting.py +393 -129
- src/sentinel/risk_aggregation.py +131 -0
- src/sentinel/risk_models/__init__.py +3 -0
- src/sentinel/risk_models/base.py +36 -1
- src/sentinel/risk_models/boadicea.py +8 -0
- src/sentinel/risk_models/claus.py +8 -0
- src/sentinel/risk_models/crc_pro.py +12 -3
- src/sentinel/risk_models/extended_pbcg.py +8 -0
- src/sentinel/risk_models/gail.py +9 -1
- src/sentinel/risk_models/llpi.py +8 -0
- src/sentinel/risk_models/mrat.py +8 -0
- src/sentinel/risk_models/pcpt.py +8 -0
- src/sentinel/risk_models/plcom2012.py +8 -0
- src/sentinel/risk_models/prostate_mortality.py +8 -0
- src/sentinel/risk_models/qcancer.py +14 -0
- src/sentinel/risk_models/tyrer_cuzick.py +8 -0
- tests/test_probability_aggregation.py +477 -0
- tests/test_risk_models/test_gail_model.py +1 -1
configs/output_format/assessment.yaml
CHANGED
|
@@ -3,7 +3,7 @@ format_instructions: |
|
|
| 3 |
- Return ONLY valid JSON. Do not include any explanatory text, disclaimers, or additional content before or after the JSON.
|
| 4 |
- The `RISK SCORES (GROUND TRUTH)` section contains validated risk scores. You MUST provide interpretations and explanations for these scores, NOT generate new risk levels.
|
| 5 |
- Provide a diagnostic recommendation for EVERY diagnostic protocol provided in the `DIAGNOSTIC PROTOCOLS` (i.e. {diagnostic_protocols}).
|
| 6 |
-
-
|
| 7 |
- The ONLY allowed values for the "strength" field in "contributing_factors" objects are: {allowed_strengths}.
|
| 8 |
|
| 9 |
The output must be formatted as a valid JSON instance with the following structure:
|
|
@@ -13,7 +13,7 @@ format_instructions: |
|
|
| 13 |
"identified_risk_factors": [
|
| 14 |
{{
|
| 15 |
"description": "string - A human-readable description of the risk factor identified from the user's profile.",
|
| 16 |
-
"category": "string -
|
| 17 |
}}
|
| 18 |
],
|
| 19 |
"llm_risk_interpretations": [
|
|
@@ -25,8 +25,8 @@ format_instructions: |
|
|
| 25 |
"contributing_factors": [
|
| 26 |
{{
|
| 27 |
"description": "string - A human-readable description of the risk factor",
|
| 28 |
-
"category": "string -
|
| 29 |
-
"strength": "string -
|
| 30 |
}}
|
| 31 |
]
|
| 32 |
}}
|
|
|
|
| 3 |
- Return ONLY valid JSON. Do not include any explanatory text, disclaimers, or additional content before or after the JSON.
|
| 4 |
- The `RISK SCORES (GROUND TRUTH)` section contains validated risk scores. You MUST provide interpretations and explanations for these scores, NOT generate new risk levels.
|
| 5 |
- Provide a diagnostic recommendation for EVERY diagnostic protocol provided in the `DIAGNOSTIC PROTOCOLS` (i.e. {diagnostic_protocols}).
|
| 6 |
+
- **STRICT CATEGORY REQUIREMENT**: For ALL "category" fields in "identified_risk_factors" and "contributing_factors", you MUST use EXACTLY one of these values: {allowed_categories}. DO NOT create new categories like "Symptom", "Dermatologic", or any other value not in this list. Map symptoms to "Clinical Observation", dermatologic factors to "Lifestyle" or "Demographics" as appropriate, and use "Other" ONLY as a last resort when no other category fits.
|
| 7 |
- The ONLY allowed values for the "strength" field in "contributing_factors" objects are: {allowed_strengths}.
|
| 8 |
|
| 9 |
The output must be formatted as a valid JSON instance with the following structure:
|
|
|
|
| 13 |
"identified_risk_factors": [
|
| 14 |
{{
|
| 15 |
"description": "string - A human-readable description of the risk factor identified from the user's profile.",
|
| 16 |
+
"category": "string - MUST be EXACTLY one of: Lifestyle, Family History, Personal Medical History, Demographics, Female-Specific, Clinical Observation, or Other. DO NOT use any other values like 'Symptom' or 'Dermatologic'. Map symptoms to 'Clinical Observation'. Use 'Other' only as last resort."
|
| 17 |
}}
|
| 18 |
],
|
| 19 |
"llm_risk_interpretations": [
|
|
|
|
| 25 |
"contributing_factors": [
|
| 26 |
{{
|
| 27 |
"description": "string - A human-readable description of the risk factor",
|
| 28 |
+
"category": "string - MUST be EXACTLY one of: Lifestyle, Family History, Personal Medical History, Demographics, Female-Specific, Clinical Observation, or Other. DO NOT use any other values like 'Symptom' or 'Dermatologic'. Map symptoms to 'Clinical Observation'. Use 'Other' only as last resort.",
|
| 29 |
+
"strength": "string - MUST be exactly one of: Major, Moderate, or Minor"
|
| 30 |
}}
|
| 31 |
]
|
| 32 |
}}
|
prompts/instruction/assessment.md
CHANGED
|
@@ -18,6 +18,6 @@ Your role is to:
|
|
| 18 |
|
| 19 |
6. **Maintain consistency**: Ensure your explanations and recommendations align with the calculated risk scores and established guidelines. Do not contradict the quantitative scores.
|
| 20 |
|
| 21 |
-
7. **Structure the output**: Generate the JSON response following the `FORMAT INSTRUCTIONS` exactly.
|
| 22 |
|
| 23 |
**Critical**: You are an interpreter and explainer of risk data, NOT a risk calculator. The validated risk models have already determined the risk levels - your job is to make them understandable and actionable for the patient.
|
|
|
|
| 18 |
|
| 19 |
6. **Maintain consistency**: Ensure your explanations and recommendations align with the calculated risk scores and established guidelines. Do not contradict the quantitative scores.
|
| 20 |
|
| 21 |
+
7. **Structure the output**: Generate the JSON response following the `FORMAT INSTRUCTIONS` exactly. **CRITICAL**: When categorizing risk factors, you MUST use ONLY the exact category names provided in the FORMAT INSTRUCTIONS. Do NOT invent new categories like "Symptom", "Dermatologic", or any other value. If you encounter symptoms, categorize them as "Clinical Observation". If unsure, use "Other" rather than creating a new category.
|
| 22 |
|
| 23 |
**Critical**: You are an interpreter and explainer of risk data, NOT a risk calculator. The validated risk models have already determined the risk levels - your job is to make them understandable and actionable for the patient.
|
scripts/benchmark_llm.py
ADDED
|
@@ -0,0 +1,695 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LLM Benchmarking Script
|
| 2 |
+
|
| 3 |
+
Measures token usage, costs, and timing for cancer risk assessments
|
| 4 |
+
across different LLM backends.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import argparse
|
| 8 |
+
import csv
|
| 9 |
+
import functools
|
| 10 |
+
import os
|
| 11 |
+
import time
|
| 12 |
+
from collections import defaultdict
|
| 13 |
+
from collections.abc import Callable
|
| 14 |
+
from dataclasses import dataclass
|
| 15 |
+
from datetime import datetime
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
from typing import Any
|
| 18 |
+
|
| 19 |
+
import requests
|
| 20 |
+
import yaml
|
| 21 |
+
from dotenv import load_dotenv
|
| 22 |
+
from langchain_community.callbacks.manager import get_openai_callback
|
| 23 |
+
from loguru import logger
|
| 24 |
+
from reportlab.lib import colors
|
| 25 |
+
from reportlab.lib.pagesizes import letter
|
| 26 |
+
from reportlab.lib.styles import getSampleStyleSheet
|
| 27 |
+
from reportlab.lib.units import inch
|
| 28 |
+
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle
|
| 29 |
+
|
| 30 |
+
from sentinel.config import AppConfig, ModelConfig, ResourcePaths
|
| 31 |
+
from sentinel.factory import SentinelFactory
|
| 32 |
+
from sentinel.utils import load_user_file
|
| 33 |
+
|
| 34 |
+
load_dotenv()
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@dataclass
class ModelPricing:
    """Pricing per 1 million tokens in USD.

    Consumed by ``calculate_cost``, which divides these rates by 1,000,000
    to obtain a per-token price.

    Attributes:
        input_per_million: Cost per 1M input tokens (USD)
        output_per_million: Cost per 1M output tokens (USD)
    """

    input_per_million: float
    output_per_million: float
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@dataclass
class BenchmarkModelConfig:
    """Model configuration for benchmarking.

    Attributes:
        provider: Provider key ("google", "openai", or "local" — the keys
            understood by ``validate_backend``)
        model_name: Model identifier used by the provider
        pricing: Pricing information per 1M tokens
    """

    provider: str
    model_name: str
    pricing: ModelPricing
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# Sources:
# - https://ai.google.dev/pricing
# - https://openai.com/api/pricing/
# NOTE(review): provider prices drift over time — re-check the pages above
# before trusting cost figures from a benchmark report.
BENCHMARK_MODELS = [
    BenchmarkModelConfig(
        provider="google",
        model_name="gemini-2.5-pro",
        pricing=ModelPricing(input_per_million=1.25, output_per_million=10.00),
    ),
    BenchmarkModelConfig(
        provider="google",
        model_name="gemini-2.5-flash-lite",
        pricing=ModelPricing(input_per_million=0.1, output_per_million=0.4),
    ),
]
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
@dataclass
class TokenUsage:
    """Token counts recorded for a single assessment run.

    Attributes:
        input_tokens: Tokens in the prompt/input
        output_tokens: Tokens in the model's response
    """

    input_tokens: int
    output_tokens: int

    @property
    def total_tokens(self) -> int:
        """Combined token count for the run.

        Returns:
            Sum of input and output tokens
        """
        combined = self.input_tokens + self.output_tokens
        return combined
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
@dataclass
class BenchmarkResult:
    """Results from a single model/profile benchmark run.

    Attributes:
        model_name: Name of the model
        provider: Provider key (openai, google, local)
        profile_name: Name of the profile
        token_usage: Token usage statistics
        cost: Cost in USD, derived from token_usage via ``calculate_cost``
        assessment_time_seconds: Wall-clock time taken for the assessment
    """

    model_name: str
    provider: str
    profile_name: str
    token_usage: TokenUsage
    cost: float
    assessment_time_seconds: float
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def calculate_cost(token_usage: TokenUsage, pricing: ModelPricing) -> float:
    """Convert a run's token counts into a USD cost.

    Args:
        token_usage: Token usage statistics for the run
        pricing: Per-1M-token rates for the model

    Returns:
        Total cost in USD (input cost plus output cost)
    """
    # Rates are quoted per million tokens, so scale counts down first.
    cost_for_input = (
        token_usage.input_tokens / 1_000_000
    ) * pricing.input_per_million
    cost_for_output = (
        token_usage.output_tokens / 1_000_000
    ) * pricing.output_per_million
    return cost_for_input + cost_for_output
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def validate_directory_input(func: Callable[..., Any]) -> Callable[..., Any]:
    """Decorator that checks the first positional argument is a usable directory.

    Args:
        func: Function whose first positional argument is a directory Path

    Returns:
        Wrapped function that validates the directory before delegating
    """

    @functools.wraps(func)
    def wrapper(directory: Path, *args: Any, **kwargs: Any) -> Any:
        """Validate ``directory``, then call the wrapped function.

        Args:
            directory: Path to the directory to validate
            *args: Additional positional arguments
            **kwargs: Additional keyword arguments

        Returns:
            Result of the wrapped function

        Raises:
            FileNotFoundError: If the directory does not exist
            NotADirectoryError: If the path exists but is not a directory
            ValueError: If the directory contains no entries
        """
        if not directory.exists():
            raise FileNotFoundError(f"Directory not found: {directory}")
        if not directory.is_dir():
            raise NotADirectoryError(f"Not a directory: {directory}")
        # Peek at the first entry instead of materializing the listing.
        if next(directory.iterdir(), None) is None:
            raise ValueError(f"Directory is empty: {directory}")
        return func(directory, *args, **kwargs)

    return wrapper
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def get_available_models() -> list[BenchmarkModelConfig]:
    """Return the benchmark model roster.

    Returns:
        The module-level list of configured benchmark models. The list
        itself is returned (not a copy); callers should treat it as
        read-only.
    """
    return BENCHMARK_MODELS
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
@validate_directory_input
def load_benchmark_profiles(benchmark_dir: Path) -> list[dict[str, Any]]:
    """Discover benchmark profiles in a directory.

    Args:
        benchmark_dir: Directory containing benchmark YAML files

    Returns:
        One dict per ``*.yaml`` file, sorted by filename, each with
        'name' (file stem) and 'path' (full Path) keys
    """
    # Comprehension replaces the original append loop (same order, same dicts).
    return [
        {"name": yaml_file.stem, "path": yaml_file}
        for yaml_file in sorted(benchmark_dir.glob("*.yaml"))
    ]
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def create_knowledge_base_paths(workspace_root: Path) -> ResourcePaths:
    """Build resource path configuration from workspace root.

    Args:
        workspace_root: Path to workspace root directory

    Returns:
        ResourcePaths configuration object
    """
    # Hoist the two top-level directories so each entry reads as a short join.
    prompts_dir = workspace_root / "prompts"
    configs_dir = workspace_root / "configs"
    return ResourcePaths(
        persona=prompts_dir / "persona" / "default.md",
        instruction_assessment=prompts_dir / "instruction" / "assessment.md",
        instruction_conversation=prompts_dir / "instruction" / "conversation.md",
        output_format_assessment=configs_dir / "output_format" / "assessment.yaml",
        output_format_conversation=configs_dir
        / "output_format"
        / "conversation.yaml",
        cancer_modules_dir=configs_dir / "knowledge_base" / "cancer_modules",
        dx_protocols_dir=configs_dir / "knowledge_base" / "dx_protocols",
    )
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
def validate_backend(provider: str, model_name: str) -> None:
    """Validate that backend is accessible.

    Args:
        provider: Provider key (e.g. "openai", "google", "local")
        model_name: Model identifier

    Raises:
        ValueError: If the backend is not accessible (missing API key,
            unreachable Ollama server, or model not pulled locally)
    """
    if provider == "openai":
        if not os.getenv("OPENAI_API_KEY"):
            raise ValueError("OPENAI_API_KEY not set")
    elif provider == "google":
        if not os.getenv("GOOGLE_API_KEY"):
            raise ValueError("GOOGLE_API_KEY not set")
    elif provider == "local":
        ollama_base_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
        try:
            response = requests.get(f"{ollama_base_url}/api/tags", timeout=2)
        except requests.RequestException as err:
            # Bug fix: a connection failure used to escape as a raw requests
            # exception; surface it as the documented ValueError instead.
            raise ValueError("Ollama server not responding") from err
        if response.status_code != 200:
            raise ValueError("Ollama server not responding")
        models = response.json().get("models", [])
        model_names = [m.get("name") for m in models]
        if model_name not in model_names:
            raise ValueError(f"Model not found. Run: ollama pull {model_name}")
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def run_assessment(
    model_config: BenchmarkModelConfig, profile_path: Path
) -> BenchmarkResult:
    """Run a single assessment and capture token usage.

    Builds a full application config for the requested model, runs one
    initial assessment on the given profile, and times it.

    Args:
        model_config: Model configuration with pricing
        profile_path: Path to profile YAML file

    Returns:
        BenchmarkResult with cost and token usage
    """
    # Fail fast if the API key / local server for this provider is missing.
    validate_backend(model_config.provider, model_config.model_name)

    # Script lives in <root>/scripts/, so the repo root is two levels up.
    workspace_root = Path(__file__).parent.parent

    with open(workspace_root / "configs/config.yaml") as f:
        default_config = yaml.safe_load(f)

    app_config = AppConfig(
        model=ModelConfig(
            provider=model_config.provider,
            model_name=model_config.model_name,
        ),
        knowledge_base_paths=create_knowledge_base_paths(workspace_root),
        selected_cancer_modules=default_config["knowledge_base"]["cancer_modules"],
        selected_dx_protocols=default_config["knowledge_base"]["dx_protocols"],
    )

    factory = SentinelFactory(app_config)
    conversation = factory.create_conversation_manager()
    user = load_user_file(str(profile_path))

    # NOTE(review): get_openai_callback tracks OpenAI-style token accounting;
    # for "google"/"local" providers the reported counts may be zero — confirm
    # against the installed LangChain version before trusting these numbers.
    start_time = time.perf_counter()
    with get_openai_callback() as cb:
        conversation.initial_assessment(user)
        input_tokens = cb.prompt_tokens
        output_tokens = cb.completion_tokens
    end_time = time.perf_counter()

    assessment_time = end_time - start_time
    token_usage = TokenUsage(input_tokens, output_tokens)
    cost = calculate_cost(token_usage, model_config.pricing)

    return BenchmarkResult(
        model_name=model_config.model_name,
        provider=model_config.provider,
        profile_name=profile_path.stem,
        token_usage=token_usage,
        cost=cost,
        assessment_time_seconds=assessment_time,
    )
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def _rank_prefix(rank: int) -> str:
    """Medal emoji for ranks 1-3, otherwise '<rank>.'."""
    return {1: "🥇", 2: "🥈", 3: "🥉"}.get(rank, f"{rank}.")


def print_results(results: list[BenchmarkResult]) -> None:
    """Print formatted results to console.

    Groups results by model, prints a per-profile breakdown with averages,
    then ranked cost and timing summaries.

    Args:
        results: List of benchmark results
    """
    by_model = defaultdict(list)
    for result in results:
        by_model[result.model_name].append(result)

    # Bug fix: the bottom box border contained a mojibake character (U+FFFD);
    # build both borders from the same run of '═' so they always match.
    box_width = 62
    heavy_rule = "═" * (box_width + 1)
    light_rule = "─" * (box_width + 1)

    lines = []
    lines.append("\n╔" + "═" * box_width + "╗")
    lines.append("║" + "LLM Cost Benchmark Results".center(box_width) + "║")
    lines.append("╚" + "═" * box_width + "╝\n")

    for model_name, model_results in sorted(by_model.items()):
        provider = model_results[0].provider
        lines.append(f"Model: {model_name} ({provider})")

        num_results = len(model_results)
        avg_cost = sum(result.cost for result in model_results) / num_results
        avg_input = (
            sum(result.token_usage.input_tokens for result in model_results)
            / num_results
        )
        avg_output = (
            sum(result.token_usage.output_tokens for result in model_results)
            / num_results
        )
        avg_time = (
            sum(result.assessment_time_seconds for result in model_results)
            / num_results
        )

        for result_index, result in enumerate(model_results):
            is_last = result_index == num_results - 1
            prefix = "└─" if is_last else "├─"
            indent = "   " if is_last else "│  "
            lines.append(f"{prefix} Profile: {result.profile_name}")
            lines.append(f"{indent}├─ Input: {result.token_usage.input_tokens:,}")
            lines.append(f"{indent}├─ Output: {result.token_usage.output_tokens:,}")
            lines.append(f"{indent}├─ Cost: ${result.cost:.4f}")
            lines.append(f"{indent}└─ Time: {result.assessment_time_seconds:.2f}s")

        lines.append(f"└─ Average: ${avg_cost:.4f}")
        lines.append(f"   ├─ Tokens: {avg_input:,.0f} input, {avg_output:,.0f} output")
        lines.append(f"   └─ Time: {avg_time:.2f}s\n")

    lines.append(heavy_rule)
    lines.append("Summary - Model Ranking (Cheapest to Most Expensive)")
    lines.append(light_rule)

    model_averages = sorted(
        (
            (name, sum(result.cost for result in group) / len(group))
            for name, group in by_model.items()
        ),
        key=lambda item: item[1],
    )
    for rank, (model_name, avg_cost) in enumerate(model_averages, 1):
        lines.append(f"{_rank_prefix(rank):<4} {model_name:<25} ${avg_cost:.4f}")

    lines.append("\n" + heavy_rule)
    lines.append("Summary - Timing Performance (Fastest to Slowest)")
    lines.append(light_rule)

    model_timing = sorted(
        (
            (
                name,
                sum(result.assessment_time_seconds for result in group)
                / len(group),
            )
            for name, group in by_model.items()
        ),
        key=lambda item: item[1],
    )
    for rank, (model_name, avg_time) in enumerate(model_timing, 1):
        lines.append(f"{_rank_prefix(rank):<4} {model_name:<25} {avg_time:.2f}s")

    lines.append(f"\nTotal: {len(results)} assessments across {len(by_model)} models")
    lines.append(heavy_rule + "\n")

    logger.info("\n".join(lines))
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
def export_to_csv(results: list[BenchmarkResult], output_path: Path) -> None:
    """Export results to CSV file.

    Args:
        results: List of benchmark results
        output_path: Path to output CSV file
    """
    header = [
        "model_name",
        "provider",
        "profile_name",
        "input_tokens",
        "output_tokens",
        "total_tokens",
        "cost_usd",
        "assessment_time_seconds",
    ]
    rows = [
        [
            result.model_name,
            result.provider,
            result.profile_name,
            result.token_usage.input_tokens,
            result.token_usage.output_tokens,
            result.token_usage.total_tokens,
            f"{result.cost:.6f}",
            f"{result.assessment_time_seconds:.3f}",
        ]
        for result in results
    ]
    with open(output_path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(rows)
    logger.success(f"Results exported to: {output_path}")
|
| 450 |
+
|
| 451 |
+
|
| 452 |
+
def export_to_pdf(
    results: list[BenchmarkResult],
    output_path: Path,
) -> None:
    """Export results to PDF file with formatted table.

    One row per model, sorted cheapest-first, with per-report cost and
    timing averages alongside the configured per-1M-token prices.

    Args:
        results: List of benchmark results
        output_path: Path to output PDF file
    """
    doc = SimpleDocTemplate(
        str(output_path),
        pagesize=letter,
        leftMargin=0.75 * inch,
        rightMargin=0.75 * inch,
        topMargin=0.75 * inch,
        bottomMargin=0.75 * inch,
    )

    elements = []
    styles = getSampleStyleSheet()

    title = Paragraph(
        "<b>LLM Benchmark Report</b>",
        styles["Title"],
    )
    elements.append(title)
    elements.append(Spacer(1, 0.2 * inch))

    # NOTE(review): datetime.now() is naive local time — confirm whether the
    # report should stamp UTC instead.
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    timestamp_text = Paragraph(
        f"Generated: {timestamp}",
        styles["Normal"],
    )
    elements.append(timestamp_text)
    elements.append(Spacer(1, 0.3 * inch))

    by_model = defaultdict(list)
    for result in results:
        by_model[result.model_name].append(result)

    # Map model name -> configured pricing; models missing from
    # BENCHMARK_MODELS render as "N/A" in the price columns below.
    pricing_lookup = {model.model_name: model.pricing for model in BENCHMARK_MODELS}

    results_desc = Paragraph(
        "Average cost and timing for running a single cancer risk assessment given a completed patient questionnaire.",
        styles["Normal"],
    )
    elements.append(results_desc)
    elements.append(Spacer(1, 0.2 * inch))

    table_data = [
        [
            "Model",
            "Provider",
            "Avg Cost\nper Report",
            "Input Price\n(per 1M)",
            "Output Price\n(per 1M)",
            "Avg Input\nTokens",
            "Avg Output\nTokens",
            "Avg Time\n(seconds)",
        ]
    ]

    # Sort by average cost (cheapest first)
    sorted_models = sorted(
        by_model.items(),
        key=lambda model_tuple: sum(result.cost for result in model_tuple[1])
        / len(model_tuple[1]),
    )

    for model_name, model_results in sorted_models:
        provider = model_results[0].provider
        num_results = len(model_results)
        avg_cost = sum(result.cost for result in model_results) / num_results
        avg_input = (
            sum(result.token_usage.input_tokens for result in model_results)
            / num_results
        )
        avg_output = (
            sum(result.token_usage.output_tokens for result in model_results)
            / num_results
        )
        avg_time = (
            sum(result.assessment_time_seconds for result in model_results)
            / num_results
        )

        pricing = pricing_lookup.get(model_name)
        input_price = f"${pricing.input_per_million:.2f}" if pricing else "N/A"
        output_price = f"${pricing.output_per_million:.2f}" if pricing else "N/A"

        table_data.append(
            [
                model_name,
                provider,
                f"${avg_cost:.4f}",
                input_price,
                output_price,
                f"{avg_input:,.0f}",
                f"{avg_output:,.0f}",
                f"{avg_time:.1f}",
            ]
        )

    table = Table(
        table_data,
        colWidths=[
            1.4 * inch,
            0.75 * inch,
            0.8 * inch,
            0.75 * inch,
            0.75 * inch,
            0.7 * inch,
            0.7 * inch,
            0.65 * inch,
        ],
    )

    table_style = TableStyle(
        [
            # Header styling
            ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#4A90E2")),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.whitesmoke),
            ("ALIGN", (0, 0), (-1, 0), "CENTER"),
            ("VALIGN", (0, 0), (-1, 0), "MIDDLE"),
            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
            ("FONTSIZE", (0, 0), (-1, 0), 8),
            ("BOTTOMPADDING", (0, 0), (-1, 0), 10),
            ("TOPPADDING", (0, 0), (-1, 0), 10),
            # Data rows styling
            ("BACKGROUND", (0, 1), (-1, -1), colors.beige),
            ("TEXTCOLOR", (0, 1), (-1, -1), colors.black),
            ("ALIGN", (0, 1), (1, -1), "LEFT"),
            ("ALIGN", (2, 1), (-1, -1), "CENTER"),
            ("VALIGN", (0, 1), (-1, -1), "MIDDLE"),
            ("FONTNAME", (0, 1), (-1, -1), "Helvetica"),
            ("FONTSIZE", (0, 1), (-1, -1), 8),
            ("TOPPADDING", (0, 1), (-1, -1), 7),
            ("BOTTOMPADDING", (0, 1), (-1, -1), 7),
            # Alternating row colors (overrides the flat BACKGROUND above)
            ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.beige, colors.lightgrey]),
            # Grid
            ("GRID", (0, 0), (-1, -1), 1, colors.black),
        ]
    )

    table.setStyle(table_style)
    elements.append(table)
    elements.append(Spacer(1, 0.3 * inch))

    doc.build(elements)
    logger.success(f"PDF report generated: {output_path}")
|
| 604 |
+
|
| 605 |
+
|
| 606 |
+
def parse_args() -> argparse.Namespace:
    """Parse command-line arguments.

    Returns:
        Parsed command-line arguments.
    """
    workspace_root = Path(__file__).parent.parent
    parser = argparse.ArgumentParser(description="Benchmark LLM costs")

    parser.add_argument(
        "--benchmark-dir",
        type=Path,
        default=workspace_root / "examples/benchmark",
        help="Benchmark profile directory",
    )
    parser.add_argument("--models", nargs="+", help="Specific models to test (by name)")
    parser.add_argument("--profiles", nargs="+", help="Specific profiles to test")
    parser.add_argument("--output", type=Path, help="Export to CSV")

    return parser.parse_args()
|
| 637 |
+
|
| 638 |
+
|
| 639 |
+
def main() -> None:
    """Run the benchmark over every selected model/profile pair.

    Loads model and profile configurations, optionally filters them via the
    CLI arguments, runs each assessment, prints a summary table, and writes
    a timestamped PDF report (plus an optional CSV export).

    Raises:
        ValueError: If no matching models or profiles found.
    """
    args = parse_args()

    logger.info("Loading benchmark configuration...")
    models = get_available_models()

    logger.info("Loading profiles...")
    profiles = load_benchmark_profiles(args.benchmark_dir)

    # Narrow the run to explicitly requested models/profiles, failing loudly
    # when a filter matches nothing (a silent empty run would be misleading).
    if args.models:
        models = [m for m in models if m.model_name in args.models]
        if not models:
            raise ValueError(f"No matching models: {args.models}")

    if args.profiles:
        profiles = [p for p in profiles if p["name"] in args.profiles]
        if not profiles:
            raise ValueError(f"No matching profiles: {args.profiles}")

    logger.info(
        f"\nRunning {len(models)} model(s) x {len(profiles)} profile(s)...\n"
    )

    results = []
    for idx, model in enumerate(models, 1):
        for profile in profiles:
            logger.info(
                f"[{idx}/{len(models)}] {model.model_name}: {profile['name']}"
            )
            results.append(run_assessment(model, profile["path"]))

    print_results(results)

    # Generate PDF report with timestamp
    outputs_dir = Path(__file__).parent.parent / "outputs"
    outputs_dir.mkdir(exist_ok=True)

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    export_to_pdf(results, outputs_dir / f"llm_benchmark_{timestamp}.pdf")

    if args.output:
        export_to_csv(results, args.output)


if __name__ == "__main__":
    main()
|
src/sentinel/conversation.py
CHANGED
|
@@ -8,7 +8,10 @@ from langchain_core.runnables.base import Runnable
|
|
| 8 |
|
| 9 |
from .llm_service import extract_thinking
|
| 10 |
from .models import ConversationResponse, InitialAssessment
|
| 11 |
-
from .risk_aggregation import
|
|
|
|
|
|
|
|
|
|
| 12 |
from .user_input import UserInput
|
| 13 |
|
| 14 |
|
|
@@ -58,8 +61,9 @@ class ConversationManager:
|
|
| 58 |
if risk_scores is None:
|
| 59 |
# Try to get from user if it has risk_scores attribute
|
| 60 |
risk_scores = getattr(user, "risk_scores", [])
|
|
|
|
|
|
|
| 61 |
grouped_scores = group_scores_by_cancer_type(risk_scores)
|
| 62 |
-
formatted_scores = format_scores_for_llm(grouped_scores)
|
| 63 |
|
| 64 |
# Invoke LLM with scores as separate context
|
| 65 |
result = self.structured_chain.invoke(
|
|
|
|
| 8 |
|
| 9 |
from .llm_service import extract_thinking
|
| 10 |
from .models import ConversationResponse, InitialAssessment
|
| 11 |
+
from .risk_aggregation import (
|
| 12 |
+
format_scores_with_aggregation,
|
| 13 |
+
group_scores_by_cancer_type,
|
| 14 |
+
)
|
| 15 |
from .user_input import UserInput
|
| 16 |
|
| 17 |
|
|
|
|
| 61 |
if risk_scores is None:
|
| 62 |
# Try to get from user if it has risk_scores attribute
|
| 63 |
risk_scores = getattr(user, "risk_scores", [])
|
| 64 |
+
|
| 65 |
+
formatted_scores = format_scores_with_aggregation(risk_scores)
|
| 66 |
grouped_scores = group_scores_by_cancer_type(risk_scores)
|
|
|
|
| 67 |
|
| 68 |
# Invoke LLM with scores as separate context
|
| 69 |
result = self.structured_chain.invoke(
|
src/sentinel/models.py
CHANGED
|
@@ -1591,6 +1591,20 @@ class RiskScore(SentinelBaseModel):
|
|
| 1591 |
references: list[str] | None = Field(
|
| 1592 |
default=None, description="References to the risk score"
|
| 1593 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1594 |
|
| 1595 |
|
| 1596 |
# ---------------------------------------------------------------------------
|
|
|
|
| 1591 |
references: list[str] | None = Field(
|
| 1592 |
default=None, description="References to the risk score"
|
| 1593 |
)
|
| 1594 |
+
probability_percent: float | None = Field(
|
| 1595 |
+
default=None,
|
| 1596 |
+
description="Numeric probability as percentage (0-100) if score is probability-based",
|
| 1597 |
+
ge=0,
|
| 1598 |
+
le=100,
|
| 1599 |
+
)
|
| 1600 |
+
time_horizon_years: float | None = Field(
|
| 1601 |
+
default=None,
|
| 1602 |
+
description="Time horizon in years for probability (e.g., 5, 10, lifetime=79)",
|
| 1603 |
+
)
|
| 1604 |
+
score_type: Literal["probability", "categorical", "not_applicable"] = Field(
|
| 1605 |
+
default="probability",
|
| 1606 |
+
description="Type of score output",
|
| 1607 |
+
)
|
| 1608 |
|
| 1609 |
|
| 1610 |
# ---------------------------------------------------------------------------
|
src/sentinel/probability_aggregation.py
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Probability aggregation utilities for risk scores.
|
| 2 |
+
|
| 3 |
+
This module provides functions to aggregate probability-based risk scores by cancer type
|
| 4 |
+
and time horizon, and to separate probability-based scores from categorical/diagnostic scores.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import re
|
| 8 |
+
from collections import defaultdict
|
| 9 |
+
from dataclasses import dataclass
|
| 10 |
+
|
| 11 |
+
from sentinel.models import RiskScore
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def normalize_cancer_type(cancer_type: str) -> str:
    """Normalize a cancer type name for consistent grouping.

    Lowercases the input, trims surrounding whitespace, and drops a trailing
    "cancer" word so that variants like "Breast Cancer", "breast", and
    "BREAST" all map to the same key.

    Args:
        cancer_type: Raw cancer type string (e.g., "Breast Cancer", "breast").

    Returns:
        Normalized cancer type (e.g., "breast").

    Examples:
        >>> normalize_cancer_type("Breast Cancer")
        'breast'
        >>> normalize_cancer_type("Lung cancer")
        'lung'
        >>> normalize_cancer_type("PROSTATE")
        'prostate'
    """
    if not cancer_type:
        return ""

    lowered = cancer_type.strip().lower()
    # Strip the trailing "cancer" word (and any whitespace around it), then
    # trim once more in case only leading whitespace remains.
    without_suffix = re.sub(r"\s*cancer\s*$", "", lowered)
    return without_suffix.strip()
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def get_display_cancer_type(cancer_type: str) -> str:
    """Get a display-friendly cancer type name.

    Title-cases the given (already normalized) cancer type for presentation.
    Note: unlike the original docstring claimed, this does NOT remove a
    "cancer" suffix — that is the job of ``normalize_cancer_type``.

    Args:
        cancer_type: Normalized cancer type string (e.g., "breast").

    Returns:
        Display-friendly cancer type name.

    Examples:
        >>> get_display_cancer_type("breast")
        'Breast'
        >>> get_display_cancer_type("lung")
        'Lung'
    """
    return cancer_type.title()
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def categorize_risk(probability_percent: float, time_horizon_years: float) -> str:
    """Categorize a risk level from a probability and its time horizon.

    Shorter horizons (< 10 years) use stricter cut-offs than 10-year and
    lifetime horizons, since the same absolute probability is more alarming
    over a short window.

    Args:
        probability_percent: Probability as percentage (0-100).
        time_horizon_years: Time horizon in years.

    Returns:
        Risk category string ("Very Low" through "High").
    """
    # (upper bound, label) pairs in ascending order; anything at or above the
    # last bound falls through to "High".
    if time_horizon_years < 10:
        bands = [
            (0.5, "Very Low"),
            (1.5, "Low"),
            (3.0, "Moderate"),
            (5.0, "Moderately High"),
        ]
    else:
        bands = [
            (1.0, "Very Low"),
            (3.0, "Low"),
            (7.0, "Moderate"),
            (15.0, "Moderately High"),
        ]

    for upper_bound, label in bands:
        if probability_percent < upper_bound:
            return label
    return "High"
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
@dataclass
class AggregatedRisk:
    """Aggregated risk for a cancer type at a specific time horizon.

    One instance summarizes every probability-based ``RiskScore`` that shares
    the same (normalized) cancer type and time horizon; produced by
    ``aggregate_probabilities``.

    Attributes:
        cancer_type: The cancer type being assessed (normalized, lowercase).
        time_horizon_years: Time horizon in years for the aggregated probability.
        avg_probability_percent: Average probability across all contributing models.
        risk_category: Discrete risk category (e.g., "Low", "Moderate", "High").
        model_count: Number of models that contributed to this aggregation.
        individual_scores: List of original RiskScore objects that were aggregated.
    """

    cancer_type: str
    time_horizon_years: float
    avg_probability_percent: float
    risk_category: str
    model_count: int
    individual_scores: list[RiskScore]
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def aggregate_probabilities(scores: list[RiskScore]) -> list[AggregatedRisk]:
    """Aggregate probability scores by cancer type and time horizon.

    Groups the probability-based scores by (normalized cancer type, time
    horizon) and averages each group's probabilities. Scores with
    ``score_type != "probability"`` or missing cancer type / probability /
    horizon fields are ignored.

    Args:
        scores: List of RiskScore objects to aggregate.

    Returns:
        List of AggregatedRisk objects, sorted by cancer type then time horizon.

    Example:
        >>> scores = [
        ...     RiskScore(name="Gail", cancer_type="breast", probability_percent=1.5,
        ...               time_horizon_years=5.0, score_type="probability", ...),
        ...     RiskScore(name="BOADICEA", cancer_type="breast", probability_percent=2.0,
        ...               time_horizon_years=10.0, score_type="probability", ...),
        ... ]
        >>> aggregated = aggregate_probabilities(scores)
        >>> len(aggregated)
        2  # One for 5-year breast, one for 10-year breast
    """
    buckets: dict[tuple[str, float], list[RiskScore]] = defaultdict(list)

    for score in scores:
        if score.score_type != "probability":
            continue

        incomplete = (
            not score.cancer_type
            or score.probability_percent is None
            or score.time_horizon_years is None
        )
        if incomplete:
            continue

        # Normalizing the cancer type merges spelling variants
        # ("Breast Cancer" vs "breast") into a single group.
        key = (normalize_cancer_type(score.cancer_type), score.time_horizon_years)
        buckets[key].append(score)

    results: list[AggregatedRisk] = []
    for (cancer, horizon), members in buckets.items():
        mean_probability = sum(m.probability_percent for m in members) / len(members)
        results.append(
            AggregatedRisk(
                cancer_type=cancer,
                time_horizon_years=horizon,
                avg_probability_percent=mean_probability,
                risk_category=categorize_risk(mean_probability, horizon),
                model_count=len(members),
                individual_scores=members,
            )
        )

    return sorted(results, key=lambda agg: (agg.cancer_type, agg.time_horizon_years))
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def separate_score_types(scores: list[RiskScore]) -> dict[str, list[RiskScore]]:
    """Separate scores into probability, categorical, and not_applicable groups.

    Scores whose ``score_type`` is not one of the three known buckets are
    silently dropped, mirroring a defensive read of upstream data.

    Args:
        scores: List of RiskScore objects to separate.

    Returns:
        Dictionary with keys "probability", "categorical", and "not_applicable",
        each mapping to a list of RiskScore objects of that type.

    Example:
        >>> scores = [
        ...     RiskScore(name="Gail", score_type="probability", ...),
        ...     RiskScore(name="PCPT", score_type="categorical", ...),
        ...     RiskScore(name="Model", score="N/A: Age out of range", score_type="not_applicable", ...),
        ... ]
        >>> separated = separate_score_types(scores)
        >>> len(separated["probability"])
        1
        >>> len(separated["categorical"])
        1
        >>> len(separated["not_applicable"])
        1
    """
    buckets: dict[str, list[RiskScore]] = {
        "probability": [],
        "categorical": [],
        "not_applicable": [],
    }

    for score in scores:
        bucket = buckets.get(score.score_type)
        if bucket is not None:
            bucket.append(score)

    return buckets
|
src/sentinel/reporting.py
CHANGED
|
@@ -36,6 +36,12 @@ from .models import (
|
|
| 36 |
ContributionStrength,
|
| 37 |
InitialAssessment,
|
| 38 |
RiskFactorCategory,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
)
|
| 40 |
from .user_input import UserInput
|
| 41 |
|
|
@@ -133,6 +139,39 @@ def _get_rec_color(level: int | None, color_format: str = "hex"):
|
|
| 133 |
return PDF_COLORS[color_key] if color_format == "pdf" else HEX_COLORS[color_key]
|
| 134 |
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
def _markdown_to_reportlab(md_text: str) -> str:
|
| 137 |
"""Convert Markdown text to ReportLab-compatible HTML-like markup.
|
| 138 |
|
|
@@ -180,6 +219,7 @@ def generate_excel_report(
|
|
| 180 |
|
| 181 |
_create_summary_sheet(wb, assessment, user_input)
|
| 182 |
_create_risk_scores_sheet(wb, assessment)
|
|
|
|
| 183 |
_create_data_sheet(wb, "User Input Data", user_input.model_dump(mode="json"))
|
| 184 |
_create_data_sheet(wb, "Raw LLM Output", assessment.model_dump(mode="json"))
|
| 185 |
|
|
@@ -466,12 +506,12 @@ def _create_risk_scores_sheet(wb: Workbook, assessment: InitialAssessment) -> No
|
|
| 466 |
header_fill = PatternFill(start_color=HEX_COLORS["header_fill"], fill_type="solid")
|
| 467 |
wrap_alignment = Alignment(wrap_text=True, vertical="top")
|
| 468 |
|
| 469 |
-
ws.merge_cells("A1:
|
| 470 |
ws["A1"] = "Calculated Risk Scores (Ground Truth)"
|
| 471 |
ws["A1"].font = title_font
|
| 472 |
ws["A1"].alignment = Alignment(horizontal="center")
|
| 473 |
|
| 474 |
-
ws.merge_cells("A2:
|
| 475 |
ws["A2"] = "Scores calculated using validated clinical risk models"
|
| 476 |
ws["A2"].alignment = Alignment(horizontal="center")
|
| 477 |
|
|
@@ -481,8 +521,17 @@ def _create_risk_scores_sheet(wb: Workbook, assessment: InitialAssessment) -> No
|
|
| 481 |
ws.cell(row=current_row, column=1, value="No risk scores calculated")
|
| 482 |
return
|
| 483 |
|
| 484 |
-
# Create headers
|
| 485 |
-
headers = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
for col_idx, header in enumerate(headers, 1):
|
| 487 |
cell = ws.cell(row=current_row, column=col_idx, value=header)
|
| 488 |
cell.font = header_font
|
|
@@ -500,13 +549,30 @@ def _create_risk_scores_sheet(wb: Workbook, assessment: InitialAssessment) -> No
|
|
| 500 |
ws.cell(row=current_row, column=2, value=score.name)
|
| 501 |
ws.cell(row=current_row, column=3, value=score.score or "N/A")
|
| 502 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
interp_cell = ws.cell(
|
| 504 |
-
row=current_row, column=
|
| 505 |
)
|
| 506 |
interp_cell.alignment = wrap_alignment
|
| 507 |
|
| 508 |
refs = "; ".join(score.references) if score.references else "N/A"
|
| 509 |
-
refs_cell = ws.cell(row=current_row, column=
|
| 510 |
refs_cell.alignment = wrap_alignment
|
| 511 |
|
| 512 |
current_row += 1
|
|
@@ -515,8 +581,98 @@ def _create_risk_scores_sheet(wb: Workbook, assessment: InitialAssessment) -> No
|
|
| 515 |
ws.column_dimensions["A"].width = 20
|
| 516 |
ws.column_dimensions["B"].width = 25
|
| 517 |
ws.column_dimensions["C"].width = 15
|
| 518 |
-
ws.column_dimensions["D"].width =
|
| 519 |
-
ws.column_dimensions["E"].width =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 520 |
|
| 521 |
|
| 522 |
def _create_data_sheet(wb: Workbook, title: str, data: dict) -> None:
|
|
@@ -787,65 +943,183 @@ def generate_pdf_report(
|
|
| 787 |
story.append(Paragraph("Assessment", heading_style))
|
| 788 |
story.append(Spacer(1, SPACER_NORMAL))
|
| 789 |
|
| 790 |
-
# ---
|
| 791 |
if assessment.calculated_risk_scores:
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
| 799 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 800 |
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 806 |
]
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
|
| 815 |
-
|
| 816 |
-
|
| 817 |
-
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 821 |
|
| 822 |
-
|
| 823 |
-
for cancer_type, scores in sorted(assessment.calculated_risk_scores.items()):
|
| 824 |
-
for i, score in enumerate(scores):
|
| 825 |
-
# Only show cancer type on first row for each cancer
|
| 826 |
-
cancer_cell = Paragraph(cancer_type, table_body_style) if i == 0 else ""
|
| 827 |
-
score_data.append(
|
| 828 |
[
|
| 829 |
-
cancer_cell,
|
| 830 |
Paragraph(score.name, table_body_style),
|
|
|
|
| 831 |
Paragraph(score.score or "N/A", table_body_style),
|
| 832 |
-
Paragraph(
|
|
|
|
| 833 |
]
|
| 834 |
)
|
| 835 |
|
| 836 |
-
|
| 837 |
-
|
| 838 |
-
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
|
| 842 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 843 |
|
| 844 |
-
# --- New
|
| 845 |
headers = [
|
| 846 |
Paragraph("<b>Overall Risk Score</b>", summary_header_style),
|
| 847 |
Paragraph("<b>Risk Breakdown</b>", summary_header_style),
|
| 848 |
-
Paragraph("<b>Dx Recommendations</b>", summary_header_style),
|
| 849 |
]
|
| 850 |
|
| 851 |
gauge = ""
|
|
@@ -853,16 +1127,13 @@ def generate_pdf_report(
|
|
| 853 |
gauge = _create_risk_gauge(assessment.overall_risk_score, width=120, height=70)
|
| 854 |
|
| 855 |
risk_panel = _create_risk_breakdown_chart(
|
| 856 |
-
assessment.risk_assessments, width=
|
| 857 |
-
)
|
| 858 |
-
dx_panel = _create_dx_recommendations_summary(
|
| 859 |
-
assessment.dx_recommendations, width=150, height=70
|
| 860 |
)
|
| 861 |
|
| 862 |
-
content_row = [gauge, risk_panel
|
| 863 |
|
| 864 |
summary_data = [headers, content_row]
|
| 865 |
-
summary_table = Table(summary_data, colWidths=[
|
| 866 |
summary_table.setStyle(
|
| 867 |
TableStyle(
|
| 868 |
[
|
|
@@ -1059,77 +1330,8 @@ def generate_pdf_report(
|
|
| 1059 |
)
|
| 1060 |
story.append(factor_table)
|
| 1061 |
|
| 1062 |
-
if ra.recommended_steps:
|
| 1063 |
-
story.append(Spacer(1, SPACER_SMALL))
|
| 1064 |
-
story.append(Paragraph("<b>Recommended steps</b>", styles["BodyText"]))
|
| 1065 |
-
steps = (
|
| 1066 |
-
ra.recommended_steps
|
| 1067 |
-
if isinstance(ra.recommended_steps, list)
|
| 1068 |
-
else [ra.recommended_steps]
|
| 1069 |
-
)
|
| 1070 |
-
for step in steps:
|
| 1071 |
-
p = Paragraph(f"• {step}", indented_style)
|
| 1072 |
-
story.append(p)
|
| 1073 |
-
|
| 1074 |
story.append(Spacer(1, SPACER_NORMAL))
|
| 1075 |
|
| 1076 |
-
story.append(Paragraph("Diagnostic Recommendations", subheading_style))
|
| 1077 |
-
story.append(Spacer(1, SPACER_SMALL))
|
| 1078 |
-
dx_intro_text = """
|
| 1079 |
-
Based on your risk profile, the following diagnostic tests are recommended. The recommendation
|
| 1080 |
-
level is on a scale from 1 (lowest priority) to 5 (highest priority/urgency).
|
| 1081 |
-
"""
|
| 1082 |
-
story.append(Paragraph(dx_intro_text, styles["BodyText"]))
|
| 1083 |
-
story.append(Spacer(1, SPACER_SMALL))
|
| 1084 |
-
|
| 1085 |
-
dx_data = [
|
| 1086 |
-
[
|
| 1087 |
-
Paragraph(h, table_header_style)
|
| 1088 |
-
for h in ["Test", "Rec.", "Frequency", "Rationale"]
|
| 1089 |
-
]
|
| 1090 |
-
]
|
| 1091 |
-
dx_style_cmds = [
|
| 1092 |
-
(
|
| 1093 |
-
"BACKGROUND",
|
| 1094 |
-
(0, 0),
|
| 1095 |
-
(-1, 0),
|
| 1096 |
-
colors.HexColor(f"#{HEX_COLORS['header_fill']}"),
|
| 1097 |
-
),
|
| 1098 |
-
("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
|
| 1099 |
-
("GRID", (0, 0), (-1, -1), 1, colors.black),
|
| 1100 |
-
("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
|
| 1101 |
-
("BOTTOMPADDING", (0, 0), (-1, -1), 4),
|
| 1102 |
-
("TOPPADDING", (0, 0), (-1, -1), 4),
|
| 1103 |
-
]
|
| 1104 |
-
sorted_dx_recommendations = sorted(
|
| 1105 |
-
assessment.dx_recommendations,
|
| 1106 |
-
key=lambda x: x.recommendation_level or 0,
|
| 1107 |
-
reverse=True,
|
| 1108 |
-
)
|
| 1109 |
-
for i, dr in enumerate(sorted_dx_recommendations, 1):
|
| 1110 |
-
dx_data.append(
|
| 1111 |
-
[
|
| 1112 |
-
Paragraph(dr.test_name or "", table_body_style),
|
| 1113 |
-
Paragraph(str(dr.recommendation_level), table_body_style_centered),
|
| 1114 |
-
Paragraph(dr.frequency or "", table_body_style),
|
| 1115 |
-
Paragraph(dr.rationale or "", table_body_style),
|
| 1116 |
-
]
|
| 1117 |
-
)
|
| 1118 |
-
dx_style_cmds.append(
|
| 1119 |
-
(
|
| 1120 |
-
"BACKGROUND",
|
| 1121 |
-
(1, i),
|
| 1122 |
-
(1, i),
|
| 1123 |
-
_get_rec_color(dr.recommendation_level, "pdf"),
|
| 1124 |
-
)
|
| 1125 |
-
)
|
| 1126 |
-
dx_widths = [1.5 * inch, 0.5 * inch, 1.5 * inch, 2.9 * inch]
|
| 1127 |
-
scaled_widths = [w * (CONTENT_WIDTH / sum(dx_widths)) for w in dx_widths]
|
| 1128 |
-
dx_table = Table(
|
| 1129 |
-
dx_data, colWidths=scaled_widths, style=dx_style_cmds, splitByRow=1
|
| 1130 |
-
)
|
| 1131 |
-
story.append(dx_table)
|
| 1132 |
-
|
| 1133 |
story.append(Spacer(1, SPACER_NORMAL))
|
| 1134 |
disclaimer = """
|
| 1135 |
IMPORTANT: This assessment does not replace professional medical advice.
|
|
@@ -1137,11 +1339,73 @@ def generate_pdf_report(
|
|
| 1137 |
story.append(Paragraph(disclaimer, styles["BodyText"]))
|
| 1138 |
|
| 1139 |
# --- Appendix Section ---
|
| 1140 |
-
if assessment.thinking or assessment.reasoning:
|
| 1141 |
story.append(PageBreak())
|
| 1142 |
story.append(Paragraph("Appendix", heading_style))
|
| 1143 |
story.append(Spacer(1, SPACER_NORMAL))
|
| 1144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1145 |
if assessment.thinking:
|
| 1146 |
story.append(Paragraph("Thinking Process", subheading_style))
|
| 1147 |
# Use a preformatted style for better readability of raw text
|
|
|
|
| 36 |
ContributionStrength,
|
| 37 |
InitialAssessment,
|
| 38 |
RiskFactorCategory,
|
| 39 |
+
RiskScore,
|
| 40 |
+
)
|
| 41 |
+
from .probability_aggregation import (
|
| 42 |
+
aggregate_probabilities,
|
| 43 |
+
get_display_cancer_type,
|
| 44 |
+
separate_score_types,
|
| 45 |
)
|
| 46 |
from .user_input import UserInput
|
| 47 |
|
|
|
|
| 139 |
return PDF_COLORS[color_key] if color_format == "pdf" else HEX_COLORS[color_key]
|
| 140 |
|
| 141 |
|
| 142 |
+
def _categorize_numeric_score(model_name: str, score_value: float) -> str:
|
| 143 |
+
"""Categorize numeric risk scores from models like GAIL and CRC-PRO.
|
| 144 |
+
|
| 145 |
+
Args:
|
| 146 |
+
model_name: Name of the risk model.
|
| 147 |
+
score_value: Numeric score value.
|
| 148 |
+
|
| 149 |
+
Returns:
|
| 150 |
+
Risk category string.
|
| 151 |
+
"""
|
| 152 |
+
model_lower = model_name.lower()
|
| 153 |
+
|
| 154 |
+
if "gail" in model_lower:
|
| 155 |
+
# GAIL: 5-year breast cancer risk
|
| 156 |
+
# High Risk: >= 1.67%, Average Risk: < 1.67%
|
| 157 |
+
if score_value >= 1.67:
|
| 158 |
+
return "High Risk"
|
| 159 |
+
else:
|
| 160 |
+
return "Average Risk"
|
| 161 |
+
|
| 162 |
+
elif "crc" in model_lower or "crc_pro" in model_lower:
|
| 163 |
+
# CRC-PRO: 10-year colorectal cancer risk
|
| 164 |
+
# Low: <0.5%, Moderate: 0.5-2.0%, High: >2.0%
|
| 165 |
+
if score_value < 0.5:
|
| 166 |
+
return "Low Risk"
|
| 167 |
+
elif score_value < 2.0:
|
| 168 |
+
return "Moderate Risk"
|
| 169 |
+
else:
|
| 170 |
+
return "High Risk"
|
| 171 |
+
|
| 172 |
+
return "N/A"
|
| 173 |
+
|
| 174 |
+
|
| 175 |
def _markdown_to_reportlab(md_text: str) -> str:
|
| 176 |
"""Convert Markdown text to ReportLab-compatible HTML-like markup.
|
| 177 |
|
|
|
|
| 219 |
|
| 220 |
_create_summary_sheet(wb, assessment, user_input)
|
| 221 |
_create_risk_scores_sheet(wb, assessment)
|
| 222 |
+
_create_aggregated_probabilities_sheet(wb, assessment)
|
| 223 |
_create_data_sheet(wb, "User Input Data", user_input.model_dump(mode="json"))
|
| 224 |
_create_data_sheet(wb, "Raw LLM Output", assessment.model_dump(mode="json"))
|
| 225 |
|
|
|
|
| 506 |
header_fill = PatternFill(start_color=HEX_COLORS["header_fill"], fill_type="solid")
|
| 507 |
wrap_alignment = Alignment(wrap_text=True, vertical="top")
|
| 508 |
|
| 509 |
+
ws.merge_cells("A1:H1")
|
| 510 |
ws["A1"] = "Calculated Risk Scores (Ground Truth)"
|
| 511 |
ws["A1"].font = title_font
|
| 512 |
ws["A1"].alignment = Alignment(horizontal="center")
|
| 513 |
|
| 514 |
+
ws.merge_cells("A2:H2")
|
| 515 |
ws["A2"] = "Scores calculated using validated clinical risk models"
|
| 516 |
ws["A2"].alignment = Alignment(horizontal="center")
|
| 517 |
|
|
|
|
| 521 |
ws.cell(row=current_row, column=1, value="No risk scores calculated")
|
| 522 |
return
|
| 523 |
|
| 524 |
+
# Create headers with new columns
|
| 525 |
+
headers = [
|
| 526 |
+
"Cancer Type",
|
| 527 |
+
"Model Name",
|
| 528 |
+
"Score",
|
| 529 |
+
"Probability (%)",
|
| 530 |
+
"Time Horizon (years)",
|
| 531 |
+
"Score Type",
|
| 532 |
+
"Interpretation",
|
| 533 |
+
"References",
|
| 534 |
+
]
|
| 535 |
for col_idx, header in enumerate(headers, 1):
|
| 536 |
cell = ws.cell(row=current_row, column=col_idx, value=header)
|
| 537 |
cell.font = header_font
|
|
|
|
| 549 |
ws.cell(row=current_row, column=2, value=score.name)
|
| 550 |
ws.cell(row=current_row, column=3, value=score.score or "N/A")
|
| 551 |
|
| 552 |
+
# Add new probability fields
|
| 553 |
+
prob_value = (
|
| 554 |
+
f"{score.probability_percent:.2f}"
|
| 555 |
+
if score.probability_percent is not None
|
| 556 |
+
else "N/A"
|
| 557 |
+
)
|
| 558 |
+
ws.cell(row=current_row, column=4, value=prob_value)
|
| 559 |
+
|
| 560 |
+
horizon_value = (
|
| 561 |
+
str(score.time_horizon_years)
|
| 562 |
+
if score.time_horizon_years is not None
|
| 563 |
+
else "N/A"
|
| 564 |
+
)
|
| 565 |
+
ws.cell(row=current_row, column=5, value=horizon_value)
|
| 566 |
+
|
| 567 |
+
ws.cell(row=current_row, column=6, value=score.score_type)
|
| 568 |
+
|
| 569 |
interp_cell = ws.cell(
|
| 570 |
+
row=current_row, column=7, value=score.interpretation or "N/A"
|
| 571 |
)
|
| 572 |
interp_cell.alignment = wrap_alignment
|
| 573 |
|
| 574 |
refs = "; ".join(score.references) if score.references else "N/A"
|
| 575 |
+
refs_cell = ws.cell(row=current_row, column=8, value=refs)
|
| 576 |
refs_cell.alignment = wrap_alignment
|
| 577 |
|
| 578 |
current_row += 1
|
|
|
|
| 581 |
ws.column_dimensions["A"].width = 20
|
| 582 |
ws.column_dimensions["B"].width = 25
|
| 583 |
ws.column_dimensions["C"].width = 15
|
| 584 |
+
ws.column_dimensions["D"].width = 15
|
| 585 |
+
ws.column_dimensions["E"].width = 20
|
| 586 |
+
ws.column_dimensions["F"].width = 15
|
| 587 |
+
ws.column_dimensions["G"].width = 50
|
| 588 |
+
ws.column_dimensions["H"].width = 40
|
| 589 |
+
|
| 590 |
+
|
| 591 |
+
def _create_aggregated_probabilities_sheet(
|
| 592 |
+
wb: Workbook, assessment: InitialAssessment
|
| 593 |
+
) -> None:
|
| 594 |
+
"""Create a worksheet with aggregated probability scores.
|
| 595 |
+
|
| 596 |
+
Args:
|
| 597 |
+
wb: An openpyxl workbook.
|
| 598 |
+
assessment: The structured initial assessment containing calculated scores.
|
| 599 |
+
"""
|
| 600 |
+
ws = wb.create_sheet("Aggregated Probabilities")
|
| 601 |
+
|
| 602 |
+
title_font = Font(bold=True, size=16, name="Calibri")
|
| 603 |
+
header_font = Font(bold=True, color=HEX_COLORS["header_font"], name="Calibri")
|
| 604 |
+
header_fill = PatternFill(start_color=HEX_COLORS["header_fill"], fill_type="solid")
|
| 605 |
+
|
| 606 |
+
ws.merge_cells("A1:F1")
|
| 607 |
+
ws["A1"] = "Aggregated Probability Scores"
|
| 608 |
+
ws["A1"].font = title_font
|
| 609 |
+
ws["A1"].alignment = Alignment(horizontal="center")
|
| 610 |
+
|
| 611 |
+
ws.merge_cells("A2:F2")
|
| 612 |
+
ws["A2"] = "Average probabilities when multiple models assess the same cancer type"
|
| 613 |
+
ws["A2"].alignment = Alignment(horizontal="center")
|
| 614 |
+
|
| 615 |
+
current_row = 4
|
| 616 |
+
|
| 617 |
+
# Collect all scores from the grouped format
|
| 618 |
+
all_scores: list[RiskScore] = []
|
| 619 |
+
for scores_list in assessment.calculated_risk_scores.values():
|
| 620 |
+
all_scores.extend(scores_list)
|
| 621 |
+
|
| 622 |
+
# Separate and aggregate
|
| 623 |
+
separated = separate_score_types(all_scores)
|
| 624 |
+
probability_scores = separated["probability"]
|
| 625 |
+
|
| 626 |
+
if not probability_scores:
|
| 627 |
+
ws.cell(
|
| 628 |
+
row=current_row, column=1, value="No probability-based scores calculated"
|
| 629 |
+
)
|
| 630 |
+
return
|
| 631 |
+
|
| 632 |
+
aggregated = aggregate_probabilities(probability_scores)
|
| 633 |
+
|
| 634 |
+
# Create headers
|
| 635 |
+
headers = [
|
| 636 |
+
"Cancer Type",
|
| 637 |
+
"Time Horizon (years)",
|
| 638 |
+
"Average Probability (%)",
|
| 639 |
+
"Model Count",
|
| 640 |
+
"Contributing Models",
|
| 641 |
+
"Individual Probabilities",
|
| 642 |
+
]
|
| 643 |
+
for col_idx, header in enumerate(headers, 1):
|
| 644 |
+
cell = ws.cell(row=current_row, column=col_idx, value=header)
|
| 645 |
+
cell.font = header_font
|
| 646 |
+
cell.fill = header_fill
|
| 647 |
+
|
| 648 |
+
current_row += 1
|
| 649 |
+
|
| 650 |
+
# Add aggregated scores
|
| 651 |
+
for agg in aggregated:
|
| 652 |
+
# Use display function for clean cancer type name
|
| 653 |
+
display_name = get_display_cancer_type(agg.cancer_type)
|
| 654 |
+
ws.cell(row=current_row, column=1, value=display_name)
|
| 655 |
+
ws.cell(row=current_row, column=2, value=str(agg.time_horizon_years))
|
| 656 |
+
ws.cell(row=current_row, column=3, value=f"{agg.avg_probability_percent:.2f}")
|
| 657 |
+
ws.cell(row=current_row, column=4, value=str(agg.model_count))
|
| 658 |
+
|
| 659 |
+
model_names = ", ".join(s.name for s in agg.individual_scores)
|
| 660 |
+
ws.cell(row=current_row, column=5, value=model_names)
|
| 661 |
+
|
| 662 |
+
individual_probs = ", ".join(
|
| 663 |
+
f"{s.name}: {s.probability_percent:.2f}%" for s in agg.individual_scores
|
| 664 |
+
)
|
| 665 |
+
ws.cell(row=current_row, column=6, value=individual_probs)
|
| 666 |
+
|
| 667 |
+
current_row += 1
|
| 668 |
+
|
| 669 |
+
# Set column widths
|
| 670 |
+
ws.column_dimensions["A"].width = 20
|
| 671 |
+
ws.column_dimensions["B"].width = 20
|
| 672 |
+
ws.column_dimensions["C"].width = 25
|
| 673 |
+
ws.column_dimensions["D"].width = 15
|
| 674 |
+
ws.column_dimensions["E"].width = 30
|
| 675 |
+
ws.column_dimensions["F"].width = 50
|
| 676 |
|
| 677 |
|
| 678 |
def _create_data_sheet(wb: Workbook, title: str, data: dict) -> None:
|
|
|
|
| 943 |
story.append(Paragraph("Assessment", heading_style))
|
| 944 |
story.append(Spacer(1, SPACER_NORMAL))
|
| 945 |
|
| 946 |
+
# --- Aggregated Probability Scores Section (Early in document) ---
|
| 947 |
if assessment.calculated_risk_scores:
|
| 948 |
+
# Collect all scores from grouped format
|
| 949 |
+
all_scores: list[RiskScore] = []
|
| 950 |
+
for scores_list in assessment.calculated_risk_scores.values():
|
| 951 |
+
all_scores.extend(scores_list)
|
| 952 |
+
|
| 953 |
+
# Separate and aggregate
|
| 954 |
+
separated = separate_score_types(all_scores)
|
| 955 |
+
probability_scores = separated["probability"]
|
| 956 |
+
categorical_scores = separated["categorical"]
|
| 957 |
+
|
| 958 |
+
if probability_scores:
|
| 959 |
+
aggregated = aggregate_probabilities(probability_scores)
|
| 960 |
+
|
| 961 |
+
if aggregated:
|
| 962 |
+
story.append(
|
| 963 |
+
Paragraph("Aggregated Probability Scores", subheading_style)
|
| 964 |
+
)
|
| 965 |
+
story.append(Spacer(1, SPACER_SMALL))
|
| 966 |
+
agg_intro = """
|
| 967 |
+
When multiple models assess the same cancer type, we aggregate their probabilities
|
| 968 |
+
to provide a comprehensive risk estimate. The table below shows averaged probabilities
|
| 969 |
+
by cancer type and time horizon.
|
| 970 |
+
"""
|
| 971 |
+
story.append(Paragraph(agg_intro, styles["BodyText"]))
|
| 972 |
+
story.append(Spacer(1, SPACER_SMALL))
|
| 973 |
+
|
| 974 |
+
# Create table for aggregated probabilities
|
| 975 |
+
agg_data = [
|
| 976 |
+
[
|
| 977 |
+
Paragraph(h, table_header_style)
|
| 978 |
+
for h in [
|
| 979 |
+
"Cancer Type",
|
| 980 |
+
"Time Horizon",
|
| 981 |
+
"Probability",
|
| 982 |
+
"Models",
|
| 983 |
+
]
|
| 984 |
+
]
|
| 985 |
+
]
|
| 986 |
+
agg_style_cmds = [
|
| 987 |
+
(
|
| 988 |
+
"BACKGROUND",
|
| 989 |
+
(0, 0),
|
| 990 |
+
(-1, 0),
|
| 991 |
+
colors.HexColor(f"#{HEX_COLORS['header_fill']}"),
|
| 992 |
+
),
|
| 993 |
+
("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
|
| 994 |
+
("GRID", (0, 0), (-1, -1), 1, colors.black),
|
| 995 |
+
("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
|
| 996 |
+
("BOTTOMPADDING", (0, 0), (-1, -1), 4),
|
| 997 |
+
("TOPPADDING", (0, 0), (-1, -1), 4),
|
| 998 |
+
]
|
| 999 |
|
| 1000 |
+
for agg in aggregated:
|
| 1001 |
+
model_names = ", ".join(s.name for s in agg.individual_scores)
|
| 1002 |
+
time_horizon_text = (
|
| 1003 |
+
f"{agg.time_horizon_years:.1f} years"
|
| 1004 |
+
if agg.time_horizon_years != 79.0
|
| 1005 |
+
else "Lifetime"
|
| 1006 |
+
)
|
| 1007 |
+
# Use display function to get clean cancer type name
|
| 1008 |
+
display_cancer_type = get_display_cancer_type(agg.cancer_type)
|
| 1009 |
+
|
| 1010 |
+
agg_data.append(
|
| 1011 |
+
[
|
| 1012 |
+
Paragraph(display_cancer_type, table_body_style),
|
| 1013 |
+
Paragraph(time_horizon_text, table_body_style),
|
| 1014 |
+
Paragraph(
|
| 1015 |
+
f"{agg.avg_probability_percent:.2f}%", table_body_style
|
| 1016 |
+
),
|
| 1017 |
+
Paragraph(
|
| 1018 |
+
f"{model_names} (n={agg.model_count})", table_body_style
|
| 1019 |
+
),
|
| 1020 |
+
]
|
| 1021 |
+
)
|
| 1022 |
+
|
| 1023 |
+
agg_widths = [1.5 * inch, 1.5 * inch, 1.2 * inch, 2.3 * inch]
|
| 1024 |
+
agg_scaled_widths = [
|
| 1025 |
+
w * (CONTENT_WIDTH / sum(agg_widths)) for w in agg_widths
|
| 1026 |
+
]
|
| 1027 |
+
agg_table = Table(
|
| 1028 |
+
agg_data,
|
| 1029 |
+
colWidths=agg_scaled_widths,
|
| 1030 |
+
style=agg_style_cmds,
|
| 1031 |
+
splitByRow=1,
|
| 1032 |
+
)
|
| 1033 |
+
story.append(agg_table)
|
| 1034 |
+
story.append(Spacer(1, SPACER_NORMAL))
|
| 1035 |
+
|
| 1036 |
+
# Add categorical/diagnostic scores section if present (Early in document)
|
| 1037 |
+
if categorical_scores:
|
| 1038 |
+
story.append(Paragraph("Categorical Risk Assessments", subheading_style))
|
| 1039 |
+
story.append(Spacer(1, SPACER_SMALL))
|
| 1040 |
+
cat_intro = """
|
| 1041 |
+
The following assessments provide numeric risk scores with categorical classifications.
|
| 1042 |
+
These models output absolute risk values that are categorized based on validated thresholds.
|
| 1043 |
+
"""
|
| 1044 |
+
story.append(Paragraph(cat_intro, styles["BodyText"]))
|
| 1045 |
+
story.append(Spacer(1, SPACER_SMALL))
|
| 1046 |
+
|
| 1047 |
+
# Create table for categorical scores
|
| 1048 |
+
cat_data = [
|
| 1049 |
+
[
|
| 1050 |
+
Paragraph(h, table_header_style)
|
| 1051 |
+
for h in [
|
| 1052 |
+
"Model",
|
| 1053 |
+
"Cancer Type",
|
| 1054 |
+
"Score",
|
| 1055 |
+
"Time Horizon",
|
| 1056 |
+
"Risk Category",
|
| 1057 |
+
]
|
| 1058 |
+
]
|
| 1059 |
]
|
| 1060 |
+
cat_style_cmds = [
|
| 1061 |
+
(
|
| 1062 |
+
"BACKGROUND",
|
| 1063 |
+
(0, 0),
|
| 1064 |
+
(-1, 0),
|
| 1065 |
+
colors.HexColor(f"#{HEX_COLORS['header_fill']}"),
|
| 1066 |
+
),
|
| 1067 |
+
("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
|
| 1068 |
+
("GRID", (0, 0), (-1, -1), 1, colors.black),
|
| 1069 |
+
("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
|
| 1070 |
+
("BOTTOMPADDING", (0, 0), (-1, -1), 4),
|
| 1071 |
+
("TOPPADDING", (0, 0), (-1, -1), 4),
|
| 1072 |
+
]
|
| 1073 |
+
|
| 1074 |
+
for score in categorical_scores:
|
| 1075 |
+
# Format time horizon if available
|
| 1076 |
+
time_horizon_text = "N/A"
|
| 1077 |
+
if score.time_horizon_years is not None:
|
| 1078 |
+
if score.time_horizon_years == 79.0:
|
| 1079 |
+
time_horizon_text = "Lifetime"
|
| 1080 |
+
else:
|
| 1081 |
+
time_horizon_text = f"{score.time_horizon_years:.1f} years"
|
| 1082 |
+
|
| 1083 |
+
# Calculate risk category from numeric score
|
| 1084 |
+
risk_category_text = "N/A"
|
| 1085 |
+
try:
|
| 1086 |
+
# Try to extract numeric value from score
|
| 1087 |
+
score_value = float(score.score or "0")
|
| 1088 |
+
risk_category_text = _categorize_numeric_score(
|
| 1089 |
+
score.name,
|
| 1090 |
+
score_value,
|
| 1091 |
+
)
|
| 1092 |
+
except (ValueError, TypeError):
|
| 1093 |
+
# If score is not numeric, leave as N/A
|
| 1094 |
+
pass
|
| 1095 |
|
| 1096 |
+
cat_data.append(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1097 |
[
|
|
|
|
| 1098 |
Paragraph(score.name, table_body_style),
|
| 1099 |
+
Paragraph(score.cancer_type or "N/A", table_body_style),
|
| 1100 |
Paragraph(score.score or "N/A", table_body_style),
|
| 1101 |
+
Paragraph(time_horizon_text, table_body_style),
|
| 1102 |
+
Paragraph(risk_category_text, table_body_style),
|
| 1103 |
]
|
| 1104 |
)
|
| 1105 |
|
| 1106 |
+
cat_widths = [1.3 * inch, 1.3 * inch, 0.8 * inch, 1.2 * inch, 1.9 * inch]
|
| 1107 |
+
cat_scaled_widths = [
|
| 1108 |
+
w * (CONTENT_WIDTH / sum(cat_widths)) for w in cat_widths
|
| 1109 |
+
]
|
| 1110 |
+
cat_table = Table(
|
| 1111 |
+
cat_data,
|
| 1112 |
+
colWidths=cat_scaled_widths,
|
| 1113 |
+
style=cat_style_cmds,
|
| 1114 |
+
splitByRow=1,
|
| 1115 |
+
)
|
| 1116 |
+
story.append(cat_table)
|
| 1117 |
+
story.append(Spacer(1, SPACER_NORMAL))
|
| 1118 |
|
| 1119 |
+
# --- New 2-Column Summary Section ---
|
| 1120 |
headers = [
|
| 1121 |
Paragraph("<b>Overall Risk Score</b>", summary_header_style),
|
| 1122 |
Paragraph("<b>Risk Breakdown</b>", summary_header_style),
|
|
|
|
| 1123 |
]
|
| 1124 |
|
| 1125 |
gauge = ""
|
|
|
|
| 1127 |
gauge = _create_risk_gauge(assessment.overall_risk_score, width=120, height=70)
|
| 1128 |
|
| 1129 |
risk_panel = _create_risk_breakdown_chart(
|
| 1130 |
+
assessment.risk_assessments, width=200, height=70
|
|
|
|
|
|
|
|
|
|
| 1131 |
)
|
| 1132 |
|
| 1133 |
+
content_row = [gauge, risk_panel]
|
| 1134 |
|
| 1135 |
summary_data = [headers, content_row]
|
| 1136 |
+
summary_table = Table(summary_data, colWidths=[3.2 * inch, 3.3 * inch])
|
| 1137 |
summary_table.setStyle(
|
| 1138 |
TableStyle(
|
| 1139 |
[
|
|
|
|
| 1330 |
)
|
| 1331 |
story.append(factor_table)
|
| 1332 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1333 |
story.append(Spacer(1, SPACER_NORMAL))
|
| 1334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1335 |
story.append(Spacer(1, SPACER_NORMAL))
|
| 1336 |
disclaimer = """
|
| 1337 |
IMPORTANT: This assessment does not replace professional medical advice.
|
|
|
|
| 1339 |
story.append(Paragraph(disclaimer, styles["BodyText"]))
|
| 1340 |
|
| 1341 |
# --- Appendix Section ---
|
| 1342 |
+
if assessment.thinking or assessment.reasoning or assessment.calculated_risk_scores:
|
| 1343 |
story.append(PageBreak())
|
| 1344 |
story.append(Paragraph("Appendix", heading_style))
|
| 1345 |
story.append(Spacer(1, SPACER_NORMAL))
|
| 1346 |
|
| 1347 |
+
# --- Calculated Risk Scores Section (In Appendix) ---
|
| 1348 |
+
if assessment.calculated_risk_scores:
|
| 1349 |
+
story.append(Paragraph("Calculated Risk Scores", subheading_style))
|
| 1350 |
+
story.append(Spacer(1, SPACER_SMALL))
|
| 1351 |
+
score_intro_text = """
|
| 1352 |
+
The following risk scores have been calculated using validated clinical risk models.
|
| 1353 |
+
These scores provide detailed quantitative estimates of cancer risk based on your profile.
|
| 1354 |
+
"""
|
| 1355 |
+
story.append(Paragraph(score_intro_text, styles["BodyText"]))
|
| 1356 |
+
story.append(Spacer(1, SPACER_SMALL))
|
| 1357 |
+
|
| 1358 |
+
score_data = [
|
| 1359 |
+
[
|
| 1360 |
+
Paragraph(h, table_header_style)
|
| 1361 |
+
for h in ["Cancer Type", "Model", "Score", "Interpretation"]
|
| 1362 |
+
]
|
| 1363 |
+
]
|
| 1364 |
+
score_style_cmds = [
|
| 1365 |
+
(
|
| 1366 |
+
"BACKGROUND",
|
| 1367 |
+
(0, 0),
|
| 1368 |
+
(-1, 0),
|
| 1369 |
+
colors.HexColor(f"#{HEX_COLORS['header_fill']}"),
|
| 1370 |
+
),
|
| 1371 |
+
("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
|
| 1372 |
+
("GRID", (0, 0), (-1, -1), 1, colors.black),
|
| 1373 |
+
("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
|
| 1374 |
+
("BOTTOMPADDING", (0, 0), (-1, -1), 4),
|
| 1375 |
+
("TOPPADDING", (0, 0), (-1, -1), 4),
|
| 1376 |
+
]
|
| 1377 |
+
|
| 1378 |
+
# Sort by cancer type and add rows
|
| 1379 |
+
for cancer_type, scores in sorted(
|
| 1380 |
+
assessment.calculated_risk_scores.items()
|
| 1381 |
+
):
|
| 1382 |
+
for i, score in enumerate(scores):
|
| 1383 |
+
# Only show cancer type on first row for each cancer
|
| 1384 |
+
cancer_cell = (
|
| 1385 |
+
Paragraph(cancer_type, table_body_style) if i == 0 else ""
|
| 1386 |
+
)
|
| 1387 |
+
score_data.append(
|
| 1388 |
+
[
|
| 1389 |
+
cancer_cell,
|
| 1390 |
+
Paragraph(score.name, table_body_style),
|
| 1391 |
+
Paragraph(score.score or "N/A", table_body_style),
|
| 1392 |
+
Paragraph(score.interpretation or "N/A", table_body_style),
|
| 1393 |
+
]
|
| 1394 |
+
)
|
| 1395 |
+
|
| 1396 |
+
score_widths = [1.5 * inch, 1.5 * inch, 1.0 * inch, 2.5 * inch]
|
| 1397 |
+
scaled_widths = [
|
| 1398 |
+
w * (CONTENT_WIDTH / sum(score_widths)) for w in score_widths
|
| 1399 |
+
]
|
| 1400 |
+
scores_table = Table(
|
| 1401 |
+
score_data,
|
| 1402 |
+
colWidths=scaled_widths,
|
| 1403 |
+
style=score_style_cmds,
|
| 1404 |
+
splitByRow=1,
|
| 1405 |
+
)
|
| 1406 |
+
story.append(scores_table)
|
| 1407 |
+
story.append(Spacer(1, SPACER_NORMAL))
|
| 1408 |
+
|
| 1409 |
if assessment.thinking:
|
| 1410 |
story.append(Paragraph("Thinking Process", subheading_style))
|
| 1411 |
# Use a preformatted style for better readability of raw text
|
src/sentinel/risk_aggregation.py
CHANGED
|
@@ -3,6 +3,12 @@
|
|
| 3 |
from collections import defaultdict
|
| 4 |
|
| 5 |
from .models import RiskScore
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
def group_scores_by_cancer_type(scores: list[RiskScore]) -> dict[str, list[RiskScore]]:
|
|
@@ -96,3 +102,128 @@ def format_scores_for_pdf(
|
|
| 96 |
List of (cancer_type, scores) tuples sorted by cancer type.
|
| 97 |
"""
|
| 98 |
return sorted(grouped_scores.items())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from collections import defaultdict
|
| 4 |
|
| 5 |
from .models import RiskScore
|
| 6 |
+
from .probability_aggregation import (
|
| 7 |
+
AggregatedRisk,
|
| 8 |
+
aggregate_probabilities,
|
| 9 |
+
get_display_cancer_type,
|
| 10 |
+
separate_score_types,
|
| 11 |
+
)
|
| 12 |
|
| 13 |
|
| 14 |
def group_scores_by_cancer_type(scores: list[RiskScore]) -> dict[str, list[RiskScore]]:
|
|
|
|
| 102 |
List of (cancer_type, scores) tuples sorted by cancer type.
|
| 103 |
"""
|
| 104 |
return sorted(grouped_scores.items())
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def format_probabilities_for_llm(
|
| 108 |
+
aggregated: list[AggregatedRisk], categorical: list[RiskScore]
|
| 109 |
+
) -> str:
|
| 110 |
+
"""Format aggregated probabilities and categorical scores for LLM context.
|
| 111 |
+
|
| 112 |
+
Args:
|
| 113 |
+
aggregated: List of aggregated probability risks.
|
| 114 |
+
categorical: List of categorical/diagnostic scores.
|
| 115 |
+
|
| 116 |
+
Returns:
|
| 117 |
+
Formatted string representation for LLM consumption.
|
| 118 |
+
"""
|
| 119 |
+
lines = []
|
| 120 |
+
lines.append("# Calculated Risk Scores (Ground Truth)\n")
|
| 121 |
+
lines.append(
|
| 122 |
+
"The following risk scores have been calculated using validated models:\n"
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
# Group aggregated risks by cancer type
|
| 126 |
+
if aggregated:
|
| 127 |
+
lines.append("\n## Probability-Based Risk Scores\n")
|
| 128 |
+
lines.append(
|
| 129 |
+
"These scores represent time-based probabilities of developing cancer:\n"
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
current_cancer = None
|
| 133 |
+
for agg in aggregated:
|
| 134 |
+
# Add cancer type header if changed
|
| 135 |
+
if agg.cancer_type != current_cancer:
|
| 136 |
+
current_cancer = agg.cancer_type
|
| 137 |
+
display_name = get_display_cancer_type(agg.cancer_type)
|
| 138 |
+
lines.append(f"\n### {display_name}\n")
|
| 139 |
+
|
| 140 |
+
# Format time horizon nicely
|
| 141 |
+
if agg.time_horizon_years == 79.0:
|
| 142 |
+
horizon_text = "Lifetime"
|
| 143 |
+
else:
|
| 144 |
+
horizon_text = f"{agg.time_horizon_years:.1f}-year"
|
| 145 |
+
|
| 146 |
+
# Add aggregated score
|
| 147 |
+
if agg.model_count == 1:
|
| 148 |
+
model = agg.individual_scores[0]
|
| 149 |
+
lines.append(f"**{model.name}** ({horizon_text} risk)")
|
| 150 |
+
lines.append(f"- **Probability**: {agg.avg_probability_percent:.2f}%")
|
| 151 |
+
if model.description:
|
| 152 |
+
lines.append(f"- **Description**: {model.description}")
|
| 153 |
+
if model.interpretation:
|
| 154 |
+
lines.append(f"- **Interpretation**: {model.interpretation}")
|
| 155 |
+
else:
|
| 156 |
+
model_names = ", ".join(s.name for s in agg.individual_scores)
|
| 157 |
+
lines.append(
|
| 158 |
+
f"**Aggregated Risk** from {agg.model_count} models ({model_names})"
|
| 159 |
+
)
|
| 160 |
+
lines.append(
|
| 161 |
+
f"- **Average {horizon_text} Probability**: {agg.avg_probability_percent:.2f}%"
|
| 162 |
+
)
|
| 163 |
+
lines.append(
|
| 164 |
+
"- **Individual Probabilities**: "
|
| 165 |
+
+ ", ".join(
|
| 166 |
+
f"{s.name}: {s.probability_percent:.2f}%"
|
| 167 |
+
for s in agg.individual_scores
|
| 168 |
+
)
|
| 169 |
+
)
|
| 170 |
+
|
| 171 |
+
lines.append("") # Empty line
|
| 172 |
+
|
| 173 |
+
# Add categorical scores
|
| 174 |
+
if categorical:
|
| 175 |
+
lines.append("\n## Diagnostic/Categorical Risk Scores\n")
|
| 176 |
+
lines.append(
|
| 177 |
+
"These scores represent diagnostic categories or non-time-based assessments:\n"
|
| 178 |
+
)
|
| 179 |
+
|
| 180 |
+
for score in categorical:
|
| 181 |
+
lines.append(f"\n### {score.name}")
|
| 182 |
+
if score.cancer_type:
|
| 183 |
+
lines.append(f"- **Cancer Type**: {score.cancer_type}")
|
| 184 |
+
lines.append(f"- **Result**: {score.score}")
|
| 185 |
+
if score.description:
|
| 186 |
+
lines.append(f"- **Description**: {score.description}")
|
| 187 |
+
if score.interpretation:
|
| 188 |
+
lines.append(f"- **Interpretation**: {score.interpretation}")
|
| 189 |
+
lines.append("") # Empty line
|
| 190 |
+
|
| 191 |
+
lines.append("\n---\n")
|
| 192 |
+
lines.append("**Important**: These scores are the ground truth. Your task is to:")
|
| 193 |
+
lines.append("1. Explain what these scores mean for the patient in clear language")
|
| 194 |
+
lines.append(
|
| 195 |
+
"2. Identify and highlight key risk factors contributing to elevated scores"
|
| 196 |
+
)
|
| 197 |
+
lines.append("3. Provide actionable context and insights based on these scores")
|
| 198 |
+
lines.append(
|
| 199 |
+
"4. DO NOT generate your own risk levels - explain and contextualize the calculated ones\n"
|
| 200 |
+
)
|
| 201 |
+
|
| 202 |
+
return "\n".join(lines)
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def format_scores_with_aggregation(scores: list[RiskScore]) -> str:
|
| 206 |
+
"""Format risk scores with probability aggregation for LLM context.
|
| 207 |
+
|
| 208 |
+
This is an enhanced version of format_scores_for_llm that aggregates
|
| 209 |
+
probability-based scores by cancer type and time horizon.
|
| 210 |
+
|
| 211 |
+
Args:
|
| 212 |
+
scores: List of RiskScore objects.
|
| 213 |
+
|
| 214 |
+
Returns:
|
| 215 |
+
Formatted string representation for LLM consumption.
|
| 216 |
+
"""
|
| 217 |
+
if not scores:
|
| 218 |
+
return "No risk scores calculated."
|
| 219 |
+
|
| 220 |
+
# Separate scores by type
|
| 221 |
+
separated = separate_score_types(scores)
|
| 222 |
+
probability_scores = separated["probability"]
|
| 223 |
+
categorical_scores = separated["categorical"]
|
| 224 |
+
|
| 225 |
+
# Aggregate probability scores
|
| 226 |
+
aggregated = aggregate_probabilities(probability_scores)
|
| 227 |
+
|
| 228 |
+
# Format for LLM
|
| 229 |
+
return format_probabilities_for_llm(aggregated, categorical_scores)
|
src/sentinel/risk_models/__init__.py
CHANGED
|
@@ -11,6 +11,7 @@ from sentinel.risk_models.pcpt import PCPTRiskModel
|
|
| 11 |
from sentinel.risk_models.plcom2012 import PLCOm2012RiskModel
|
| 12 |
from sentinel.risk_models.prostate_mortality import ProstateMortalityRiskModel
|
| 13 |
from sentinel.risk_models.qcancer import QCancerRiskModel
|
|
|
|
| 14 |
|
| 15 |
RISK_MODELS = [
|
| 16 |
GailRiskModel,
|
|
@@ -24,6 +25,7 @@ RISK_MODELS = [
|
|
| 24 |
QCancerRiskModel,
|
| 25 |
ClausRiskModel,
|
| 26 |
MRATRiskModel,
|
|
|
|
| 27 |
]
|
| 28 |
|
| 29 |
__all__ = [
|
|
@@ -33,4 +35,5 @@ __all__ = [
|
|
| 33 |
"LLPiRiskModel",
|
| 34 |
"MRATRiskModel",
|
| 35 |
"PLCOm2012RiskModel",
|
|
|
|
| 36 |
]
|
|
|
|
| 11 |
from sentinel.risk_models.plcom2012 import PLCOm2012RiskModel
|
| 12 |
from sentinel.risk_models.prostate_mortality import ProstateMortalityRiskModel
|
| 13 |
from sentinel.risk_models.qcancer import QCancerRiskModel
|
| 14 |
+
from sentinel.risk_models.tyrer_cuzick import TyrerCuzickRiskModel
|
| 15 |
|
| 16 |
RISK_MODELS = [
|
| 17 |
GailRiskModel,
|
|
|
|
| 25 |
QCancerRiskModel,
|
| 26 |
ClausRiskModel,
|
| 27 |
MRATRiskModel,
|
| 28 |
+
TyrerCuzickRiskModel,
|
| 29 |
]
|
| 30 |
|
| 31 |
__all__ = [
|
|
|
|
| 35 |
"LLPiRiskModel",
|
| 36 |
"MRATRiskModel",
|
| 37 |
"PLCOm2012RiskModel",
|
| 38 |
+
"TyrerCuzickRiskModel",
|
| 39 |
]
|
src/sentinel/risk_models/base.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
"""Abstract base classes for risk model implementations."""
|
| 2 |
|
|
|
|
| 3 |
from abc import ABC, abstractmethod
|
| 4 |
from typing import Any
|
| 5 |
|
|
@@ -44,6 +45,34 @@ class RiskModel(ABC):
|
|
| 44 |
def references(self) -> list[str]:
|
| 45 |
"""Return academic or source references for the model."""
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
@staticmethod
|
| 48 |
def _get_nested_field(obj: Any, path: str) -> Any:
|
| 49 |
"""Navigate a dotted path to retrieve a nested field value.
|
|
@@ -108,11 +137,17 @@ class RiskModel(ABC):
|
|
| 108 |
Returns:
|
| 109 |
A populated RiskScore object for this model.
|
| 110 |
"""
|
|
|
|
|
|
|
|
|
|
| 111 |
return RiskScore(
|
| 112 |
name=self.name,
|
| 113 |
-
score=
|
| 114 |
cancer_type=self.cancer_type(),
|
| 115 |
description=self.description(),
|
| 116 |
interpretation=self.interpretation(),
|
| 117 |
references=self.references(),
|
|
|
|
|
|
|
|
|
|
| 118 |
)
|
|
|
|
| 1 |
"""Abstract base classes for risk model implementations."""
|
| 2 |
|
| 3 |
+
import re
|
| 4 |
from abc import ABC, abstractmethod
|
| 5 |
from typing import Any
|
| 6 |
|
|
|
|
| 45 |
def references(self) -> list[str]:
|
| 46 |
"""Return academic or source references for the model."""
|
| 47 |
|
| 48 |
+
@abstractmethod
|
| 49 |
+
def time_horizon_years(self) -> float | None:
|
| 50 |
+
"""Return time horizon in years for probability output.
|
| 51 |
+
|
| 52 |
+
Returns:
|
| 53 |
+
Time horizon in years (e.g., 5, 10, 79 for lifetime), or None if not applicable.
|
| 54 |
+
"""
|
| 55 |
+
|
| 56 |
+
def _parse_probability(self, score_str: str) -> tuple[float | None, str]:
|
| 57 |
+
"""Parse probability from score string and determine score type.
|
| 58 |
+
|
| 59 |
+
Args:
|
| 60 |
+
score_str: The score string returned by compute_score().
|
| 61 |
+
|
| 62 |
+
Returns:
|
| 63 |
+
Tuple of (probability_percent, score_type).
|
| 64 |
+
"""
|
| 65 |
+
if score_str.startswith("N/A"):
|
| 66 |
+
return (None, "not_applicable")
|
| 67 |
+
|
| 68 |
+
# Extract percentage using regex
|
| 69 |
+
match = re.search(r"(\d+\.?\d*)%", score_str)
|
| 70 |
+
if match:
|
| 71 |
+
return (float(match.group(1)), "probability")
|
| 72 |
+
|
| 73 |
+
# If no percentage found, treat as categorical
|
| 74 |
+
return (None, "categorical")
|
| 75 |
+
|
| 76 |
@staticmethod
|
| 77 |
def _get_nested_field(obj: Any, path: str) -> Any:
|
| 78 |
"""Navigate a dotted path to retrieve a nested field value.
|
|
|
|
| 137 |
Returns:
|
| 138 |
A populated RiskScore object for this model.
|
| 139 |
"""
|
| 140 |
+
score_str = self.compute_score(user)
|
| 141 |
+
probability, score_type = self._parse_probability(score_str)
|
| 142 |
+
|
| 143 |
return RiskScore(
|
| 144 |
name=self.name,
|
| 145 |
+
score=score_str,
|
| 146 |
cancer_type=self.cancer_type(),
|
| 147 |
description=self.description(),
|
| 148 |
interpretation=self.interpretation(),
|
| 149 |
references=self.references(),
|
| 150 |
+
probability_percent=probability,
|
| 151 |
+
time_horizon_years=self.time_horizon_years(),
|
| 152 |
+
score_type=score_type,
|
| 153 |
)
|
src/sentinel/risk_models/boadicea.py
CHANGED
|
@@ -190,3 +190,11 @@ class BOADICEARiskModel(RiskModel):
|
|
| 190 |
"Antoniou et al. Average risks of breast and ovarian cancer associated with BRCA1 or BRCA2 "
|
| 191 |
"mutations detected in case series unselected for family history. Am J Hum Genet. 2003;72(5):1117-1130.",
|
| 192 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
"Antoniou et al. Average risks of breast and ovarian cancer associated with BRCA1 or BRCA2 "
|
| 191 |
"mutations detected in case series unselected for family history. Am J Hum Genet. 2003;72(5):1117-1130.",
|
| 192 |
]
|
| 193 |
+
|
| 194 |
+
def time_horizon_years(self) -> float | None:
|
| 195 |
+
"""Return the time horizon in years for the BOADICEA model.
|
| 196 |
+
|
| 197 |
+
Returns:
|
| 198 |
+
The time horizon in years (10 years for BOADICEA).
|
| 199 |
+
"""
|
| 200 |
+
return 10.0
|
src/sentinel/risk_models/claus.py
CHANGED
|
@@ -571,6 +571,14 @@ class ClausRiskModel(RiskModel):
|
|
| 571 |
"Reference implementation: https://github.com/ColorGenomics/risk-models",
|
| 572 |
]
|
| 573 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 574 |
|
| 575 |
def _bin_age_to_index(age: int) -> int:
|
| 576 |
"""Convert age to table index.
|
|
|
|
| 571 |
"Reference implementation: https://github.com/ColorGenomics/risk-models",
|
| 572 |
]
|
| 573 |
|
| 574 |
+
def time_horizon_years(self) -> float | None:
|
| 575 |
+
"""Return the time horizon in years for the Claus model.
|
| 576 |
+
|
| 577 |
+
Returns:
|
| 578 |
+
The time horizon in years (79 years - lifetime risk to age 79).
|
| 579 |
+
"""
|
| 580 |
+
return 79.0
|
| 581 |
+
|
| 582 |
|
| 583 |
def _bin_age_to_index(age: int) -> int:
|
| 584 |
"""Convert age to table index.
|
src/sentinel/risk_models/crc_pro.py
CHANGED
|
@@ -216,9 +216,10 @@ class CRCProRiskModel(RiskModel):
|
|
| 216 |
str: Human-readable interpretation guidance.
|
| 217 |
"""
|
| 218 |
return (
|
| 219 |
-
"
|
| 220 |
-
"
|
| 221 |
-
"
|
|
|
|
| 222 |
)
|
| 223 |
|
| 224 |
def references(self) -> list[str]:
|
|
@@ -233,6 +234,14 @@ class CRCProRiskModel(RiskModel):
|
|
| 233 |
"Multi-Ethnic Cohort Study. J Am Board Fam Med. 2014;27(1):42-55."
|
| 234 |
]
|
| 235 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
# --- Internal helpers -----------------------------------------------
|
| 237 |
def _build_input(self, user: UserInput, sex: Sex, age: int):
|
| 238 |
"""Build the input for the model.
|
|
|
|
| 216 |
str: Human-readable interpretation guidance.
|
| 217 |
"""
|
| 218 |
return (
|
| 219 |
+
"Risk categories: Low Risk (<0.5%), Moderate Risk (0.5-2.0%), "
|
| 220 |
+
"High Risk (>2.0%). The percentage indicates the 10-year absolute "
|
| 221 |
+
"risk of developing colorectal cancer. Elevated results should be "
|
| 222 |
+
"reviewed with a qualified healthcare professional."
|
| 223 |
)
|
| 224 |
|
| 225 |
def references(self) -> list[str]:
|
|
|
|
| 234 |
"Multi-Ethnic Cohort Study. J Am Board Fam Med. 2014;27(1):42-55."
|
| 235 |
]
|
| 236 |
|
| 237 |
+
def time_horizon_years(self) -> float | None:
|
| 238 |
+
"""Return the time horizon in years for the CRC-PRO model.
|
| 239 |
+
|
| 240 |
+
Returns:
|
| 241 |
+
The time horizon in years (10 years for CRC-PRO).
|
| 242 |
+
"""
|
| 243 |
+
return 10.0
|
| 244 |
+
|
| 245 |
# --- Internal helpers -----------------------------------------------
|
| 246 |
def _build_input(self, user: UserInput, sex: Sex, age: int):
|
| 247 |
"""Build the input for the model.
|
src/sentinel/risk_models/extended_pbcg.py
CHANGED
|
@@ -279,6 +279,14 @@ class ExtendedPBCGRiskModel(RiskModel):
|
|
| 279 |
"prostate cancer risk prediction. BMC Med Res Methodol. 2022;22:200.",
|
| 280 |
]
|
| 281 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
def _get_feature_value_direct(
|
| 283 |
self,
|
| 284 |
user: UserInput,
|
|
|
|
| 279 |
"prostate cancer risk prediction. BMC Med Res Methodol. 2022;22:200.",
|
| 280 |
]
|
| 281 |
|
| 282 |
+
def time_horizon_years(self) -> float | None:
|
| 283 |
+
"""Return the time horizon in years for the Extended PBCG model.
|
| 284 |
+
|
| 285 |
+
Returns:
|
| 286 |
+
The time horizon in years (None for Extended PBCG - no fixed horizon).
|
| 287 |
+
"""
|
| 288 |
+
return None
|
| 289 |
+
|
| 290 |
def _get_feature_value_direct(
|
| 291 |
self,
|
| 292 |
user: UserInput,
|
src/sentinel/risk_models/gail.py
CHANGED
|
@@ -805,7 +805,15 @@ class GailRiskModel(RiskModel):
|
|
| 805 |
return "The Gail Model (Breast Cancer Risk Assessment Tool) calculates a woman's chance of developing invasive breast cancer over a given time interval. It uses demographic and reproductive history factors to project risk for women with no prior breast cancer, DCIS or LCIS. Typically applied to ages 35-85."
|
| 806 |
|
| 807 |
def interpretation(self) -> str:
|
| 808 |
-
return "
|
| 809 |
|
| 810 |
def references(self) -> list[str]:
|
| 811 |
return ["National Cancer Institute Breast Cancer Risk Assessment Tool"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 805 |
return "The Gail Model (Breast Cancer Risk Assessment Tool) calculates a woman's chance of developing invasive breast cancer over a given time interval. It uses demographic and reproductive history factors to project risk for women with no prior breast cancer, DCIS or LCIS. Typically applied to ages 35-85."
|
| 806 |
|
| 807 |
def interpretation(self) -> str:
|
| 808 |
+
return "High Risk is defined as a 5-year risk of 1.67% or higher. Average Risk is below 1.67%. Women classified as High Risk may be candidates for preventive interventions. Results should be discussed with a healthcare professional. The model does not include all possible risk factors and may overestimate risk for non-white populations."
|
| 809 |
|
| 810 |
def references(self) -> list[str]:
|
| 811 |
return ["National Cancer Institute Breast Cancer Risk Assessment Tool"]
|
| 812 |
+
|
| 813 |
+
def time_horizon_years(self) -> float | None:
|
| 814 |
+
"""Return the time horizon in years for the Gail model.
|
| 815 |
+
|
| 816 |
+
Returns:
|
| 817 |
+
The time horizon in years (5 years for Gail).
|
| 818 |
+
"""
|
| 819 |
+
return 5.0
|
src/sentinel/risk_models/llpi.py
CHANGED
|
@@ -228,3 +228,11 @@ class LLPiRiskModel(RiskModel):
|
|
| 228 |
"LLPi: Liverpool lung project risk prediction model for lung cancer incidence. "
|
| 229 |
"Cancer Prev Res (Phila) 2015;8:570-5."
|
| 230 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
"LLPi: Liverpool lung project risk prediction model for lung cancer incidence. "
|
| 229 |
"Cancer Prev Res (Phila) 2015;8:570-5."
|
| 230 |
]
|
| 231 |
+
|
| 232 |
+
def time_horizon_years(self) -> float | None:
|
| 233 |
+
"""Return the time horizon in years for the LLPi model.
|
| 234 |
+
|
| 235 |
+
Returns:
|
| 236 |
+
The time horizon in years (8.7 years for LLPi).
|
| 237 |
+
"""
|
| 238 |
+
return 8.7
|
src/sentinel/risk_models/mrat.py
CHANGED
|
@@ -140,6 +140,14 @@ class MRATRiskModel(RiskModel):
|
|
| 140 |
"Fears TR et al. Identifying individuals at high risk for melanoma: J Am Acad Dermatol. 2006;55:819-826.",
|
| 141 |
]
|
| 142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
def absolute_risk(self, user: UserInput) -> float:
|
| 144 |
"""Compute the 5-year melanoma absolute risk percentage.
|
| 145 |
|
|
|
|
| 140 |
"Fears TR et al. Identifying individuals at high risk for melanoma: J Am Acad Dermatol. 2006;55:819-826.",
|
| 141 |
]
|
| 142 |
|
| 143 |
+
def time_horizon_years(self) -> float | None:
|
| 144 |
+
"""Return the time horizon in years for the MRAT model.
|
| 145 |
+
|
| 146 |
+
Returns:
|
| 147 |
+
The time horizon in years (5 years for MRAT).
|
| 148 |
+
"""
|
| 149 |
+
return 5.0
|
| 150 |
+
|
| 151 |
def absolute_risk(self, user: UserInput) -> float:
|
| 152 |
"""Compute the 5-year melanoma absolute risk percentage.
|
| 153 |
|
src/sentinel/risk_models/pcpt.py
CHANGED
|
@@ -631,3 +631,11 @@ class PCPTRiskModel(RiskModel):
|
|
| 631 |
"Calculator 2.0 for the prediction of low- versus high-grade "
|
| 632 |
"prostate cancer. Urology. 2014;83(6):1362-1367.",
|
| 633 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 631 |
"Calculator 2.0 for the prediction of low- versus high-grade "
|
| 632 |
"prostate cancer. Urology. 2014;83(6):1362-1367.",
|
| 633 |
]
|
| 634 |
+
|
| 635 |
+
def time_horizon_years(self) -> float | None:
|
| 636 |
+
"""Return the time horizon in years for the PCPT model.
|
| 637 |
+
|
| 638 |
+
Returns:
|
| 639 |
+
The time horizon in years (None for PCPT - no fixed horizon).
|
| 640 |
+
"""
|
| 641 |
+
return None
|
src/sentinel/risk_models/plcom2012.py
CHANGED
|
@@ -259,3 +259,11 @@ class PLCOm2012RiskModel(RiskModel):
|
|
| 259 |
return [
|
| 260 |
"Tammemägi, M. C., et al. (2013). Selection of individuals for lung-cancer screening by modeling lung-cancer risk. New England Journal of Medicine, 368(8), 728-736."
|
| 261 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
return [
|
| 260 |
"Tammemägi, M. C., et al. (2013). Selection of individuals for lung-cancer screening by modeling lung-cancer risk. New England Journal of Medicine, 368(8), 728-736."
|
| 261 |
]
|
| 262 |
+
|
| 263 |
+
def time_horizon_years(self) -> float | None:
|
| 264 |
+
"""Return the time horizon in years for the PLCOm2012 model.
|
| 265 |
+
|
| 266 |
+
Returns:
|
| 267 |
+
The time horizon in years (6 years for PLCOm2012).
|
| 268 |
+
"""
|
| 269 |
+
return 6.0
|
src/sentinel/risk_models/prostate_mortality.py
CHANGED
|
@@ -221,3 +221,11 @@ class ProstateMortalityRiskModel(RiskModel):
|
|
| 221 |
"BMC Med. 2019;17:144.",
|
| 222 |
"Predict Prostate: https://prostate.predict.cam/tool",
|
| 223 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
"BMC Med. 2019;17:144.",
|
| 222 |
"Predict Prostate: https://prostate.predict.cam/tool",
|
| 223 |
]
|
| 224 |
+
|
| 225 |
+
def time_horizon_years(self) -> float | None:
|
| 226 |
+
"""Return the time horizon in years for the Prostate Mortality model.
|
| 227 |
+
|
| 228 |
+
Returns:
|
| 229 |
+
The time horizon in years (15 years for Prostate Mortality).
|
| 230 |
+
"""
|
| 231 |
+
return 15.0
|
src/sentinel/risk_models/qcancer.py
CHANGED
|
@@ -1858,6 +1858,9 @@ class QCancerRiskModel(RiskModel):
|
|
| 1858 |
description="10-year probability of not developing cancer",
|
| 1859 |
interpretation="Baseline probability - higher values indicate lower overall cancer risk",
|
| 1860 |
references=self.references(),
|
|
|
|
|
|
|
|
|
|
| 1861 |
)
|
| 1862 |
)
|
| 1863 |
|
|
@@ -1877,6 +1880,9 @@ class QCancerRiskModel(RiskModel):
|
|
| 1877 |
"Values >1% warrant clinical review."
|
| 1878 |
),
|
| 1879 |
references=self.references(),
|
|
|
|
|
|
|
|
|
|
| 1880 |
)
|
| 1881 |
)
|
| 1882 |
|
|
@@ -1888,6 +1894,14 @@ class QCancerRiskModel(RiskModel):
|
|
| 1888 |
"ClinRisk Ltd. QCancer-2013 source code (GNU AGPL v3).",
|
| 1889 |
]
|
| 1890 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1891 |
def _format_risks(self, risks: dict[str, float], is_female: bool) -> str:
|
| 1892 |
"""Format probabilities as semicolon-separated string.
|
| 1893 |
|
|
|
|
| 1858 |
description="10-year probability of not developing cancer",
|
| 1859 |
interpretation="Baseline probability - higher values indicate lower overall cancer risk",
|
| 1860 |
references=self.references(),
|
| 1861 |
+
probability_percent=no_cancer_pct,
|
| 1862 |
+
time_horizon_years=10.0,
|
| 1863 |
+
score_type="probability",
|
| 1864 |
)
|
| 1865 |
)
|
| 1866 |
|
|
|
|
| 1880 |
"Values >1% warrant clinical review."
|
| 1881 |
),
|
| 1882 |
references=self.references(),
|
| 1883 |
+
probability_percent=pct,
|
| 1884 |
+
time_horizon_years=10.0,
|
| 1885 |
+
score_type="probability",
|
| 1886 |
)
|
| 1887 |
)
|
| 1888 |
|
|
|
|
| 1894 |
"ClinRisk Ltd. QCancer-2013 source code (GNU AGPL v3).",
|
| 1895 |
]
|
| 1896 |
|
| 1897 |
+
def time_horizon_years(self) -> float | None:
|
| 1898 |
+
"""Return the time horizon in years for the QCancer model.
|
| 1899 |
+
|
| 1900 |
+
Returns:
|
| 1901 |
+
The time horizon in years (10 years for QCancer).
|
| 1902 |
+
"""
|
| 1903 |
+
return 10.0
|
| 1904 |
+
|
| 1905 |
def _format_risks(self, risks: dict[str, float], is_female: bool) -> str:
|
| 1906 |
"""Format probabilities as semicolon-separated string.
|
| 1907 |
|
src/sentinel/risk_models/tyrer_cuzick.py
CHANGED
|
@@ -1320,3 +1320,11 @@ class TyrerCuzickRiskModel(RiskModel):
|
|
| 1320 |
"Tyrer, J., Duffy, S. W., & Cuzick, J. (2004). A breast cancer prediction model "
|
| 1321 |
"incorporating familial and personal risk factors. Statistics in Medicine, 23(7), 1111-1130."
|
| 1322 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1320 |
"Tyrer, J., Duffy, S. W., & Cuzick, J. (2004). A breast cancer prediction model "
|
| 1321 |
"incorporating familial and personal risk factors. Statistics in Medicine, 23(7), 1111-1130."
|
| 1322 |
]
|
| 1323 |
+
|
| 1324 |
+
def time_horizon_years(self) -> float | None:
|
| 1325 |
+
"""Return the time horizon in years for the Tyrer-Cuzick model.
|
| 1326 |
+
|
| 1327 |
+
Returns:
|
| 1328 |
+
The time horizon in years (10 years for Tyrer-Cuzick).
|
| 1329 |
+
"""
|
| 1330 |
+
return 10.0
|
tests/test_probability_aggregation.py
ADDED
|
@@ -0,0 +1,477 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for probability aggregation utilities."""
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
|
| 5 |
+
from sentinel.models import RiskScore
|
| 6 |
+
from sentinel.probability_aggregation import (
|
| 7 |
+
AggregatedRisk,
|
| 8 |
+
aggregate_probabilities,
|
| 9 |
+
categorize_risk,
|
| 10 |
+
get_display_cancer_type,
|
| 11 |
+
normalize_cancer_type,
|
| 12 |
+
separate_score_types,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class TestAggregateProbabilities:
|
| 17 |
+
"""Test probability aggregation functionality."""
|
| 18 |
+
|
| 19 |
+
def test_single_model_per_cancer_type(self):
|
| 20 |
+
"""Test aggregation with one model per cancer type."""
|
| 21 |
+
scores = [
|
| 22 |
+
RiskScore(
|
| 23 |
+
name="Gail",
|
| 24 |
+
score="1.5%",
|
| 25 |
+
cancer_type="breast",
|
| 26 |
+
probability_percent=1.5,
|
| 27 |
+
time_horizon_years=5.0,
|
| 28 |
+
score_type="probability",
|
| 29 |
+
),
|
| 30 |
+
RiskScore(
|
| 31 |
+
name="PLCOm2012",
|
| 32 |
+
score="2.3%",
|
| 33 |
+
cancer_type="lung",
|
| 34 |
+
probability_percent=2.3,
|
| 35 |
+
time_horizon_years=6.0,
|
| 36 |
+
score_type="probability",
|
| 37 |
+
),
|
| 38 |
+
]
|
| 39 |
+
|
| 40 |
+
aggregated = aggregate_probabilities(scores)
|
| 41 |
+
|
| 42 |
+
assert len(aggregated) == 2
|
| 43 |
+
|
| 44 |
+
# Check breast cancer aggregation
|
| 45 |
+
breast_agg = next(agg for agg in aggregated if agg.cancer_type == "breast")
|
| 46 |
+
assert breast_agg.time_horizon_years == 5.0
|
| 47 |
+
assert breast_agg.avg_probability_percent == 1.5
|
| 48 |
+
assert breast_agg.risk_category == "Moderate" # 1.5% for 5-year horizon
|
| 49 |
+
assert breast_agg.model_count == 1
|
| 50 |
+
assert len(breast_agg.individual_scores) == 1
|
| 51 |
+
assert breast_agg.individual_scores[0].name == "Gail"
|
| 52 |
+
|
| 53 |
+
# Check lung cancer aggregation
|
| 54 |
+
lung_agg = next(agg for agg in aggregated if agg.cancer_type == "lung")
|
| 55 |
+
assert lung_agg.time_horizon_years == 6.0
|
| 56 |
+
assert lung_agg.avg_probability_percent == 2.3
|
| 57 |
+
assert lung_agg.risk_category == "Moderate" # 2.3% for 6-year horizon
|
| 58 |
+
assert lung_agg.model_count == 1
|
| 59 |
+
|
| 60 |
+
def test_multiple_models_same_cancer_same_horizon(self):
|
| 61 |
+
"""Test aggregation when multiple models assess same cancer with same time horizon."""
|
| 62 |
+
scores = [
|
| 63 |
+
RiskScore(
|
| 64 |
+
name="Model1",
|
| 65 |
+
score="2.0%",
|
| 66 |
+
cancer_type="breast",
|
| 67 |
+
probability_percent=2.0,
|
| 68 |
+
time_horizon_years=10.0,
|
| 69 |
+
score_type="probability",
|
| 70 |
+
),
|
| 71 |
+
RiskScore(
|
| 72 |
+
name="Model2",
|
| 73 |
+
score="3.0%",
|
| 74 |
+
cancer_type="breast",
|
| 75 |
+
probability_percent=3.0,
|
| 76 |
+
time_horizon_years=10.0,
|
| 77 |
+
score_type="probability",
|
| 78 |
+
),
|
| 79 |
+
RiskScore(
|
| 80 |
+
name="Model3",
|
| 81 |
+
score="4.0%",
|
| 82 |
+
cancer_type="breast",
|
| 83 |
+
probability_percent=4.0,
|
| 84 |
+
time_horizon_years=10.0,
|
| 85 |
+
score_type="probability",
|
| 86 |
+
),
|
| 87 |
+
]
|
| 88 |
+
|
| 89 |
+
aggregated = aggregate_probabilities(scores)
|
| 90 |
+
|
| 91 |
+
assert len(aggregated) == 1
|
| 92 |
+
agg = aggregated[0]
|
| 93 |
+
|
| 94 |
+
assert agg.cancer_type == "breast"
|
| 95 |
+
assert agg.time_horizon_years == 10.0
|
| 96 |
+
assert agg.avg_probability_percent == pytest.approx(3.0) # (2+3+4)/3
|
| 97 |
+
assert agg.risk_category == "Moderate" # 3.0% for 10-year horizon
|
| 98 |
+
assert agg.model_count == 3
|
| 99 |
+
assert len(agg.individual_scores) == 3
|
| 100 |
+
|
| 101 |
+
def test_multiple_models_same_cancer_different_horizons(self):
|
| 102 |
+
"""Test aggregation with same cancer type but different time horizons."""
|
| 103 |
+
scores = [
|
| 104 |
+
RiskScore(
|
| 105 |
+
name="Model1",
|
| 106 |
+
score="1.5%",
|
| 107 |
+
cancer_type="breast",
|
| 108 |
+
probability_percent=1.5,
|
| 109 |
+
time_horizon_years=5.0,
|
| 110 |
+
score_type="probability",
|
| 111 |
+
),
|
| 112 |
+
RiskScore(
|
| 113 |
+
name="Model2",
|
| 114 |
+
score="3.0%",
|
| 115 |
+
cancer_type="breast",
|
| 116 |
+
probability_percent=3.0,
|
| 117 |
+
time_horizon_years=10.0,
|
| 118 |
+
score_type="probability",
|
| 119 |
+
),
|
| 120 |
+
RiskScore(
|
| 121 |
+
name="Model3",
|
| 122 |
+
score="15.0%",
|
| 123 |
+
cancer_type="breast",
|
| 124 |
+
probability_percent=15.0,
|
| 125 |
+
time_horizon_years=79.0,
|
| 126 |
+
score_type="probability",
|
| 127 |
+
),
|
| 128 |
+
]
|
| 129 |
+
|
| 130 |
+
aggregated = aggregate_probabilities(scores)
|
| 131 |
+
|
| 132 |
+
assert len(aggregated) == 3 # Three different time horizons
|
| 133 |
+
|
| 134 |
+
# Verify each time horizon is separate
|
| 135 |
+
horizons = {agg.time_horizon_years for agg in aggregated}
|
| 136 |
+
assert horizons == {5.0, 10.0, 79.0}
|
| 137 |
+
|
| 138 |
+
# Verify each has single model
|
| 139 |
+
for agg in aggregated:
|
| 140 |
+
assert agg.model_count == 1
|
| 141 |
+
|
| 142 |
+
def test_excludes_non_probability_scores(self):
|
| 143 |
+
"""Test that non-probability scores are excluded from aggregation."""
|
| 144 |
+
scores = [
|
| 145 |
+
RiskScore(
|
| 146 |
+
name="Gail",
|
| 147 |
+
score="1.5%",
|
| 148 |
+
cancer_type="breast",
|
| 149 |
+
probability_percent=1.5,
|
| 150 |
+
time_horizon_years=5.0,
|
| 151 |
+
score_type="probability",
|
| 152 |
+
),
|
| 153 |
+
RiskScore(
|
| 154 |
+
name="PCPT",
|
| 155 |
+
score="No Cancer: 45%, Low Grade: 30%, High Grade: 25%",
|
| 156 |
+
cancer_type="prostate",
|
| 157 |
+
probability_percent=None,
|
| 158 |
+
time_horizon_years=None,
|
| 159 |
+
score_type="categorical",
|
| 160 |
+
),
|
| 161 |
+
RiskScore(
|
| 162 |
+
name="Model",
|
| 163 |
+
score="N/A: Age out of range",
|
| 164 |
+
cancer_type="lung",
|
| 165 |
+
probability_percent=None,
|
| 166 |
+
time_horizon_years=None,
|
| 167 |
+
score_type="not_applicable",
|
| 168 |
+
),
|
| 169 |
+
]
|
| 170 |
+
|
| 171 |
+
aggregated = aggregate_probabilities(scores)
|
| 172 |
+
|
| 173 |
+
assert len(aggregated) == 1
|
| 174 |
+
assert aggregated[0].cancer_type == "breast"
|
| 175 |
+
|
| 176 |
+
def test_empty_list(self):
|
| 177 |
+
"""Test aggregation with empty score list."""
|
| 178 |
+
aggregated = aggregate_probabilities([])
|
| 179 |
+
assert aggregated == []
|
| 180 |
+
|
| 181 |
+
def test_all_non_probability_scores(self):
|
| 182 |
+
"""Test aggregation when all scores are non-probability."""
|
| 183 |
+
scores = [
|
| 184 |
+
RiskScore(
|
| 185 |
+
name="PCPT",
|
| 186 |
+
score="Results",
|
| 187 |
+
cancer_type="prostate",
|
| 188 |
+
score_type="categorical",
|
| 189 |
+
),
|
| 190 |
+
RiskScore(
|
| 191 |
+
name="Model",
|
| 192 |
+
score="N/A",
|
| 193 |
+
cancer_type="lung",
|
| 194 |
+
score_type="not_applicable",
|
| 195 |
+
),
|
| 196 |
+
]
|
| 197 |
+
|
| 198 |
+
aggregated = aggregate_probabilities(scores)
|
| 199 |
+
assert aggregated == []
|
| 200 |
+
|
| 201 |
+
def test_case_insensitive_cancer_type_grouping(self):
|
| 202 |
+
"""Test that cancer types are grouped case-insensitively."""
|
| 203 |
+
scores = [
|
| 204 |
+
RiskScore(
|
| 205 |
+
name="Model1",
|
| 206 |
+
score="1.5%",
|
| 207 |
+
cancer_type="Breast",
|
| 208 |
+
probability_percent=1.5,
|
| 209 |
+
time_horizon_years=5.0,
|
| 210 |
+
score_type="probability",
|
| 211 |
+
),
|
| 212 |
+
RiskScore(
|
| 213 |
+
name="Model2",
|
| 214 |
+
score="1.8%",
|
| 215 |
+
cancer_type="breast",
|
| 216 |
+
probability_percent=1.8,
|
| 217 |
+
time_horizon_years=5.0,
|
| 218 |
+
score_type="probability",
|
| 219 |
+
),
|
| 220 |
+
RiskScore(
|
| 221 |
+
name="Model3",
|
| 222 |
+
score="1.7%",
|
| 223 |
+
cancer_type="BREAST",
|
| 224 |
+
probability_percent=1.7,
|
| 225 |
+
time_horizon_years=5.0,
|
| 226 |
+
score_type="probability",
|
| 227 |
+
),
|
| 228 |
+
]
|
| 229 |
+
|
| 230 |
+
aggregated = aggregate_probabilities(scores)
|
| 231 |
+
|
| 232 |
+
assert len(aggregated) == 1
|
| 233 |
+
assert aggregated[0].cancer_type == "breast" # normalized to lowercase
|
| 234 |
+
assert aggregated[0].model_count == 3
|
| 235 |
+
assert aggregated[0].avg_probability_percent == pytest.approx(1.6667, abs=0.001)
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
class TestSeparateScoreTypes:
|
| 239 |
+
"""Test score type separation functionality."""
|
| 240 |
+
|
| 241 |
+
def test_separate_all_types(self):
|
| 242 |
+
"""Test separation of all three score types."""
|
| 243 |
+
scores = [
|
| 244 |
+
RiskScore(
|
| 245 |
+
name="Gail",
|
| 246 |
+
score="1.5%",
|
| 247 |
+
score_type="probability",
|
| 248 |
+
),
|
| 249 |
+
RiskScore(
|
| 250 |
+
name="BOADICEA",
|
| 251 |
+
score="2.0%",
|
| 252 |
+
score_type="probability",
|
| 253 |
+
),
|
| 254 |
+
RiskScore(
|
| 255 |
+
name="PCPT",
|
| 256 |
+
score="No Cancer: 45%",
|
| 257 |
+
score_type="categorical",
|
| 258 |
+
),
|
| 259 |
+
RiskScore(
|
| 260 |
+
name="Model",
|
| 261 |
+
score="N/A: Age out of range",
|
| 262 |
+
score_type="not_applicable",
|
| 263 |
+
),
|
| 264 |
+
RiskScore(
|
| 265 |
+
name="Model2",
|
| 266 |
+
score="N/A: Invalid",
|
| 267 |
+
score_type="not_applicable",
|
| 268 |
+
),
|
| 269 |
+
]
|
| 270 |
+
|
| 271 |
+
separated = separate_score_types(scores)
|
| 272 |
+
|
| 273 |
+
assert len(separated["probability"]) == 2
|
| 274 |
+
assert len(separated["categorical"]) == 1
|
| 275 |
+
assert len(separated["not_applicable"]) == 2
|
| 276 |
+
|
| 277 |
+
def test_empty_list(self):
|
| 278 |
+
"""Test separation with empty list."""
|
| 279 |
+
separated = separate_score_types([])
|
| 280 |
+
|
| 281 |
+
assert separated["probability"] == []
|
| 282 |
+
assert separated["categorical"] == []
|
| 283 |
+
assert separated["not_applicable"] == []
|
| 284 |
+
|
| 285 |
+
def test_only_probabilities(self):
|
| 286 |
+
"""Test separation when all scores are probabilities."""
|
| 287 |
+
scores = [
|
| 288 |
+
RiskScore(name="Model1", score="1%", score_type="probability"),
|
| 289 |
+
RiskScore(name="Model2", score="2%", score_type="probability"),
|
| 290 |
+
]
|
| 291 |
+
|
| 292 |
+
separated = separate_score_types(scores)
|
| 293 |
+
|
| 294 |
+
assert len(separated["probability"]) == 2
|
| 295 |
+
assert separated["categorical"] == []
|
| 296 |
+
assert separated["not_applicable"] == []
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
class TestFilterFunctions:
|
| 300 |
+
"""Test individual filter functions."""
|
| 301 |
+
|
| 302 |
+
def test_separate_score_types_for_probability(self):
|
| 303 |
+
"""Test using separate_score_types to get probability scores."""
|
| 304 |
+
scores = [
|
| 305 |
+
RiskScore(name="Model1", score="1%", score_type="probability"),
|
| 306 |
+
RiskScore(name="Model2", score="Result", score_type="categorical"),
|
| 307 |
+
RiskScore(name="Model3", score="2%", score_type="probability"),
|
| 308 |
+
]
|
| 309 |
+
|
| 310 |
+
separated = separate_score_types(scores)
|
| 311 |
+
|
| 312 |
+
assert len(separated["probability"]) == 2
|
| 313 |
+
assert all(
|
| 314 |
+
score.score_type == "probability" for score in separated["probability"]
|
| 315 |
+
)
|
| 316 |
+
|
| 317 |
+
def test_separate_score_types_for_categorical(self):
|
| 318 |
+
"""Test using separate_score_types to get categorical scores."""
|
| 319 |
+
scores = [
|
| 320 |
+
RiskScore(name="Model1", score="1%", score_type="probability"),
|
| 321 |
+
RiskScore(name="Model2", score="Result", score_type="categorical"),
|
| 322 |
+
RiskScore(name="Model3", score="N/A", score_type="not_applicable"),
|
| 323 |
+
]
|
| 324 |
+
|
| 325 |
+
separated = separate_score_types(scores)
|
| 326 |
+
|
| 327 |
+
assert len(separated["categorical"]) == 1
|
| 328 |
+
assert separated["categorical"][0].score_type == "categorical"
|
| 329 |
+
|
| 330 |
+
def test_separate_score_types_for_not_applicable(self):
|
| 331 |
+
"""Test using separate_score_types to get not_applicable scores."""
|
| 332 |
+
scores = [
|
| 333 |
+
RiskScore(name="Model1", score="1%", score_type="probability"),
|
| 334 |
+
RiskScore(
|
| 335 |
+
name="Model2", score="N/A: Reason 1", score_type="not_applicable"
|
| 336 |
+
),
|
| 337 |
+
RiskScore(
|
| 338 |
+
name="Model3", score="N/A: Reason 2", score_type="not_applicable"
|
| 339 |
+
),
|
| 340 |
+
]
|
| 341 |
+
|
| 342 |
+
separated = separate_score_types(scores)
|
| 343 |
+
|
| 344 |
+
assert len(separated["not_applicable"]) == 2
|
| 345 |
+
assert all(
|
| 346 |
+
score.score_type == "not_applicable"
|
| 347 |
+
for score in separated["not_applicable"]
|
| 348 |
+
)
|
| 349 |
+
|
| 350 |
+
def test_separate_score_types_for_all_types(self):
|
| 351 |
+
"""Test using separate_score_types to get all score types at once."""
|
| 352 |
+
scores = [
|
| 353 |
+
RiskScore(name="Model1", score="1%", score_type="probability"),
|
| 354 |
+
RiskScore(name="Model2", score="2%", score_type="probability"),
|
| 355 |
+
RiskScore(name="Model3", score="Result", score_type="categorical"),
|
| 356 |
+
RiskScore(name="Model4", score="N/A: Age", score_type="not_applicable"),
|
| 357 |
+
]
|
| 358 |
+
|
| 359 |
+
separated = separate_score_types(scores)
|
| 360 |
+
|
| 361 |
+
assert len(separated["probability"]) == 2
|
| 362 |
+
assert len(separated["categorical"]) == 1
|
| 363 |
+
assert len(separated["not_applicable"]) == 1
|
| 364 |
+
|
| 365 |
+
|
| 366 |
+
class TestAggregatedRiskDataclass:
|
| 367 |
+
"""Test the AggregatedRisk dataclass."""
|
| 368 |
+
|
| 369 |
+
def test_dataclass_creation(self):
|
| 370 |
+
"""Test creating an AggregatedRisk object."""
|
| 371 |
+
score = RiskScore(
|
| 372 |
+
name="Gail",
|
| 373 |
+
score="1.5%",
|
| 374 |
+
cancer_type="breast",
|
| 375 |
+
probability_percent=1.5,
|
| 376 |
+
time_horizon_years=5.0,
|
| 377 |
+
score_type="probability",
|
| 378 |
+
)
|
| 379 |
+
|
| 380 |
+
agg = AggregatedRisk(
|
| 381 |
+
cancer_type="breast",
|
| 382 |
+
time_horizon_years=5.0,
|
| 383 |
+
avg_probability_percent=1.5,
|
| 384 |
+
risk_category="Low",
|
| 385 |
+
model_count=1,
|
| 386 |
+
individual_scores=[score],
|
| 387 |
+
)
|
| 388 |
+
|
| 389 |
+
assert agg.cancer_type == "breast"
|
| 390 |
+
assert agg.time_horizon_years == 5.0
|
| 391 |
+
assert agg.avg_probability_percent == 1.5
|
| 392 |
+
assert agg.risk_category == "Low"
|
| 393 |
+
assert agg.model_count == 1
|
| 394 |
+
assert len(agg.individual_scores) == 1
|
| 395 |
+
|
| 396 |
+
|
| 397 |
+
class TestNormalizeCancerType:
|
| 398 |
+
"""Test cancer type normalization."""
|
| 399 |
+
|
| 400 |
+
def test_normalize_with_cancer_suffix(self):
|
| 401 |
+
"""Test removing 'cancer' suffix."""
|
| 402 |
+
assert normalize_cancer_type("Breast Cancer") == "breast"
|
| 403 |
+
assert normalize_cancer_type("Lung cancer") == "lung"
|
| 404 |
+
assert normalize_cancer_type("PROSTATE CANCER") == "prostate"
|
| 405 |
+
|
| 406 |
+
def test_normalize_without_cancer_suffix(self):
|
| 407 |
+
"""Test normalization without 'cancer' suffix."""
|
| 408 |
+
assert normalize_cancer_type("Breast") == "breast"
|
| 409 |
+
assert normalize_cancer_type("LUNG") == "lung"
|
| 410 |
+
assert normalize_cancer_type("Prostate") == "prostate"
|
| 411 |
+
|
| 412 |
+
def test_normalize_with_whitespace(self):
|
| 413 |
+
"""Test trimming whitespace."""
|
| 414 |
+
assert normalize_cancer_type(" Breast Cancer ") == "breast"
|
| 415 |
+
assert normalize_cancer_type("Lung cancer") == "lung"
|
| 416 |
+
|
| 417 |
+
def test_normalize_empty_string(self):
|
| 418 |
+
"""Test empty string."""
|
| 419 |
+
assert normalize_cancer_type("") == ""
|
| 420 |
+
|
| 421 |
+
def test_display_cancer_type(self):
|
| 422 |
+
"""Test display-friendly cancer type names."""
|
| 423 |
+
assert get_display_cancer_type("breast") == "Breast"
|
| 424 |
+
assert get_display_cancer_type("lung") == "Lung"
|
| 425 |
+
assert get_display_cancer_type("prostate") == "Prostate"
|
| 426 |
+
|
| 427 |
+
|
| 428 |
+
class TestCategorizeRisk:
|
| 429 |
+
"""Test risk categorization."""
|
| 430 |
+
|
| 431 |
+
def test_categorize_short_horizon_very_low(self):
|
| 432 |
+
"""Test very low risk for short time horizon."""
|
| 433 |
+
assert categorize_risk(0.3, 5.0) == "Very Low"
|
| 434 |
+
|
| 435 |
+
def test_categorize_short_horizon_low(self):
|
| 436 |
+
"""Test low risk for short time horizon."""
|
| 437 |
+
assert categorize_risk(1.0, 5.0) == "Low"
|
| 438 |
+
|
| 439 |
+
def test_categorize_short_horizon_moderate(self):
|
| 440 |
+
"""Test moderate risk for short time horizon."""
|
| 441 |
+
assert categorize_risk(2.0, 5.0) == "Moderate"
|
| 442 |
+
|
| 443 |
+
def test_categorize_short_horizon_moderately_high(self):
|
| 444 |
+
"""Test moderately high risk for short time horizon."""
|
| 445 |
+
assert categorize_risk(4.0, 5.0) == "Moderately High"
|
| 446 |
+
|
| 447 |
+
def test_categorize_short_horizon_high(self):
|
| 448 |
+
"""Test high risk for short time horizon."""
|
| 449 |
+
assert categorize_risk(6.0, 5.0) == "High"
|
| 450 |
+
|
| 451 |
+
def test_categorize_long_horizon_very_low(self):
|
| 452 |
+
"""Test very low risk for long time horizon."""
|
| 453 |
+
assert categorize_risk(0.5, 10.0) == "Very Low"
|
| 454 |
+
|
| 455 |
+
def test_categorize_long_horizon_low(self):
|
| 456 |
+
"""Test low risk for long time horizon."""
|
| 457 |
+
assert categorize_risk(2.0, 10.0) == "Low"
|
| 458 |
+
|
| 459 |
+
def test_categorize_long_horizon_moderate(self):
|
| 460 |
+
"""Test moderate risk for long time horizon."""
|
| 461 |
+
assert categorize_risk(5.0, 10.0) == "Moderate"
|
| 462 |
+
|
| 463 |
+
def test_categorize_long_horizon_moderately_high(self):
|
| 464 |
+
"""Test moderately high risk for long time horizon."""
|
| 465 |
+
assert categorize_risk(10.0, 10.0) == "Moderately High"
|
| 466 |
+
|
| 467 |
+
def test_categorize_long_horizon_high(self):
|
| 468 |
+
"""Test high risk for long time horizon."""
|
| 469 |
+
assert categorize_risk(20.0, 10.0) == "High"
|
| 470 |
+
|
| 471 |
+
def test_categorize_lifetime_risk(self):
|
| 472 |
+
"""Test categorization for lifetime risk."""
|
| 473 |
+
assert categorize_risk(0.5, 79.0) == "Very Low"
|
| 474 |
+
assert categorize_risk(2.0, 79.0) == "Low"
|
| 475 |
+
assert categorize_risk(5.0, 79.0) == "Moderate"
|
| 476 |
+
assert categorize_risk(12.0, 79.0) == "Moderately High"
|
| 477 |
+
assert categorize_risk(20.0, 79.0) == "High"
|
tests/test_risk_models/test_gail_model.py
CHANGED
|
@@ -362,6 +362,6 @@ class TestGailModel:
|
|
| 362 |
"Gail Model" in self.model.description()
|
| 363 |
or "BCRAT" in self.model.description()
|
| 364 |
)
|
| 365 |
-
assert "1.
|
| 366 |
assert isinstance(self.model.references(), list)
|
| 367 |
assert len(self.model.references()) > 0
|
|
|
|
| 362 |
"Gail Model" in self.model.description()
|
| 363 |
or "BCRAT" in self.model.description()
|
| 364 |
)
|
| 365 |
+
assert "1.67" in self.model.interpretation()
|
| 366 |
assert isinstance(self.model.references(), list)
|
| 367 |
assert len(self.model.references()) > 0
|