jeuko commited on
Commit
94a0f4c
·
verified ·
1 Parent(s): 96dba57

Sync from GitHub (main)

Browse files
configs/output_format/assessment.yaml CHANGED
@@ -3,7 +3,7 @@ format_instructions: |
3
  - Return ONLY valid JSON. Do not include any explanatory text, disclaimers, or additional content before or after the JSON.
4
  - The `RISK SCORES (GROUND TRUTH)` section contains validated risk scores. You MUST provide interpretations and explanations for these scores, NOT generate new risk levels.
5
  - Provide a diagnostic recommendation for EVERY diagnostic protocol provided in the `DIAGNOSTIC PROTOCOLS` (i.e. {diagnostic_protocols}).
6
- - The ONLY allowed values for the "category" field in "identified_risk_factors" and "contributing_factors" objects are: {allowed_categories}. You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category.
7
  - The ONLY allowed values for the "strength" field in "contributing_factors" objects are: {allowed_strengths}.
8
 
9
  The output must be formatted as a valid JSON instance with the following structure:
@@ -13,7 +13,7 @@ format_instructions: |
13
  "identified_risk_factors": [
14
  {{
15
  "description": "string - A human-readable description of the risk factor identified from the user's profile.",
16
- "category": "string - One of the predefined categories (Lifestyle, Family History, Personal Medical History, Demographics, Female-Specific, Clinical Observation, Other). You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category. "
17
  }}
18
  ],
19
  "llm_risk_interpretations": [
@@ -25,8 +25,8 @@ format_instructions: |
25
  "contributing_factors": [
26
  {{
27
  "description": "string - A human-readable description of the risk factor",
28
- "category": "string - One of the predefined categories (Lifestyle, Family History, Personal Medical History, Demographics, Female-Specific, Clinical Observation, Other). You MUST prioritize placing factors into the primary, specific categories. The 'Other' category is to be used ONLY as a last resort when a factor is clinically significant but genuinely cannot be classified into any other available category. ",
29
- "strength": "string - The assessed contribution strength (Major, Moderate, Minor)"
30
  }}
31
  ]
32
  }}
 
3
  - Return ONLY valid JSON. Do not include any explanatory text, disclaimers, or additional content before or after the JSON.
4
  - The `RISK SCORES (GROUND TRUTH)` section contains validated risk scores. You MUST provide interpretations and explanations for these scores, NOT generate new risk levels.
5
  - Provide a diagnostic recommendation for EVERY diagnostic protocol provided in the `DIAGNOSTIC PROTOCOLS` (i.e. {diagnostic_protocols}).
6
+ - **STRICT CATEGORY REQUIREMENT**: For ALL "category" fields in "identified_risk_factors" and "contributing_factors", you MUST use EXACTLY one of these values: {allowed_categories}. DO NOT create new categories like "Symptom", "Dermatologic", or any other value not in this list. Map symptoms to "Clinical Observation", dermatologic factors to "Lifestyle" or "Demographics" as appropriate, and use "Other" ONLY as a last resort when no other category fits.
7
  - The ONLY allowed values for the "strength" field in "contributing_factors" objects are: {allowed_strengths}.
8
 
9
  The output must be formatted as a valid JSON instance with the following structure:
 
13
  "identified_risk_factors": [
14
  {{
15
  "description": "string - A human-readable description of the risk factor identified from the user's profile.",
16
+ "category": "string - MUST be EXACTLY one of: Lifestyle, Family History, Personal Medical History, Demographics, Female-Specific, Clinical Observation, or Other. DO NOT use any other values like 'Symptom' or 'Dermatologic'. Map symptoms to 'Clinical Observation'. Use 'Other' only as last resort."
17
  }}
18
  ],
19
  "llm_risk_interpretations": [
 
25
  "contributing_factors": [
26
  {{
27
  "description": "string - A human-readable description of the risk factor",
28
+ "category": "string - MUST be EXACTLY one of: Lifestyle, Family History, Personal Medical History, Demographics, Female-Specific, Clinical Observation, or Other. DO NOT use any other values like 'Symptom' or 'Dermatologic'. Map symptoms to 'Clinical Observation'. Use 'Other' only as last resort.",
29
+ "strength": "string - MUST be exactly one of: Major, Moderate, or Minor"
30
  }}
31
  ]
32
  }}
prompts/instruction/assessment.md CHANGED
@@ -18,6 +18,6 @@ Your role is to:
18
 
19
  6. **Maintain consistency**: Ensure your explanations and recommendations align with the calculated risk scores and established guidelines. Do not contradict the quantitative scores.
20
 
21
- 7. **Structure the output**: Generate the JSON response following the `FORMAT INSTRUCTIONS` exactly.
22
 
23
  **Critical**: You are an interpreter and explainer of risk data, NOT a risk calculator. The validated risk models have already determined the risk levels - your job is to make them understandable and actionable for the patient.
 
18
 
19
  6. **Maintain consistency**: Ensure your explanations and recommendations align with the calculated risk scores and established guidelines. Do not contradict the quantitative scores.
20
 
21
+ 7. **Structure the output**: Generate the JSON response following the `FORMAT INSTRUCTIONS` exactly. **CRITICAL**: When categorizing risk factors, you MUST use ONLY the exact category names provided in the FORMAT INSTRUCTIONS. Do NOT invent new categories like "Symptom", "Dermatologic", or any other value. If you encounter symptoms, categorize them as "Clinical Observation". If unsure, use "Other" rather than creating a new category.
22
 
23
  **Critical**: You are an interpreter and explainer of risk data, NOT a risk calculator. The validated risk models have already determined the risk levels - your job is to make them understandable and actionable for the patient.
scripts/benchmark_llm.py ADDED
@@ -0,0 +1,695 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LLM Benchmarking Script
2
+
3
+ Measures token usage, costs, and timing for cancer risk assessments
4
+ across different LLM backends.
5
+ """
6
+
7
+ import argparse
8
+ import csv
9
+ import functools
10
+ import os
11
+ import time
12
+ from collections import defaultdict
13
+ from collections.abc import Callable
14
+ from dataclasses import dataclass
15
+ from datetime import datetime
16
+ from pathlib import Path
17
+ from typing import Any
18
+
19
+ import requests
20
+ import yaml
21
+ from dotenv import load_dotenv
22
+ from langchain_community.callbacks.manager import get_openai_callback
23
+ from loguru import logger
24
+ from reportlab.lib import colors
25
+ from reportlab.lib.pagesizes import letter
26
+ from reportlab.lib.styles import getSampleStyleSheet
27
+ from reportlab.lib.units import inch
28
+ from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle
29
+
30
+ from sentinel.config import AppConfig, ModelConfig, ResourcePaths
31
+ from sentinel.factory import SentinelFactory
32
+ from sentinel.utils import load_user_file
33
+
34
+ load_dotenv()
35
+
36
+
37
@dataclass
class ModelPricing:
    """Per-million-token pricing for one model, in USD.

    Attributes:
        input_per_million: Cost per 1M input (prompt) tokens, USD
        output_per_million: Cost per 1M output (completion) tokens, USD
    """

    input_per_million: float
    output_per_million: float


@dataclass
class BenchmarkModelConfig:
    """One model entry in the benchmark matrix.

    Attributes:
        provider: Provider key (google, openai, local)
        model_name: Model identifier used by the provider
        pricing: Pricing information per 1M tokens
    """

    provider: str
    model_name: str
    pricing: ModelPricing


# Pricing sources (USD per 1M tokens):
# - https://ai.google.dev/pricing
# - https://openai.com/api/pricing/
BENCHMARK_MODELS = [
    BenchmarkModelConfig(
        provider="google",
        model_name="gemini-2.5-pro",
        pricing=ModelPricing(input_per_million=1.25, output_per_million=10.00),
    ),
    BenchmarkModelConfig(
        provider="google",
        model_name="gemini-2.5-flash-lite",
        pricing=ModelPricing(input_per_million=0.1, output_per_million=0.4),
    ),
]


@dataclass
class TokenUsage:
    """Token counts recorded for a single assessment run.

    Attributes:
        input_tokens: Tokens in the prompt/input
        output_tokens: Tokens in the model's response
    """

    input_tokens: int
    output_tokens: int

    @property
    def total_tokens(self) -> int:
        """Combined input + output token count.

        Returns:
            Sum of input and output tokens
        """
        return self.input_tokens + self.output_tokens


@dataclass
class BenchmarkResult:
    """Outcome of one model/profile benchmark run.

    Attributes:
        model_name: Name of the model
        provider: Provider key (openai, google, local)
        profile_name: Name of the profile
        token_usage: Token usage statistics
        cost: Cost in USD
        assessment_time_seconds: Time taken for assessment in seconds
    """

    model_name: str
    provider: str
    profile_name: str
    token_usage: TokenUsage
    cost: float
    assessment_time_seconds: float


def calculate_cost(token_usage: TokenUsage, pricing: ModelPricing) -> float:
    """Compute the USD cost of one run from token counts and pricing.

    Args:
        token_usage: Token usage statistics
        pricing: Model pricing per 1M tokens

    Returns:
        Cost in USD
    """
    # Pricing is quoted per 1M tokens, so scale each count down first.
    cost_in = token_usage.input_tokens / 1_000_000 * pricing.input_per_million
    cost_out = token_usage.output_tokens / 1_000_000 * pricing.output_per_million
    return cost_in + cost_out
138
+
139
+
140
+ def validate_directory_input(func: Callable[..., Any]) -> Callable[..., Any]:
141
+ """Decorator to validate directory argument.
142
+
143
+ Args:
144
+ func: Function to decorate
145
+
146
+ Returns:
147
+ Decorated function that validates directory input
148
+ """
149
+
150
+ @functools.wraps(func)
151
+ def wrapper(directory: Path, *args: Any, **kwargs: Any) -> Any:
152
+ """Wrapper function to validate directory input.
153
+
154
+ Args:
155
+ directory: Path to directory to validate
156
+ *args: Additional positional arguments
157
+ **kwargs: Additional keyword arguments
158
+
159
+ Returns:
160
+ Result of the wrapped function
161
+
162
+ Raises:
163
+ FileNotFoundError: If the directory does not exist
164
+ NotADirectoryError: If the path is not a directory
165
+ ValueError: If the directory is empty
166
+ """
167
+ if not directory.exists():
168
+ raise FileNotFoundError(f"Directory not found: {directory}")
169
+ if not directory.is_dir():
170
+ raise NotADirectoryError(f"Not a directory: {directory}")
171
+ if not any(directory.iterdir()):
172
+ raise ValueError(f"Directory is empty: {directory}")
173
+ return func(directory, *args, **kwargs)
174
+
175
+ return wrapper
176
+
177
+
178
+ def get_available_models() -> list[BenchmarkModelConfig]:
179
+ """Get list of available models for benchmarking.
180
+
181
+ Returns:
182
+ List of configured benchmark models
183
+ """
184
+ return BENCHMARK_MODELS
185
+
186
+
187
+ @validate_directory_input
188
+ def load_benchmark_profiles(benchmark_dir: Path) -> list[dict[str, Any]]:
189
+ """Load benchmark profiles.
190
+
191
+ Args:
192
+ benchmark_dir: Directory containing benchmark YAML files
193
+
194
+ Returns:
195
+ List of dicts with 'name' and 'path' keys
196
+ """
197
+ profiles = []
198
+ for yaml_file in sorted(benchmark_dir.glob("*.yaml")):
199
+ profiles.append({"name": yaml_file.stem, "path": yaml_file})
200
+ return profiles
201
+
202
+
203
def create_knowledge_base_paths(workspace_root: Path) -> ResourcePaths:
    """Assemble the ResourcePaths bundle rooted at a workspace directory.

    Args:
        workspace_root: Path to workspace root directory

    Returns:
        ResourcePaths configuration object
    """
    prompts = workspace_root / "prompts"
    configs = workspace_root / "configs"
    return ResourcePaths(
        persona=prompts / "persona/default.md",
        instruction_assessment=prompts / "instruction/assessment.md",
        instruction_conversation=prompts / "instruction/conversation.md",
        output_format_assessment=configs / "output_format/assessment.yaml",
        output_format_conversation=configs / "output_format/conversation.yaml",
        cancer_modules_dir=configs / "knowledge_base/cancer_modules",
        dx_protocols_dir=configs / "knowledge_base/dx_protocols",
    )
223
+
224
+
225
def validate_backend(provider: str, model_name: str) -> None:
    """Validate that backend is accessible.

    Args:
        provider: Provider key (e.g. "openai", "google", "local")
        model_name: Model identifier

    Raises:
        ValueError: If the backend is not accessible
    """
    if provider == "openai":
        if not os.getenv("OPENAI_API_KEY"):
            raise ValueError("OPENAI_API_KEY not set")
    elif provider == "google":
        if not os.getenv("GOOGLE_API_KEY"):
            raise ValueError("GOOGLE_API_KEY not set")
    elif provider == "local":
        ollama_base_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
        # Bug fix: a connection failure previously escaped as
        # requests.exceptions.ConnectionError, breaking the documented
        # "Raises ValueError" contract. Normalize transport errors.
        try:
            response = requests.get(f"{ollama_base_url}/api/tags", timeout=2)
        except requests.RequestException as err:
            raise ValueError("Ollama server not reachable") from err
        if response.status_code != 200:
            raise ValueError("Ollama server not responding")
        models = response.json().get("models", [])
        model_names = [m.get("name") for m in models]
        if model_name not in model_names:
            raise ValueError(f"Model not found. Run: ollama pull {model_name}")
    # NOTE(review): unknown providers pass silently — preserved from the
    # original; confirm whether an unknown provider should raise instead.
250
+
251
+
252
def run_assessment(
    model_config: BenchmarkModelConfig, profile_path: Path
) -> BenchmarkResult:
    """Run a single initial assessment and capture token usage, cost, timing.

    Args:
        model_config: Model configuration with pricing
        profile_path: Path to profile YAML file

    Returns:
        BenchmarkResult with cost and token usage

    Raises:
        ValueError: If the selected backend is not accessible
    """
    # Fail fast before constructing the full pipeline.
    validate_backend(model_config.provider, model_config.model_name)

    workspace_root = Path(__file__).parent.parent

    # Reuse the repo's default knowledge-base selection; only the model
    # under test differs between runs.
    with open(workspace_root / "configs/config.yaml") as f:
        default_config = yaml.safe_load(f)

    app_config = AppConfig(
        model=ModelConfig(
            provider=model_config.provider,
            model_name=model_config.model_name,
        ),
        knowledge_base_paths=create_knowledge_base_paths(workspace_root),
        selected_cancer_modules=default_config["knowledge_base"]["cancer_modules"],
        selected_dx_protocols=default_config["knowledge_base"]["dx_protocols"],
    )

    factory = SentinelFactory(app_config)
    conversation = factory.create_conversation_manager()
    user = load_user_file(str(profile_path))

    # Time only the assessment call, not the pipeline construction above.
    start_time = time.perf_counter()
    # NOTE(review): token counts come from LangChain's OpenAI callback even
    # for google/local providers — confirm the counters are actually
    # populated for non-OpenAI backends.
    with get_openai_callback() as cb:
        conversation.initial_assessment(user)
        input_tokens = cb.prompt_tokens
        output_tokens = cb.completion_tokens
    end_time = time.perf_counter()

    assessment_time = end_time - start_time
    token_usage = TokenUsage(input_tokens, output_tokens)
    cost = calculate_cost(token_usage, model_config.pricing)

    return BenchmarkResult(
        model_name=model_config.model_name,
        provider=model_config.provider,
        profile_name=profile_path.stem,
        token_usage=token_usage,
        cost=cost,
        assessment_time_seconds=assessment_time,
    )
304
+
305
+
306
def _rank_prefix(rank: int) -> str:
    """Return a medal emoji for ranks 1-3, otherwise ``"N."``."""
    return {1: "🥇", 2: "🥈", 3: "🥉"}.get(rank, f"{rank}.")


def print_results(results: list[BenchmarkResult]) -> None:
    """Print formatted results to console.

    Renders a per-model tree of profile runs with averages, then a cost
    ranking and a timing ranking across models.

    Args:
        results: List of benchmark results
    """
    by_model = defaultdict(list)
    for result in results:
        by_model[result.model_name].append(result)

    lines = []
    # Bug fix: the bottom border literal contained a mojibake replacement
    # character (���). Build all borders programmatically so they stay
    # aligned and cannot be corrupted by copy/paste again.
    width = 62
    lines.append("\n╔" + "═" * width + "╗")
    lines.append("║" + "LLM Cost Benchmark Results".center(width) + "║")
    lines.append("╚" + "═" * width + "╝\n")

    for model_name, model_results in sorted(by_model.items()):
        provider = model_results[0].provider
        lines.append(f"Model: {model_name} ({provider})")

        num_results = len(model_results)
        avg_cost = sum(result.cost for result in model_results) / num_results
        avg_input = (
            sum(result.token_usage.input_tokens for result in model_results)
            / num_results
        )
        avg_output = (
            sum(result.token_usage.output_tokens for result in model_results)
            / num_results
        )
        avg_time = (
            sum(result.assessment_time_seconds for result in model_results)
            / num_results
        )

        for result_index, result in enumerate(model_results):
            is_last = result_index == num_results - 1
            prefix = "└─" if is_last else "├─"
            indent = "   " if is_last else "│  "
            lines.append(f"{prefix} Profile: {result.profile_name}")
            lines.append(f"{indent}├─ Input: {result.token_usage.input_tokens:,}")
            lines.append(f"{indent}├─ Output: {result.token_usage.output_tokens:,}")
            lines.append(f"{indent}├─ Cost: ${result.cost:.4f}")
            lines.append(f"{indent}└─ Time: {result.assessment_time_seconds:.2f}s")

        lines.append(f"└─ Average: ${avg_cost:.4f}")
        lines.append(f"   ├─ Tokens: {avg_input:,.0f} input, {avg_output:,.0f} output")
        lines.append(f"   └─ Time: {avg_time:.2f}s\n")

    lines.append("═" * 63)
    lines.append("Summary - Model Ranking (Cheapest to Most Expensive)")
    lines.append("─" * 63)

    model_averages = sorted(
        (
            (
                model_name,
                sum(result.cost for result in model_results) / len(model_results),
            )
            for model_name, model_results in by_model.items()
        ),
        key=lambda model_avg_tuple: model_avg_tuple[1],
    )

    for rank, (model_name, avg_cost) in enumerate(model_averages, 1):
        # Shared helper replaces the nested-ternary medal chain that was
        # duplicated between the cost and timing sections.
        lines.append(f"{_rank_prefix(rank):<4} {model_name:<25} ${avg_cost:.4f}")

    lines.append("\n" + "═" * 63)
    lines.append("Summary - Timing Performance (Fastest to Slowest)")
    lines.append("─" * 63)

    model_timing = sorted(
        (
            (
                model_name,
                sum(result.assessment_time_seconds for result in model_results)
                / len(model_results),
            )
            for model_name, model_results in by_model.items()
        ),
        key=lambda model_time_tuple: model_time_tuple[1],
    )

    for rank, (model_name, avg_time) in enumerate(model_timing, 1):
        lines.append(f"{_rank_prefix(rank):<4} {model_name:<25} {avg_time:.2f}s")

    lines.append(f"\nTotal: {len(results)} assessments across {len(by_model)} models")
    lines.append("═" * 63 + "\n")

    logger.info("\n".join(lines))
413
+
414
+
415
def export_to_csv(results: list[BenchmarkResult], output_path: Path) -> None:
    """Write benchmark results to a CSV file, one row per run.

    Args:
        results: List of benchmark results
        output_path: Path to output CSV file
    """
    header = [
        "model_name",
        "provider",
        "profile_name",
        "input_tokens",
        "output_tokens",
        "total_tokens",
        "cost_usd",
        "assessment_time_seconds",
    ]
    rows = [
        [
            result.model_name,
            result.provider,
            result.profile_name,
            result.token_usage.input_tokens,
            result.token_usage.output_tokens,
            result.token_usage.total_tokens,
            f"{result.cost:.6f}",
            f"{result.assessment_time_seconds:.3f}",
        ]
        for result in results
    ]
    # newline="" is required by the csv module to avoid blank lines on Windows.
    with open(output_path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(rows)
    logger.success(f"Results exported to: {output_path}")
450
+
451
+
452
def export_to_pdf(
    results: list[BenchmarkResult],
    output_path: Path,
) -> None:
    """Export results to PDF file with formatted table.

    Builds a one-page report: title, generation timestamp, a short
    description, and a table of per-model averages sorted by cost
    (cheapest first).

    Args:
        results: List of benchmark results
        output_path: Path to output PDF file
    """
    doc = SimpleDocTemplate(
        str(output_path),
        pagesize=letter,
        leftMargin=0.75 * inch,
        rightMargin=0.75 * inch,
        topMargin=0.75 * inch,
        bottomMargin=0.75 * inch,
    )

    elements = []
    styles = getSampleStyleSheet()

    title = Paragraph(
        "<b>LLM Benchmark Report</b>",
        styles["Title"],
    )
    elements.append(title)
    elements.append(Spacer(1, 0.2 * inch))

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    timestamp_text = Paragraph(
        f"Generated: {timestamp}",
        styles["Normal"],
    )
    elements.append(timestamp_text)
    elements.append(Spacer(1, 0.3 * inch))

    # Group runs per model so the table shows one averaged row per model.
    by_model = defaultdict(list)
    for result in results:
        by_model[result.model_name].append(result)

    # NOTE(review): pricing is looked up in the module-level BENCHMARK_MODELS
    # table; a result for a model absent from that table renders "N/A".
    pricing_lookup = {model.model_name: model.pricing for model in BENCHMARK_MODELS}

    results_desc = Paragraph(
        "Average cost and timing for running a single cancer risk assessment given a completed patient questionnaire.",
        styles["Normal"],
    )
    elements.append(results_desc)
    elements.append(Spacer(1, 0.2 * inch))

    # Header row; "\n" inside a cell becomes a line break in the table.
    table_data = [
        [
            "Model",
            "Provider",
            "Avg Cost\nper Report",
            "Input Price\n(per 1M)",
            "Output Price\n(per 1M)",
            "Avg Input\nTokens",
            "Avg Output\nTokens",
            "Avg Time\n(seconds)",
        ]
    ]

    # Sort by average cost (cheapest first)
    sorted_models = sorted(
        by_model.items(),
        key=lambda model_tuple: sum(result.cost for result in model_tuple[1])
        / len(model_tuple[1]),
    )

    for model_name, model_results in sorted_models:
        provider = model_results[0].provider
        num_results = len(model_results)
        avg_cost = sum(result.cost for result in model_results) / num_results
        avg_input = (
            sum(result.token_usage.input_tokens for result in model_results)
            / num_results
        )
        avg_output = (
            sum(result.token_usage.output_tokens for result in model_results)
            / num_results
        )
        avg_time = (
            sum(result.assessment_time_seconds for result in model_results)
            / num_results
        )

        pricing = pricing_lookup.get(model_name)
        input_price = f"${pricing.input_per_million:.2f}" if pricing else "N/A"
        output_price = f"${pricing.output_per_million:.2f}" if pricing else "N/A"

        table_data.append(
            [
                model_name,
                provider,
                f"${avg_cost:.4f}",
                input_price,
                output_price,
                f"{avg_input:,.0f}",
                f"{avg_output:,.0f}",
                f"{avg_time:.1f}",
            ]
        )

    # Fixed column widths tuned to fit within letter-size margins.
    table = Table(
        table_data,
        colWidths=[
            1.4 * inch,
            0.75 * inch,
            0.8 * inch,
            0.75 * inch,
            0.75 * inch,
            0.7 * inch,
            0.7 * inch,
            0.65 * inch,
        ],
    )

    table_style = TableStyle(
        [
            # Header styling
            ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#4A90E2")),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.whitesmoke),
            ("ALIGN", (0, 0), (-1, 0), "CENTER"),
            ("VALIGN", (0, 0), (-1, 0), "MIDDLE"),
            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
            ("FONTSIZE", (0, 0), (-1, 0), 8),
            ("BOTTOMPADDING", (0, 0), (-1, 0), 10),
            ("TOPPADDING", (0, 0), (-1, 0), 10),
            # Data rows styling
            ("BACKGROUND", (0, 1), (-1, -1), colors.beige),
            ("TEXTCOLOR", (0, 1), (-1, -1), colors.black),
            ("ALIGN", (0, 1), (1, -1), "LEFT"),
            ("ALIGN", (2, 1), (-1, -1), "CENTER"),
            ("VALIGN", (0, 1), (-1, -1), "MIDDLE"),
            ("FONTNAME", (0, 1), (-1, -1), "Helvetica"),
            ("FONTSIZE", (0, 1), (-1, -1), 8),
            ("TOPPADDING", (0, 1), (-1, -1), 7),
            ("BOTTOMPADDING", (0, 1), (-1, -1), 7),
            # Alternating row colors
            ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.beige, colors.lightgrey]),
            # Grid
            ("GRID", (0, 0), (-1, -1), 1, colors.black),
        ]
    )

    table.setStyle(table_style)
    elements.append(table)
    elements.append(Spacer(1, 0.3 * inch))

    doc.build(elements)
    logger.success(f"PDF report generated: {output_path}")
604
+
605
+
606
def parse_args() -> argparse.Namespace:
    """Parse command-line arguments for the benchmark CLI.

    Returns:
        Parsed command-line arguments
    """
    default_benchmark_dir = Path(__file__).parent.parent / "examples/benchmark"

    parser = argparse.ArgumentParser(description="Benchmark LLM costs")
    parser.add_argument(
        "--benchmark-dir",
        type=Path,
        default=default_benchmark_dir,
        help="Benchmark profile directory",
    )
    parser.add_argument("--models", nargs="+", help="Specific models to test (by name)")
    parser.add_argument("--profiles", nargs="+", help="Specific profiles to test")
    parser.add_argument("--output", type=Path, help="Export to CSV")
    return parser.parse_args()
637
+
638
+
639
def main() -> None:
    """Main entry point.

    Loads the model matrix and benchmark profiles, applies optional CLI
    filters, runs every model x profile assessment, prints a console
    summary, always writes a timestamped PDF report, and optionally
    writes a CSV when ``--output`` is given.

    Raises:
        ValueError: If no matching models or profiles found
    """
    args = parse_args()

    logger.info("Loading benchmark configuration...")
    all_models = get_available_models()

    logger.info("Loading profiles...")
    all_profiles = load_benchmark_profiles(args.benchmark_dir)

    # CLI filters are optional; an empty result after filtering is an error.
    if args.models:
        all_models = [model for model in all_models if model.model_name in args.models]
        if not all_models:
            raise ValueError(f"No matching models: {args.models}")

    if args.profiles:
        all_profiles = [
            profile for profile in all_profiles if profile["name"] in args.profiles
        ]
        if not all_profiles:
            raise ValueError(f"No matching profiles: {args.profiles}")

    logger.info(
        f"\nRunning {len(all_models)} model(s) x {len(all_profiles)} profile(s)...\n"
    )

    # Full cartesian product: every model runs every profile.
    results = []
    for model_index, model in enumerate(all_models, 1):
        for profile in all_profiles:
            logger.info(
                f"[{model_index}/{len(all_models)}] {model.model_name}: {profile['name']}"
            )
            result = run_assessment(model, profile["path"])
            results.append(result)

    print_results(results)

    # Generate PDF report with timestamp
    workspace_root = Path(__file__).parent.parent
    outputs_dir = workspace_root / "outputs"
    outputs_dir.mkdir(exist_ok=True)

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    pdf_path = outputs_dir / f"llm_benchmark_{timestamp}.pdf"

    export_to_pdf(results, pdf_path)

    if args.output:
        export_to_csv(results, args.output)
692
+
693
+
694
# Script entry point: run the benchmark CLI when executed directly.
if __name__ == "__main__":
    main()
src/sentinel/conversation.py CHANGED
@@ -8,7 +8,10 @@ from langchain_core.runnables.base import Runnable
8
 
9
  from .llm_service import extract_thinking
10
  from .models import ConversationResponse, InitialAssessment
11
- from .risk_aggregation import format_scores_for_llm, group_scores_by_cancer_type
 
 
 
12
  from .user_input import UserInput
13
 
14
 
@@ -58,8 +61,9 @@ class ConversationManager:
58
  if risk_scores is None:
59
  # Try to get from user if it has risk_scores attribute
60
  risk_scores = getattr(user, "risk_scores", [])
 
 
61
  grouped_scores = group_scores_by_cancer_type(risk_scores)
62
- formatted_scores = format_scores_for_llm(grouped_scores)
63
 
64
  # Invoke LLM with scores as separate context
65
  result = self.structured_chain.invoke(
 
8
 
9
  from .llm_service import extract_thinking
10
  from .models import ConversationResponse, InitialAssessment
11
+ from .risk_aggregation import (
12
+ format_scores_with_aggregation,
13
+ group_scores_by_cancer_type,
14
+ )
15
  from .user_input import UserInput
16
 
17
 
 
61
  if risk_scores is None:
62
  # Try to get from user if it has risk_scores attribute
63
  risk_scores = getattr(user, "risk_scores", [])
64
+
65
+ formatted_scores = format_scores_with_aggregation(risk_scores)
66
  grouped_scores = group_scores_by_cancer_type(risk_scores)
 
67
 
68
  # Invoke LLM with scores as separate context
69
  result = self.structured_chain.invoke(
src/sentinel/models.py CHANGED
@@ -1591,6 +1591,20 @@ class RiskScore(SentinelBaseModel):
1591
  references: list[str] | None = Field(
1592
  default=None, description="References to the risk score"
1593
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1594
 
1595
 
1596
  # ---------------------------------------------------------------------------
 
1591
  references: list[str] | None = Field(
1592
  default=None, description="References to the risk score"
1593
  )
1594
+ probability_percent: float | None = Field(
1595
+ default=None,
1596
+ description="Numeric probability as percentage (0-100) if score is probability-based",
1597
+ ge=0,
1598
+ le=100,
1599
+ )
1600
+ time_horizon_years: float | None = Field(
1601
+ default=None,
1602
+ description="Time horizon in years for probability (e.g., 5, 10, lifetime=79)",
1603
+ )
1604
+ score_type: Literal["probability", "categorical", "not_applicable"] = Field(
1605
+ default="probability",
1606
+ description="Type of score output",
1607
+ )
1608
 
1609
 
1610
  # ---------------------------------------------------------------------------
src/sentinel/probability_aggregation.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Probability aggregation utilities for risk scores.
2
+
3
+ This module provides functions to aggregate probability-based risk scores by cancer type
4
+ and time horizon, and to separate probability-based scores from categorical/diagnostic scores.
5
+ """
6
+
7
+ import re
8
+ from collections import defaultdict
9
+ from dataclasses import dataclass
10
+
11
+ from sentinel.models import RiskScore
12
+
13
+
14
def normalize_cancer_type(cancer_type: str) -> str:
    """Normalize a cancer-type label so spelling variants group together.

    Lowercases, strips surrounding whitespace, and drops a trailing
    "cancer" word, so that e.g. "Breast Cancer", "breast", and "BREAST"
    all map to the same grouping key.

    Args:
        cancer_type: Raw cancer type string (e.g., "Breast Cancer").

    Returns:
        Normalized cancer type (e.g., "breast").

    Examples:
        >>> normalize_cancer_type("Breast Cancer")
        'breast'
        >>> normalize_cancer_type("Lung cancer")
        'lung'
        >>> normalize_cancer_type("PROSTATE")
        'prostate'
    """
    if not cancer_type:
        return ""

    lowered = cancer_type.strip().lower()
    # Remove a trailing "cancer" token along with any whitespace around it,
    # then strip once more in case only whitespace remains.
    return re.sub(r"\s*cancer\s*$", "", lowered).strip()
46
+
47
+
48
def get_display_cancer_type(cancer_type: str) -> str:
    """Format a normalized cancer type for display.

    Title-cases the (already normalized, lowercase) cancer type so it
    reads well in report headings and tables.

    Args:
        cancer_type: Normalized cancer type string.

    Returns:
        Display-friendly cancer type name.

    Examples:
        >>> get_display_cancer_type("breast")
        'Breast'
        >>> get_display_cancer_type("lung")
        'Lung'
    """
    return str.title(cancer_type)
66
+
67
+
68
def categorize_risk(probability_percent: float, time_horizon_years: float) -> str:
    """Map a probability onto a discrete risk category.

    Horizons shorter than 10 years use tighter cutoffs than 10-year /
    lifetime horizons, since the same absolute probability is more
    concerning over a shorter window.

    Args:
        probability_percent: Probability as percentage (0-100).
        time_horizon_years: Time horizon in years.

    Returns:
        Risk category string.
    """
    # (exclusive upper bound, label) pairs ordered from lowest to highest risk.
    if time_horizon_years < 10:
        cutoffs = [
            (0.5, "Very Low"),
            (1.5, "Low"),
            (3.0, "Moderate"),
            (5.0, "Moderately High"),
        ]
    else:
        cutoffs = [
            (1.0, "Very Low"),
            (3.0, "Low"),
            (7.0, "Moderate"),
            (15.0, "Moderately High"),
        ]

    for upper_bound, label in cutoffs:
        if probability_percent < upper_bound:
            return label
    return "High"
102
+
103
+
104
@dataclass
class AggregatedRisk:
    """Aggregated risk for a cancer type at a specific time horizon.

    Produced by ``aggregate_probabilities``, which averages the
    probabilities of every model assessing the same cancer type over the
    same time horizon.

    Attributes:
        cancer_type: The cancer type being assessed (normalized, lowercase,
            as produced by ``normalize_cancer_type``).
        time_horizon_years: Time horizon in years for the aggregated probability.
        avg_probability_percent: Average probability (0-100) across all
            contributing models.
        risk_category: Discrete risk category (e.g., "Low", "Moderate", "High").
        model_count: Number of models that contributed to this aggregation.
        individual_scores: List of original RiskScore objects that were aggregated.
    """

    cancer_type: str
    time_horizon_years: float
    avg_probability_percent: float
    risk_category: str
    model_count: int
    individual_scores: list[RiskScore]
123
+
124
+
125
def aggregate_probabilities(scores: list[RiskScore]) -> list[AggregatedRisk]:
    """Aggregate probability scores by cancer type and time horizon.

    Groups scores by (normalized cancer type, time horizon) and averages
    the probability within each group. Only scores with
    ``score_type == "probability"`` and all required fields populated
    (cancer_type, probability_percent, time_horizon_years) contribute.

    Args:
        scores: List of RiskScore objects to aggregate.

    Returns:
        List of AggregatedRisk objects, sorted by cancer type then time horizon.
    """

    def _usable(s: RiskScore) -> bool:
        # Only probability scores with every aggregation field populated count;
        # categorical and not-applicable scores are excluded here.
        return (
            s.score_type == "probability"
            and bool(s.cancer_type)
            and s.probability_percent is not None
            and s.time_horizon_years is not None
        )

    # Bucket usable scores by (normalized cancer type, time horizon).
    buckets: dict[tuple[str, float], list[RiskScore]] = defaultdict(list)
    for score in filter(_usable, scores):
        bucket_key = (
            normalize_cancer_type(score.cancer_type),
            score.time_horizon_years,
        )
        buckets[bucket_key].append(score)

    # Average each bucket and attach a discrete risk category.
    results: list[AggregatedRisk] = []
    for (cancer_type, horizon), members in buckets.items():
        avg = sum(m.probability_percent for m in members) / len(members)
        results.append(
            AggregatedRisk(
                cancer_type=cancer_type,
                time_horizon_years=horizon,
                avg_probability_percent=avg,
                risk_category=categorize_risk(avg, horizon),
                model_count=len(members),
                individual_scores=members,
            )
        )

    # Deterministic ordering: cancer type first, then increasing horizon.
    return sorted(results, key=lambda agg: (agg.cancer_type, agg.time_horizon_years))
193
+
194
+
195
def separate_score_types(scores: list[RiskScore]) -> dict[str, list[RiskScore]]:
    """Partition scores into probability, categorical, and not_applicable groups.

    Args:
        scores: List of RiskScore objects to separate.

    Returns:
        Dictionary with keys "probability", "categorical", and
        "not_applicable", each mapping to the scores of that type.
        Scores whose score_type matches none of these keys are dropped.
    """
    buckets: dict[str, list[RiskScore]] = {
        kind: [] for kind in ("probability", "categorical", "not_applicable")
    }

    for score in scores:
        bucket = buckets.get(score.score_type)
        if bucket is not None:
            bucket.append(score)

    return buckets
src/sentinel/reporting.py CHANGED
@@ -36,6 +36,12 @@ from .models import (
36
  ContributionStrength,
37
  InitialAssessment,
38
  RiskFactorCategory,
 
 
 
 
 
 
39
  )
40
  from .user_input import UserInput
41
 
@@ -133,6 +139,39 @@ def _get_rec_color(level: int | None, color_format: str = "hex"):
133
  return PDF_COLORS[color_key] if color_format == "pdf" else HEX_COLORS[color_key]
134
 
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  def _markdown_to_reportlab(md_text: str) -> str:
137
  """Convert Markdown text to ReportLab-compatible HTML-like markup.
138
 
@@ -180,6 +219,7 @@ def generate_excel_report(
180
 
181
  _create_summary_sheet(wb, assessment, user_input)
182
  _create_risk_scores_sheet(wb, assessment)
 
183
  _create_data_sheet(wb, "User Input Data", user_input.model_dump(mode="json"))
184
  _create_data_sheet(wb, "Raw LLM Output", assessment.model_dump(mode="json"))
185
 
@@ -466,12 +506,12 @@ def _create_risk_scores_sheet(wb: Workbook, assessment: InitialAssessment) -> No
466
  header_fill = PatternFill(start_color=HEX_COLORS["header_fill"], fill_type="solid")
467
  wrap_alignment = Alignment(wrap_text=True, vertical="top")
468
 
469
- ws.merge_cells("A1:E1")
470
  ws["A1"] = "Calculated Risk Scores (Ground Truth)"
471
  ws["A1"].font = title_font
472
  ws["A1"].alignment = Alignment(horizontal="center")
473
 
474
- ws.merge_cells("A2:E2")
475
  ws["A2"] = "Scores calculated using validated clinical risk models"
476
  ws["A2"].alignment = Alignment(horizontal="center")
477
 
@@ -481,8 +521,17 @@ def _create_risk_scores_sheet(wb: Workbook, assessment: InitialAssessment) -> No
481
  ws.cell(row=current_row, column=1, value="No risk scores calculated")
482
  return
483
 
484
- # Create headers
485
- headers = ["Cancer Type", "Model Name", "Score", "Interpretation", "References"]
 
 
 
 
 
 
 
 
 
486
  for col_idx, header in enumerate(headers, 1):
487
  cell = ws.cell(row=current_row, column=col_idx, value=header)
488
  cell.font = header_font
@@ -500,13 +549,30 @@ def _create_risk_scores_sheet(wb: Workbook, assessment: InitialAssessment) -> No
500
  ws.cell(row=current_row, column=2, value=score.name)
501
  ws.cell(row=current_row, column=3, value=score.score or "N/A")
502
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
  interp_cell = ws.cell(
504
- row=current_row, column=4, value=score.interpretation or "N/A"
505
  )
506
  interp_cell.alignment = wrap_alignment
507
 
508
  refs = "; ".join(score.references) if score.references else "N/A"
509
- refs_cell = ws.cell(row=current_row, column=5, value=refs)
510
  refs_cell.alignment = wrap_alignment
511
 
512
  current_row += 1
@@ -515,8 +581,98 @@ def _create_risk_scores_sheet(wb: Workbook, assessment: InitialAssessment) -> No
515
  ws.column_dimensions["A"].width = 20
516
  ws.column_dimensions["B"].width = 25
517
  ws.column_dimensions["C"].width = 15
518
- ws.column_dimensions["D"].width = 50
519
- ws.column_dimensions["E"].width = 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520
 
521
 
522
  def _create_data_sheet(wb: Workbook, title: str, data: dict) -> None:
@@ -787,65 +943,183 @@ def generate_pdf_report(
787
  story.append(Paragraph("Assessment", heading_style))
788
  story.append(Spacer(1, SPACER_NORMAL))
789
 
790
- # --- NEW: Calculated Risk Scores Section ---
791
  if assessment.calculated_risk_scores:
792
- story.append(Paragraph("Calculated Risk Scores", subheading_style))
793
- story.append(Spacer(1, SPACER_SMALL))
794
- risk_scores_intro = """
795
- The following risk scores have been calculated using validated clinical risk models.
796
- Each score represents a quantitative assessment based on your specific profile.
797
- """
798
- story.append(Paragraph(risk_scores_intro, styles["BodyText"]))
799
- story.append(Spacer(1, SPACER_SMALL))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
800
 
801
- # Create table for calculated risk scores
802
- score_data = [
803
- [
804
- Paragraph(h, table_header_style)
805
- for h in ["Cancer Type", "Model", "Score", "Interpretation"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
806
  ]
807
- ]
808
- score_style_cmds = [
809
- (
810
- "BACKGROUND",
811
- (0, 0),
812
- (-1, 0),
813
- colors.HexColor(f"#{HEX_COLORS['header_fill']}"),
814
- ),
815
- ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
816
- ("GRID", (0, 0), (-1, -1), 1, colors.black),
817
- ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
818
- ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
819
- ("TOPPADDING", (0, 0), (-1, -1), 4),
820
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
821
 
822
- # Sort by cancer type and add rows
823
- for cancer_type, scores in sorted(assessment.calculated_risk_scores.items()):
824
- for i, score in enumerate(scores):
825
- # Only show cancer type on first row for each cancer
826
- cancer_cell = Paragraph(cancer_type, table_body_style) if i == 0 else ""
827
- score_data.append(
828
  [
829
- cancer_cell,
830
  Paragraph(score.name, table_body_style),
 
831
  Paragraph(score.score or "N/A", table_body_style),
832
- Paragraph(score.interpretation or "N/A", table_body_style),
 
833
  ]
834
  )
835
 
836
- score_widths = [1.5 * inch, 1.5 * inch, 1.0 * inch, 2.5 * inch]
837
- scaled_widths = [w * (CONTENT_WIDTH / sum(score_widths)) for w in score_widths]
838
- scores_table = Table(
839
- score_data, colWidths=scaled_widths, style=score_style_cmds, splitByRow=1
840
- )
841
- story.append(scores_table)
842
- story.append(Spacer(1, SPACER_NORMAL))
 
 
 
 
 
843
 
844
- # --- New 3-Column Summary Section ---
845
  headers = [
846
  Paragraph("<b>Overall Risk Score</b>", summary_header_style),
847
  Paragraph("<b>Risk Breakdown</b>", summary_header_style),
848
- Paragraph("<b>Dx Recommendations</b>", summary_header_style),
849
  ]
850
 
851
  gauge = ""
@@ -853,16 +1127,13 @@ def generate_pdf_report(
853
  gauge = _create_risk_gauge(assessment.overall_risk_score, width=120, height=70)
854
 
855
  risk_panel = _create_risk_breakdown_chart(
856
- assessment.risk_assessments, width=150, height=70
857
- )
858
- dx_panel = _create_dx_recommendations_summary(
859
- assessment.dx_recommendations, width=150, height=70
860
  )
861
 
862
- content_row = [gauge, risk_panel, dx_panel]
863
 
864
  summary_data = [headers, content_row]
865
- summary_table = Table(summary_data, colWidths=[2.1 * inch, 2.2 * inch, 2.2 * inch])
866
  summary_table.setStyle(
867
  TableStyle(
868
  [
@@ -1059,77 +1330,8 @@ def generate_pdf_report(
1059
  )
1060
  story.append(factor_table)
1061
 
1062
- if ra.recommended_steps:
1063
- story.append(Spacer(1, SPACER_SMALL))
1064
- story.append(Paragraph("<b>Recommended steps</b>", styles["BodyText"]))
1065
- steps = (
1066
- ra.recommended_steps
1067
- if isinstance(ra.recommended_steps, list)
1068
- else [ra.recommended_steps]
1069
- )
1070
- for step in steps:
1071
- p = Paragraph(f"• {step}", indented_style)
1072
- story.append(p)
1073
-
1074
  story.append(Spacer(1, SPACER_NORMAL))
1075
 
1076
- story.append(Paragraph("Diagnostic Recommendations", subheading_style))
1077
- story.append(Spacer(1, SPACER_SMALL))
1078
- dx_intro_text = """
1079
- Based on your risk profile, the following diagnostic tests are recommended. The recommendation
1080
- level is on a scale from 1 (lowest priority) to 5 (highest priority/urgency).
1081
- """
1082
- story.append(Paragraph(dx_intro_text, styles["BodyText"]))
1083
- story.append(Spacer(1, SPACER_SMALL))
1084
-
1085
- dx_data = [
1086
- [
1087
- Paragraph(h, table_header_style)
1088
- for h in ["Test", "Rec.", "Frequency", "Rationale"]
1089
- ]
1090
- ]
1091
- dx_style_cmds = [
1092
- (
1093
- "BACKGROUND",
1094
- (0, 0),
1095
- (-1, 0),
1096
- colors.HexColor(f"#{HEX_COLORS['header_fill']}"),
1097
- ),
1098
- ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
1099
- ("GRID", (0, 0), (-1, -1), 1, colors.black),
1100
- ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
1101
- ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
1102
- ("TOPPADDING", (0, 0), (-1, -1), 4),
1103
- ]
1104
- sorted_dx_recommendations = sorted(
1105
- assessment.dx_recommendations,
1106
- key=lambda x: x.recommendation_level or 0,
1107
- reverse=True,
1108
- )
1109
- for i, dr in enumerate(sorted_dx_recommendations, 1):
1110
- dx_data.append(
1111
- [
1112
- Paragraph(dr.test_name or "", table_body_style),
1113
- Paragraph(str(dr.recommendation_level), table_body_style_centered),
1114
- Paragraph(dr.frequency or "", table_body_style),
1115
- Paragraph(dr.rationale or "", table_body_style),
1116
- ]
1117
- )
1118
- dx_style_cmds.append(
1119
- (
1120
- "BACKGROUND",
1121
- (1, i),
1122
- (1, i),
1123
- _get_rec_color(dr.recommendation_level, "pdf"),
1124
- )
1125
- )
1126
- dx_widths = [1.5 * inch, 0.5 * inch, 1.5 * inch, 2.9 * inch]
1127
- scaled_widths = [w * (CONTENT_WIDTH / sum(dx_widths)) for w in dx_widths]
1128
- dx_table = Table(
1129
- dx_data, colWidths=scaled_widths, style=dx_style_cmds, splitByRow=1
1130
- )
1131
- story.append(dx_table)
1132
-
1133
  story.append(Spacer(1, SPACER_NORMAL))
1134
  disclaimer = """
1135
  IMPORTANT: This assessment does not replace professional medical advice.
@@ -1137,11 +1339,73 @@ def generate_pdf_report(
1137
  story.append(Paragraph(disclaimer, styles["BodyText"]))
1138
 
1139
  # --- Appendix Section ---
1140
- if assessment.thinking or assessment.reasoning:
1141
  story.append(PageBreak())
1142
  story.append(Paragraph("Appendix", heading_style))
1143
  story.append(Spacer(1, SPACER_NORMAL))
1144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1145
  if assessment.thinking:
1146
  story.append(Paragraph("Thinking Process", subheading_style))
1147
  # Use a preformatted style for better readability of raw text
 
36
  ContributionStrength,
37
  InitialAssessment,
38
  RiskFactorCategory,
39
+ RiskScore,
40
+ )
41
+ from .probability_aggregation import (
42
+ aggregate_probabilities,
43
+ get_display_cancer_type,
44
+ separate_score_types,
45
  )
46
  from .user_input import UserInput
47
 
 
139
  return PDF_COLORS[color_key] if color_format == "pdf" else HEX_COLORS[color_key]
140
 
141
 
142
+ def _categorize_numeric_score(model_name: str, score_value: float) -> str:
143
+ """Categorize numeric risk scores from models like GAIL and CRC-PRO.
144
+
145
+ Args:
146
+ model_name: Name of the risk model.
147
+ score_value: Numeric score value.
148
+
149
+ Returns:
150
+ Risk category string.
151
+ """
152
+ model_lower = model_name.lower()
153
+
154
+ if "gail" in model_lower:
155
+ # GAIL: 5-year breast cancer risk
156
+ # High Risk: >= 1.67%, Average Risk: < 1.67%
157
+ if score_value >= 1.67:
158
+ return "High Risk"
159
+ else:
160
+ return "Average Risk"
161
+
162
+ elif "crc" in model_lower or "crc_pro" in model_lower:
163
+ # CRC-PRO: 10-year colorectal cancer risk
164
+ # Low: <0.5%, Moderate: 0.5-2.0%, High: >2.0%
165
+ if score_value < 0.5:
166
+ return "Low Risk"
167
+ elif score_value < 2.0:
168
+ return "Moderate Risk"
169
+ else:
170
+ return "High Risk"
171
+
172
+ return "N/A"
173
+
174
+
175
  def _markdown_to_reportlab(md_text: str) -> str:
176
  """Convert Markdown text to ReportLab-compatible HTML-like markup.
177
 
 
219
 
220
  _create_summary_sheet(wb, assessment, user_input)
221
  _create_risk_scores_sheet(wb, assessment)
222
+ _create_aggregated_probabilities_sheet(wb, assessment)
223
  _create_data_sheet(wb, "User Input Data", user_input.model_dump(mode="json"))
224
  _create_data_sheet(wb, "Raw LLM Output", assessment.model_dump(mode="json"))
225
 
 
506
  header_fill = PatternFill(start_color=HEX_COLORS["header_fill"], fill_type="solid")
507
  wrap_alignment = Alignment(wrap_text=True, vertical="top")
508
 
509
+ ws.merge_cells("A1:H1")
510
  ws["A1"] = "Calculated Risk Scores (Ground Truth)"
511
  ws["A1"].font = title_font
512
  ws["A1"].alignment = Alignment(horizontal="center")
513
 
514
+ ws.merge_cells("A2:H2")
515
  ws["A2"] = "Scores calculated using validated clinical risk models"
516
  ws["A2"].alignment = Alignment(horizontal="center")
517
 
 
521
  ws.cell(row=current_row, column=1, value="No risk scores calculated")
522
  return
523
 
524
+ # Create headers with new columns
525
+ headers = [
526
+ "Cancer Type",
527
+ "Model Name",
528
+ "Score",
529
+ "Probability (%)",
530
+ "Time Horizon (years)",
531
+ "Score Type",
532
+ "Interpretation",
533
+ "References",
534
+ ]
535
  for col_idx, header in enumerate(headers, 1):
536
  cell = ws.cell(row=current_row, column=col_idx, value=header)
537
  cell.font = header_font
 
549
  ws.cell(row=current_row, column=2, value=score.name)
550
  ws.cell(row=current_row, column=3, value=score.score or "N/A")
551
 
552
+ # Add new probability fields
553
+ prob_value = (
554
+ f"{score.probability_percent:.2f}"
555
+ if score.probability_percent is not None
556
+ else "N/A"
557
+ )
558
+ ws.cell(row=current_row, column=4, value=prob_value)
559
+
560
+ horizon_value = (
561
+ str(score.time_horizon_years)
562
+ if score.time_horizon_years is not None
563
+ else "N/A"
564
+ )
565
+ ws.cell(row=current_row, column=5, value=horizon_value)
566
+
567
+ ws.cell(row=current_row, column=6, value=score.score_type)
568
+
569
  interp_cell = ws.cell(
570
+ row=current_row, column=7, value=score.interpretation or "N/A"
571
  )
572
  interp_cell.alignment = wrap_alignment
573
 
574
  refs = "; ".join(score.references) if score.references else "N/A"
575
+ refs_cell = ws.cell(row=current_row, column=8, value=refs)
576
  refs_cell.alignment = wrap_alignment
577
 
578
  current_row += 1
 
581
  ws.column_dimensions["A"].width = 20
582
  ws.column_dimensions["B"].width = 25
583
  ws.column_dimensions["C"].width = 15
584
+ ws.column_dimensions["D"].width = 15
585
+ ws.column_dimensions["E"].width = 20
586
+ ws.column_dimensions["F"].width = 15
587
+ ws.column_dimensions["G"].width = 50
588
+ ws.column_dimensions["H"].width = 40
589
+
590
+
591
def _create_aggregated_probabilities_sheet(
    wb: Workbook, assessment: InitialAssessment
) -> None:
    """Create a worksheet with aggregated probability scores.

    Flattens the assessment's grouped risk scores, keeps only the
    probability-based ones, aggregates them by cancer type and time
    horizon, and renders one row per aggregation. If no probability
    scores exist, the sheet contains only a placeholder message.

    Args:
        wb: An openpyxl workbook.
        assessment: The structured initial assessment containing calculated scores.
    """
    ws = wb.create_sheet("Aggregated Probabilities")

    title_font = Font(bold=True, size=16, name="Calibri")
    header_font = Font(bold=True, color=HEX_COLORS["header_font"], name="Calibri")
    header_fill = PatternFill(start_color=HEX_COLORS["header_fill"], fill_type="solid")

    # Title and subtitle span the full table width (columns A-F).
    ws.merge_cells("A1:F1")
    ws["A1"] = "Aggregated Probability Scores"
    ws["A1"].font = title_font
    ws["A1"].alignment = Alignment(horizontal="center")

    ws.merge_cells("A2:F2")
    ws["A2"] = "Average probabilities when multiple models assess the same cancer type"
    ws["A2"].alignment = Alignment(horizontal="center")

    # Row 3 is left blank as a spacer between subtitle and table.
    current_row = 4

    # Collect all scores from the grouped format
    # (calculated_risk_scores maps cancer type -> list of scores).
    all_scores: list[RiskScore] = []
    for scores_list in assessment.calculated_risk_scores.values():
        all_scores.extend(scores_list)

    # Separate and aggregate
    separated = separate_score_types(all_scores)
    probability_scores = separated["probability"]

    if not probability_scores:
        ws.cell(
            row=current_row, column=1, value="No probability-based scores calculated"
        )
        return

    aggregated = aggregate_probabilities(probability_scores)

    # Create headers
    headers = [
        "Cancer Type",
        "Time Horizon (years)",
        "Average Probability (%)",
        "Model Count",
        "Contributing Models",
        "Individual Probabilities",
    ]
    for col_idx, header in enumerate(headers, 1):
        cell = ws.cell(row=current_row, column=col_idx, value=header)
        cell.font = header_font
        cell.fill = header_fill

    current_row += 1

    # Add aggregated scores: one row per (cancer type, time horizon) group.
    for agg in aggregated:
        # Use display function for clean cancer type name
        display_name = get_display_cancer_type(agg.cancer_type)
        ws.cell(row=current_row, column=1, value=display_name)
        ws.cell(row=current_row, column=2, value=str(agg.time_horizon_years))
        ws.cell(row=current_row, column=3, value=f"{agg.avg_probability_percent:.2f}")
        ws.cell(row=current_row, column=4, value=str(agg.model_count))

        # Comma-separated model names, then per-model probabilities for audit.
        model_names = ", ".join(s.name for s in agg.individual_scores)
        ws.cell(row=current_row, column=5, value=model_names)

        individual_probs = ", ".join(
            f"{s.name}: {s.probability_percent:.2f}%" for s in agg.individual_scores
        )
        ws.cell(row=current_row, column=6, value=individual_probs)

        current_row += 1

    # Set column widths
    ws.column_dimensions["A"].width = 20
    ws.column_dimensions["B"].width = 20
    ws.column_dimensions["C"].width = 25
    ws.column_dimensions["D"].width = 15
    ws.column_dimensions["E"].width = 30
    ws.column_dimensions["F"].width = 50
676
 
677
 
678
  def _create_data_sheet(wb: Workbook, title: str, data: dict) -> None:
 
943
  story.append(Paragraph("Assessment", heading_style))
944
  story.append(Spacer(1, SPACER_NORMAL))
945
 
946
+ # --- Aggregated Probability Scores Section (Early in document) ---
947
  if assessment.calculated_risk_scores:
948
+ # Collect all scores from grouped format
949
+ all_scores: list[RiskScore] = []
950
+ for scores_list in assessment.calculated_risk_scores.values():
951
+ all_scores.extend(scores_list)
952
+
953
+ # Separate and aggregate
954
+ separated = separate_score_types(all_scores)
955
+ probability_scores = separated["probability"]
956
+ categorical_scores = separated["categorical"]
957
+
958
+ if probability_scores:
959
+ aggregated = aggregate_probabilities(probability_scores)
960
+
961
+ if aggregated:
962
+ story.append(
963
+ Paragraph("Aggregated Probability Scores", subheading_style)
964
+ )
965
+ story.append(Spacer(1, SPACER_SMALL))
966
+ agg_intro = """
967
+ When multiple models assess the same cancer type, we aggregate their probabilities
968
+ to provide a comprehensive risk estimate. The table below shows averaged probabilities
969
+ by cancer type and time horizon.
970
+ """
971
+ story.append(Paragraph(agg_intro, styles["BodyText"]))
972
+ story.append(Spacer(1, SPACER_SMALL))
973
+
974
+ # Create table for aggregated probabilities
975
+ agg_data = [
976
+ [
977
+ Paragraph(h, table_header_style)
978
+ for h in [
979
+ "Cancer Type",
980
+ "Time Horizon",
981
+ "Probability",
982
+ "Models",
983
+ ]
984
+ ]
985
+ ]
986
+ agg_style_cmds = [
987
+ (
988
+ "BACKGROUND",
989
+ (0, 0),
990
+ (-1, 0),
991
+ colors.HexColor(f"#{HEX_COLORS['header_fill']}"),
992
+ ),
993
+ ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
994
+ ("GRID", (0, 0), (-1, -1), 1, colors.black),
995
+ ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
996
+ ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
997
+ ("TOPPADDING", (0, 0), (-1, -1), 4),
998
+ ]
999
 
1000
+ for agg in aggregated:
1001
+ model_names = ", ".join(s.name for s in agg.individual_scores)
1002
+ time_horizon_text = (
1003
+ f"{agg.time_horizon_years:.1f} years"
1004
+ if agg.time_horizon_years != 79.0
1005
+ else "Lifetime"
1006
+ )
1007
+ # Use display function to get clean cancer type name
1008
+ display_cancer_type = get_display_cancer_type(agg.cancer_type)
1009
+
1010
+ agg_data.append(
1011
+ [
1012
+ Paragraph(display_cancer_type, table_body_style),
1013
+ Paragraph(time_horizon_text, table_body_style),
1014
+ Paragraph(
1015
+ f"{agg.avg_probability_percent:.2f}%", table_body_style
1016
+ ),
1017
+ Paragraph(
1018
+ f"{model_names} (n={agg.model_count})", table_body_style
1019
+ ),
1020
+ ]
1021
+ )
1022
+
1023
+ agg_widths = [1.5 * inch, 1.5 * inch, 1.2 * inch, 2.3 * inch]
1024
+ agg_scaled_widths = [
1025
+ w * (CONTENT_WIDTH / sum(agg_widths)) for w in agg_widths
1026
+ ]
1027
+ agg_table = Table(
1028
+ agg_data,
1029
+ colWidths=agg_scaled_widths,
1030
+ style=agg_style_cmds,
1031
+ splitByRow=1,
1032
+ )
1033
+ story.append(agg_table)
1034
+ story.append(Spacer(1, SPACER_NORMAL))
1035
+
1036
+ # Add categorical/diagnostic scores section if present (Early in document)
1037
+ if categorical_scores:
1038
+ story.append(Paragraph("Categorical Risk Assessments", subheading_style))
1039
+ story.append(Spacer(1, SPACER_SMALL))
1040
+ cat_intro = """
1041
+ The following assessments provide numeric risk scores with categorical classifications.
1042
+ These models output absolute risk values that are categorized based on validated thresholds.
1043
+ """
1044
+ story.append(Paragraph(cat_intro, styles["BodyText"]))
1045
+ story.append(Spacer(1, SPACER_SMALL))
1046
+
1047
+ # Create table for categorical scores
1048
+ cat_data = [
1049
+ [
1050
+ Paragraph(h, table_header_style)
1051
+ for h in [
1052
+ "Model",
1053
+ "Cancer Type",
1054
+ "Score",
1055
+ "Time Horizon",
1056
+ "Risk Category",
1057
+ ]
1058
+ ]
1059
  ]
1060
+ cat_style_cmds = [
1061
+ (
1062
+ "BACKGROUND",
1063
+ (0, 0),
1064
+ (-1, 0),
1065
+ colors.HexColor(f"#{HEX_COLORS['header_fill']}"),
1066
+ ),
1067
+ ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
1068
+ ("GRID", (0, 0), (-1, -1), 1, colors.black),
1069
+ ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
1070
+ ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
1071
+ ("TOPPADDING", (0, 0), (-1, -1), 4),
1072
+ ]
1073
+
1074
+ for score in categorical_scores:
1075
+ # Format time horizon if available
1076
+ time_horizon_text = "N/A"
1077
+ if score.time_horizon_years is not None:
1078
+ if score.time_horizon_years == 79.0:
1079
+ time_horizon_text = "Lifetime"
1080
+ else:
1081
+ time_horizon_text = f"{score.time_horizon_years:.1f} years"
1082
+
1083
+ # Calculate risk category from numeric score
1084
+ risk_category_text = "N/A"
1085
+ try:
1086
+ # Try to extract numeric value from score
1087
+ score_value = float(score.score or "0")
1088
+ risk_category_text = _categorize_numeric_score(
1089
+ score.name,
1090
+ score_value,
1091
+ )
1092
+ except (ValueError, TypeError):
1093
+ # If score is not numeric, leave as N/A
1094
+ pass
1095
 
1096
+ cat_data.append(
 
 
 
 
 
1097
  [
 
1098
  Paragraph(score.name, table_body_style),
1099
+ Paragraph(score.cancer_type or "N/A", table_body_style),
1100
  Paragraph(score.score or "N/A", table_body_style),
1101
+ Paragraph(time_horizon_text, table_body_style),
1102
+ Paragraph(risk_category_text, table_body_style),
1103
  ]
1104
  )
1105
 
1106
+ cat_widths = [1.3 * inch, 1.3 * inch, 0.8 * inch, 1.2 * inch, 1.9 * inch]
1107
+ cat_scaled_widths = [
1108
+ w * (CONTENT_WIDTH / sum(cat_widths)) for w in cat_widths
1109
+ ]
1110
+ cat_table = Table(
1111
+ cat_data,
1112
+ colWidths=cat_scaled_widths,
1113
+ style=cat_style_cmds,
1114
+ splitByRow=1,
1115
+ )
1116
+ story.append(cat_table)
1117
+ story.append(Spacer(1, SPACER_NORMAL))
1118
 
1119
+ # --- New 2-Column Summary Section ---
1120
  headers = [
1121
  Paragraph("<b>Overall Risk Score</b>", summary_header_style),
1122
  Paragraph("<b>Risk Breakdown</b>", summary_header_style),
 
1123
  ]
1124
 
1125
  gauge = ""
 
1127
  gauge = _create_risk_gauge(assessment.overall_risk_score, width=120, height=70)
1128
 
1129
  risk_panel = _create_risk_breakdown_chart(
1130
+ assessment.risk_assessments, width=200, height=70
 
 
 
1131
  )
1132
 
1133
+ content_row = [gauge, risk_panel]
1134
 
1135
  summary_data = [headers, content_row]
1136
+ summary_table = Table(summary_data, colWidths=[3.2 * inch, 3.3 * inch])
1137
  summary_table.setStyle(
1138
  TableStyle(
1139
  [
 
1330
  )
1331
  story.append(factor_table)
1332
 
 
 
 
 
 
 
 
 
 
 
 
 
1333
  story.append(Spacer(1, SPACER_NORMAL))
1334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1335
  story.append(Spacer(1, SPACER_NORMAL))
1336
  disclaimer = """
1337
  IMPORTANT: This assessment does not replace professional medical advice.
 
1339
  story.append(Paragraph(disclaimer, styles["BodyText"]))
1340
 
1341
  # --- Appendix Section ---
1342
+ if assessment.thinking or assessment.reasoning or assessment.calculated_risk_scores:
1343
  story.append(PageBreak())
1344
  story.append(Paragraph("Appendix", heading_style))
1345
  story.append(Spacer(1, SPACER_NORMAL))
1346
 
1347
+ # --- Calculated Risk Scores Section (In Appendix) ---
1348
+ if assessment.calculated_risk_scores:
1349
+ story.append(Paragraph("Calculated Risk Scores", subheading_style))
1350
+ story.append(Spacer(1, SPACER_SMALL))
1351
+ score_intro_text = """
1352
+ The following risk scores have been calculated using validated clinical risk models.
1353
+ These scores provide detailed quantitative estimates of cancer risk based on your profile.
1354
+ """
1355
+ story.append(Paragraph(score_intro_text, styles["BodyText"]))
1356
+ story.append(Spacer(1, SPACER_SMALL))
1357
+
1358
+ score_data = [
1359
+ [
1360
+ Paragraph(h, table_header_style)
1361
+ for h in ["Cancer Type", "Model", "Score", "Interpretation"]
1362
+ ]
1363
+ ]
1364
+ score_style_cmds = [
1365
+ (
1366
+ "BACKGROUND",
1367
+ (0, 0),
1368
+ (-1, 0),
1369
+ colors.HexColor(f"#{HEX_COLORS['header_fill']}"),
1370
+ ),
1371
+ ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
1372
+ ("GRID", (0, 0), (-1, -1), 1, colors.black),
1373
+ ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
1374
+ ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
1375
+ ("TOPPADDING", (0, 0), (-1, -1), 4),
1376
+ ]
1377
+
1378
+ # Sort by cancer type and add rows
1379
+ for cancer_type, scores in sorted(
1380
+ assessment.calculated_risk_scores.items()
1381
+ ):
1382
+ for i, score in enumerate(scores):
1383
+ # Only show cancer type on first row for each cancer
1384
+ cancer_cell = (
1385
+ Paragraph(cancer_type, table_body_style) if i == 0 else ""
1386
+ )
1387
+ score_data.append(
1388
+ [
1389
+ cancer_cell,
1390
+ Paragraph(score.name, table_body_style),
1391
+ Paragraph(score.score or "N/A", table_body_style),
1392
+ Paragraph(score.interpretation or "N/A", table_body_style),
1393
+ ]
1394
+ )
1395
+
1396
+ score_widths = [1.5 * inch, 1.5 * inch, 1.0 * inch, 2.5 * inch]
1397
+ scaled_widths = [
1398
+ w * (CONTENT_WIDTH / sum(score_widths)) for w in score_widths
1399
+ ]
1400
+ scores_table = Table(
1401
+ score_data,
1402
+ colWidths=scaled_widths,
1403
+ style=score_style_cmds,
1404
+ splitByRow=1,
1405
+ )
1406
+ story.append(scores_table)
1407
+ story.append(Spacer(1, SPACER_NORMAL))
1408
+
1409
  if assessment.thinking:
1410
  story.append(Paragraph("Thinking Process", subheading_style))
1411
  # Use a preformatted style for better readability of raw text
src/sentinel/risk_aggregation.py CHANGED
@@ -3,6 +3,12 @@
3
  from collections import defaultdict
4
 
5
  from .models import RiskScore
 
 
 
 
 
 
6
 
7
 
8
  def group_scores_by_cancer_type(scores: list[RiskScore]) -> dict[str, list[RiskScore]]:
@@ -96,3 +102,128 @@ def format_scores_for_pdf(
96
  List of (cancer_type, scores) tuples sorted by cancer type.
97
  """
98
  return sorted(grouped_scores.items())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from collections import defaultdict
4
 
5
  from .models import RiskScore
6
+ from .probability_aggregation import (
7
+ AggregatedRisk,
8
+ aggregate_probabilities,
9
+ get_display_cancer_type,
10
+ separate_score_types,
11
+ )
12
 
13
 
14
  def group_scores_by_cancer_type(scores: list[RiskScore]) -> dict[str, list[RiskScore]]:
 
102
  List of (cancer_type, scores) tuples sorted by cancer type.
103
  """
104
  return sorted(grouped_scores.items())
105
+
106
+
107
+ def format_probabilities_for_llm(
108
+ aggregated: list[AggregatedRisk], categorical: list[RiskScore]
109
+ ) -> str:
110
+ """Format aggregated probabilities and categorical scores for LLM context.
111
+
112
+ Args:
113
+ aggregated: List of aggregated probability risks.
114
+ categorical: List of categorical/diagnostic scores.
115
+
116
+ Returns:
117
+ Formatted string representation for LLM consumption.
118
+ """
119
+ lines = []
120
+ lines.append("# Calculated Risk Scores (Ground Truth)\n")
121
+ lines.append(
122
+ "The following risk scores have been calculated using validated models:\n"
123
+ )
124
+
125
+ # Group aggregated risks by cancer type
126
+ if aggregated:
127
+ lines.append("\n## Probability-Based Risk Scores\n")
128
+ lines.append(
129
+ "These scores represent time-based probabilities of developing cancer:\n"
130
+ )
131
+
132
+ current_cancer = None
133
+ for agg in aggregated:
134
+ # Add cancer type header if changed
135
+ if agg.cancer_type != current_cancer:
136
+ current_cancer = agg.cancer_type
137
+ display_name = get_display_cancer_type(agg.cancer_type)
138
+ lines.append(f"\n### {display_name}\n")
139
+
140
+ # Format time horizon nicely
141
+ if agg.time_horizon_years == 79.0:
142
+ horizon_text = "Lifetime"
143
+ else:
144
+ horizon_text = f"{agg.time_horizon_years:.1f}-year"
145
+
146
+ # Add aggregated score
147
+ if agg.model_count == 1:
148
+ model = agg.individual_scores[0]
149
+ lines.append(f"**{model.name}** ({horizon_text} risk)")
150
+ lines.append(f"- **Probability**: {agg.avg_probability_percent:.2f}%")
151
+ if model.description:
152
+ lines.append(f"- **Description**: {model.description}")
153
+ if model.interpretation:
154
+ lines.append(f"- **Interpretation**: {model.interpretation}")
155
+ else:
156
+ model_names = ", ".join(s.name for s in agg.individual_scores)
157
+ lines.append(
158
+ f"**Aggregated Risk** from {agg.model_count} models ({model_names})"
159
+ )
160
+ lines.append(
161
+ f"- **Average {horizon_text} Probability**: {agg.avg_probability_percent:.2f}%"
162
+ )
163
+ lines.append(
164
+ "- **Individual Probabilities**: "
165
+ + ", ".join(
166
+ f"{s.name}: {s.probability_percent:.2f}%"
167
+ for s in agg.individual_scores
168
+ )
169
+ )
170
+
171
+ lines.append("") # Empty line
172
+
173
+ # Add categorical scores
174
+ if categorical:
175
+ lines.append("\n## Diagnostic/Categorical Risk Scores\n")
176
+ lines.append(
177
+ "These scores represent diagnostic categories or non-time-based assessments:\n"
178
+ )
179
+
180
+ for score in categorical:
181
+ lines.append(f"\n### {score.name}")
182
+ if score.cancer_type:
183
+ lines.append(f"- **Cancer Type**: {score.cancer_type}")
184
+ lines.append(f"- **Result**: {score.score}")
185
+ if score.description:
186
+ lines.append(f"- **Description**: {score.description}")
187
+ if score.interpretation:
188
+ lines.append(f"- **Interpretation**: {score.interpretation}")
189
+ lines.append("") # Empty line
190
+
191
+ lines.append("\n---\n")
192
+ lines.append("**Important**: These scores are the ground truth. Your task is to:")
193
+ lines.append("1. Explain what these scores mean for the patient in clear language")
194
+ lines.append(
195
+ "2. Identify and highlight key risk factors contributing to elevated scores"
196
+ )
197
+ lines.append("3. Provide actionable context and insights based on these scores")
198
+ lines.append(
199
+ "4. DO NOT generate your own risk levels - explain and contextualize the calculated ones\n"
200
+ )
201
+
202
+ return "\n".join(lines)
203
+
204
+
205
+ def format_scores_with_aggregation(scores: list[RiskScore]) -> str:
206
+ """Format risk scores with probability aggregation for LLM context.
207
+
208
+ This is an enhanced version of format_scores_for_llm that aggregates
209
+ probability-based scores by cancer type and time horizon.
210
+
211
+ Args:
212
+ scores: List of RiskScore objects.
213
+
214
+ Returns:
215
+ Formatted string representation for LLM consumption.
216
+ """
217
+ if not scores:
218
+ return "No risk scores calculated."
219
+
220
+ # Separate scores by type
221
+ separated = separate_score_types(scores)
222
+ probability_scores = separated["probability"]
223
+ categorical_scores = separated["categorical"]
224
+
225
+ # Aggregate probability scores
226
+ aggregated = aggregate_probabilities(probability_scores)
227
+
228
+ # Format for LLM
229
+ return format_probabilities_for_llm(aggregated, categorical_scores)
src/sentinel/risk_models/__init__.py CHANGED
@@ -11,6 +11,7 @@ from sentinel.risk_models.pcpt import PCPTRiskModel
11
  from sentinel.risk_models.plcom2012 import PLCOm2012RiskModel
12
  from sentinel.risk_models.prostate_mortality import ProstateMortalityRiskModel
13
  from sentinel.risk_models.qcancer import QCancerRiskModel
 
14
 
15
  RISK_MODELS = [
16
  GailRiskModel,
@@ -24,6 +25,7 @@ RISK_MODELS = [
24
  QCancerRiskModel,
25
  ClausRiskModel,
26
  MRATRiskModel,
 
27
  ]
28
 
29
  __all__ = [
@@ -33,4 +35,5 @@ __all__ = [
33
  "LLPiRiskModel",
34
  "MRATRiskModel",
35
  "PLCOm2012RiskModel",
 
36
  ]
 
11
  from sentinel.risk_models.plcom2012 import PLCOm2012RiskModel
12
  from sentinel.risk_models.prostate_mortality import ProstateMortalityRiskModel
13
  from sentinel.risk_models.qcancer import QCancerRiskModel
14
+ from sentinel.risk_models.tyrer_cuzick import TyrerCuzickRiskModel
15
 
16
  RISK_MODELS = [
17
  GailRiskModel,
 
25
  QCancerRiskModel,
26
  ClausRiskModel,
27
  MRATRiskModel,
28
+ TyrerCuzickRiskModel,
29
  ]
30
 
31
  __all__ = [
 
35
  "LLPiRiskModel",
36
  "MRATRiskModel",
37
  "PLCOm2012RiskModel",
38
+ "TyrerCuzickRiskModel",
39
  ]
src/sentinel/risk_models/base.py CHANGED
@@ -1,5 +1,6 @@
1
  """Abstract base classes for risk model implementations."""
2
 
 
3
  from abc import ABC, abstractmethod
4
  from typing import Any
5
 
@@ -44,6 +45,34 @@ class RiskModel(ABC):
44
  def references(self) -> list[str]:
45
  """Return academic or source references for the model."""
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  @staticmethod
48
  def _get_nested_field(obj: Any, path: str) -> Any:
49
  """Navigate a dotted path to retrieve a nested field value.
@@ -108,11 +137,17 @@ class RiskModel(ABC):
108
  Returns:
109
  A populated RiskScore object for this model.
110
  """
 
 
 
111
  return RiskScore(
112
  name=self.name,
113
- score=self.compute_score(user),
114
  cancer_type=self.cancer_type(),
115
  description=self.description(),
116
  interpretation=self.interpretation(),
117
  references=self.references(),
 
 
 
118
  )
 
1
  """Abstract base classes for risk model implementations."""
2
 
3
+ import re
4
  from abc import ABC, abstractmethod
5
  from typing import Any
6
 
 
45
  def references(self) -> list[str]:
46
  """Return academic or source references for the model."""
47
 
48
+ @abstractmethod
49
+ def time_horizon_years(self) -> float | None:
50
+ """Return time horizon in years for probability output.
51
+
52
+ Returns:
53
+ Time horizon in years (e.g., 5, 10, 79 for lifetime), or None if not applicable.
54
+ """
55
+
56
+ def _parse_probability(self, score_str: str) -> tuple[float | None, str]:
57
+ """Parse probability from score string and determine score type.
58
+
59
+ Args:
60
+ score_str: The score string returned by compute_score().
61
+
62
+ Returns:
63
+ Tuple of (probability_percent, score_type).
64
+ """
65
+ if score_str.startswith("N/A"):
66
+ return (None, "not_applicable")
67
+
68
+ # Extract percentage using regex
69
+ match = re.search(r"(\d+\.?\d*)%", score_str)
70
+ if match:
71
+ return (float(match.group(1)), "probability")
72
+
73
+ # If no percentage found, treat as categorical
74
+ return (None, "categorical")
75
+
76
  @staticmethod
77
  def _get_nested_field(obj: Any, path: str) -> Any:
78
  """Navigate a dotted path to retrieve a nested field value.
 
137
  Returns:
138
  A populated RiskScore object for this model.
139
  """
140
+ score_str = self.compute_score(user)
141
+ probability, score_type = self._parse_probability(score_str)
142
+
143
  return RiskScore(
144
  name=self.name,
145
+ score=score_str,
146
  cancer_type=self.cancer_type(),
147
  description=self.description(),
148
  interpretation=self.interpretation(),
149
  references=self.references(),
150
+ probability_percent=probability,
151
+ time_horizon_years=self.time_horizon_years(),
152
+ score_type=score_type,
153
  )
src/sentinel/risk_models/boadicea.py CHANGED
@@ -190,3 +190,11 @@ class BOADICEARiskModel(RiskModel):
190
  "Antoniou et al. Average risks of breast and ovarian cancer associated with BRCA1 or BRCA2 "
191
  "mutations detected in case series unselected for family history. Am J Hum Genet. 2003;72(5):1117-1130.",
192
  ]
 
 
 
 
 
 
 
 
 
190
  "Antoniou et al. Average risks of breast and ovarian cancer associated with BRCA1 or BRCA2 "
191
  "mutations detected in case series unselected for family history. Am J Hum Genet. 2003;72(5):1117-1130.",
192
  ]
193
+
194
+ def time_horizon_years(self) -> float | None:
195
+ """Return the time horizon in years for the BOADICEA model.
196
+
197
+ Returns:
198
+ The time horizon in years (10 years for BOADICEA).
199
+ """
200
+ return 10.0
src/sentinel/risk_models/claus.py CHANGED
@@ -571,6 +571,14 @@ class ClausRiskModel(RiskModel):
571
  "Reference implementation: https://github.com/ColorGenomics/risk-models",
572
  ]
573
 
 
 
 
 
 
 
 
 
574
 
575
  def _bin_age_to_index(age: int) -> int:
576
  """Convert age to table index.
 
571
  "Reference implementation: https://github.com/ColorGenomics/risk-models",
572
  ]
573
 
574
+ def time_horizon_years(self) -> float | None:
575
+ """Return the time horizon in years for the Claus model.
576
+
577
+ Returns:
578
+ The time horizon in years (79 years - lifetime risk to age 79).
579
+ """
580
+ return 79.0
581
+
582
 
583
  def _bin_age_to_index(age: int) -> int:
584
  """Convert age to table index.
src/sentinel/risk_models/crc_pro.py CHANGED
@@ -216,9 +216,10 @@ class CRCProRiskModel(RiskModel):
216
  str: Human-readable interpretation guidance.
217
  """
218
  return (
219
- "The output represents the % probability of developing colorectal "
220
- "cancer within 10 years. Elevated results should be reviewed with a "
221
- "qualified healthcare professional."
 
222
  )
223
 
224
  def references(self) -> list[str]:
@@ -233,6 +234,14 @@ class CRCProRiskModel(RiskModel):
233
  "Multi-Ethnic Cohort Study. J Am Board Fam Med. 2014;27(1):42-55."
234
  ]
235
 
 
 
 
 
 
 
 
 
236
  # --- Internal helpers -----------------------------------------------
237
  def _build_input(self, user: UserInput, sex: Sex, age: int):
238
  """Build the input for the model.
 
216
  str: Human-readable interpretation guidance.
217
  """
218
  return (
219
+ "Risk categories: Low Risk (<0.5%), Moderate Risk (0.5-2.0%), "
220
+ "High Risk (>2.0%). The percentage indicates the 10-year absolute "
221
+ "risk of developing colorectal cancer. Elevated results should be "
222
+ "reviewed with a qualified healthcare professional."
223
  )
224
 
225
  def references(self) -> list[str]:
 
234
  "Multi-Ethnic Cohort Study. J Am Board Fam Med. 2014;27(1):42-55."
235
  ]
236
 
237
+ def time_horizon_years(self) -> float | None:
238
+ """Return the time horizon in years for the CRC-PRO model.
239
+
240
+ Returns:
241
+ The time horizon in years (10 years for CRC-PRO).
242
+ """
243
+ return 10.0
244
+
245
  # --- Internal helpers -----------------------------------------------
246
  def _build_input(self, user: UserInput, sex: Sex, age: int):
247
  """Build the input for the model.
src/sentinel/risk_models/extended_pbcg.py CHANGED
@@ -279,6 +279,14 @@ class ExtendedPBCGRiskModel(RiskModel):
279
  "prostate cancer risk prediction. BMC Med Res Methodol. 2022;22:200.",
280
  ]
281
 
 
 
 
 
 
 
 
 
282
  def _get_feature_value_direct(
283
  self,
284
  user: UserInput,
 
279
  "prostate cancer risk prediction. BMC Med Res Methodol. 2022;22:200.",
280
  ]
281
 
282
+ def time_horizon_years(self) -> float | None:
283
+ """Return the time horizon in years for the Extended PBCG model.
284
+
285
+ Returns:
286
+ The time horizon in years (None for Extended PBCG - no fixed horizon).
287
+ """
288
+ return None
289
+
290
  def _get_feature_value_direct(
291
  self,
292
  user: UserInput,
src/sentinel/risk_models/gail.py CHANGED
@@ -805,7 +805,15 @@ class GailRiskModel(RiskModel):
805
  return "The Gail Model (Breast Cancer Risk Assessment Tool) calculates a woman's chance of developing invasive breast cancer over a given time interval. It uses demographic and reproductive history factors to project risk for women with no prior breast cancer, DCIS or LCIS. Typically applied to ages 35-85."
806
 
807
  def interpretation(self) -> str:
808
- return "A score of 1.66 or higher is generally considered above average. Results should be discussed with a healthcare professional. The model does not include all possible risk factors and may overestimate risk for non-white populations."
809
 
810
  def references(self) -> list[str]:
811
  return ["National Cancer Institute Breast Cancer Risk Assessment Tool"]
 
 
 
 
 
 
 
 
 
805
  return "The Gail Model (Breast Cancer Risk Assessment Tool) calculates a woman's chance of developing invasive breast cancer over a given time interval. It uses demographic and reproductive history factors to project risk for women with no prior breast cancer, DCIS or LCIS. Typically applied to ages 35-85."
806
 
807
  def interpretation(self) -> str:
808
+ return "High Risk is defined as a 5-year risk of 1.67% or higher. Average Risk is below 1.67%. Women classified as High Risk may be candidates for preventive interventions. Results should be discussed with a healthcare professional. The model does not include all possible risk factors and may overestimate risk for non-white populations."
809
 
810
  def references(self) -> list[str]:
811
  return ["National Cancer Institute Breast Cancer Risk Assessment Tool"]
812
+
813
+ def time_horizon_years(self) -> float | None:
814
+ """Return the time horizon in years for the Gail model.
815
+
816
+ Returns:
817
+ The time horizon in years (5 years for Gail).
818
+ """
819
+ return 5.0
src/sentinel/risk_models/llpi.py CHANGED
@@ -228,3 +228,11 @@ class LLPiRiskModel(RiskModel):
228
  "LLPi: Liverpool lung project risk prediction model for lung cancer incidence. "
229
  "Cancer Prev Res (Phila) 2015;8:570-5."
230
  ]
 
 
 
 
 
 
 
 
 
228
  "LLPi: Liverpool lung project risk prediction model for lung cancer incidence. "
229
  "Cancer Prev Res (Phila) 2015;8:570-5."
230
  ]
231
+
232
+ def time_horizon_years(self) -> float | None:
233
+ """Return the time horizon in years for the LLPi model.
234
+
235
+ Returns:
236
+ The time horizon in years (8.7 years for LLPi).
237
+ """
238
+ return 8.7
src/sentinel/risk_models/mrat.py CHANGED
@@ -140,6 +140,14 @@ class MRATRiskModel(RiskModel):
140
  "Fears TR et al. Identifying individuals at high risk for melanoma: J Am Acad Dermatol. 2006;55:819-826.",
141
  ]
142
 
 
 
 
 
 
 
 
 
143
  def absolute_risk(self, user: UserInput) -> float:
144
  """Compute the 5-year melanoma absolute risk percentage.
145
 
 
140
  "Fears TR et al. Identifying individuals at high risk for melanoma: J Am Acad Dermatol. 2006;55:819-826.",
141
  ]
142
 
143
+ def time_horizon_years(self) -> float | None:
144
+ """Return the time horizon in years for the MRAT model.
145
+
146
+ Returns:
147
+ The time horizon in years (5 years for MRAT).
148
+ """
149
+ return 5.0
150
+
151
  def absolute_risk(self, user: UserInput) -> float:
152
  """Compute the 5-year melanoma absolute risk percentage.
153
 
src/sentinel/risk_models/pcpt.py CHANGED
@@ -631,3 +631,11 @@ class PCPTRiskModel(RiskModel):
631
  "Calculator 2.0 for the prediction of low- versus high-grade "
632
  "prostate cancer. Urology. 2014;83(6):1362-1367.",
633
  ]
 
 
 
 
 
 
 
 
 
631
  "Calculator 2.0 for the prediction of low- versus high-grade "
632
  "prostate cancer. Urology. 2014;83(6):1362-1367.",
633
  ]
634
+
635
+ def time_horizon_years(self) -> float | None:
636
+ """Return the time horizon in years for the PCPT model.
637
+
638
+ Returns:
639
+ The time horizon in years (None for PCPT - no fixed horizon).
640
+ """
641
+ return None
src/sentinel/risk_models/plcom2012.py CHANGED
@@ -259,3 +259,11 @@ class PLCOm2012RiskModel(RiskModel):
259
  return [
260
  "Tammemägi, M. C., et al. (2013). Selection of individuals for lung-cancer screening by modeling lung-cancer risk. New England Journal of Medicine, 368(8), 728-736."
261
  ]
 
 
 
 
 
 
 
 
 
259
  return [
260
  "Tammemägi, M. C., et al. (2013). Selection of individuals for lung-cancer screening by modeling lung-cancer risk. New England Journal of Medicine, 368(8), 728-736."
261
  ]
262
+
263
+ def time_horizon_years(self) -> float | None:
264
+ """Return the time horizon in years for the PLCOm2012 model.
265
+
266
+ Returns:
267
+ The time horizon in years (6 years for PLCOm2012).
268
+ """
269
+ return 6.0
src/sentinel/risk_models/prostate_mortality.py CHANGED
@@ -221,3 +221,11 @@ class ProstateMortalityRiskModel(RiskModel):
221
  "BMC Med. 2019;17:144.",
222
  "Predict Prostate: https://prostate.predict.cam/tool",
223
  ]
 
 
 
 
 
 
 
 
 
221
  "BMC Med. 2019;17:144.",
222
  "Predict Prostate: https://prostate.predict.cam/tool",
223
  ]
224
+
225
+ def time_horizon_years(self) -> float | None:
226
+ """Return the time horizon in years for the Prostate Mortality model.
227
+
228
+ Returns:
229
+ The time horizon in years (15 years for Prostate Mortality).
230
+ """
231
+ return 15.0
src/sentinel/risk_models/qcancer.py CHANGED
@@ -1858,6 +1858,9 @@ class QCancerRiskModel(RiskModel):
1858
  description="10-year probability of not developing cancer",
1859
  interpretation="Baseline probability - higher values indicate lower overall cancer risk",
1860
  references=self.references(),
 
 
 
1861
  )
1862
  )
1863
 
@@ -1877,6 +1880,9 @@ class QCancerRiskModel(RiskModel):
1877
  "Values >1% warrant clinical review."
1878
  ),
1879
  references=self.references(),
 
 
 
1880
  )
1881
  )
1882
 
@@ -1888,6 +1894,14 @@ class QCancerRiskModel(RiskModel):
1888
  "ClinRisk Ltd. QCancer-2013 source code (GNU AGPL v3).",
1889
  ]
1890
 
 
 
 
 
 
 
 
 
1891
  def _format_risks(self, risks: dict[str, float], is_female: bool) -> str:
1892
  """Format probabilities as semicolon-separated string.
1893
 
 
1858
  description="10-year probability of not developing cancer",
1859
  interpretation="Baseline probability - higher values indicate lower overall cancer risk",
1860
  references=self.references(),
1861
+ probability_percent=no_cancer_pct,
1862
+ time_horizon_years=10.0,
1863
+ score_type="probability",
1864
  )
1865
  )
1866
 
 
1880
  "Values >1% warrant clinical review."
1881
  ),
1882
  references=self.references(),
1883
+ probability_percent=pct,
1884
+ time_horizon_years=10.0,
1885
+ score_type="probability",
1886
  )
1887
  )
1888
 
 
1894
  "ClinRisk Ltd. QCancer-2013 source code (GNU AGPL v3).",
1895
  ]
1896
 
1897
+ def time_horizon_years(self) -> float | None:
1898
+ """Return the time horizon in years for the QCancer model.
1899
+
1900
+ Returns:
1901
+ The time horizon in years (10 years for QCancer).
1902
+ """
1903
+ return 10.0
1904
+
1905
  def _format_risks(self, risks: dict[str, float], is_female: bool) -> str:
1906
  """Format probabilities as semicolon-separated string.
1907
 
src/sentinel/risk_models/tyrer_cuzick.py CHANGED
@@ -1320,3 +1320,11 @@ class TyrerCuzickRiskModel(RiskModel):
1320
  "Tyrer, J., Duffy, S. W., & Cuzick, J. (2004). A breast cancer prediction model "
1321
  "incorporating familial and personal risk factors. Statistics in Medicine, 23(7), 1111-1130."
1322
  ]
 
 
 
 
 
 
 
 
 
1320
  "Tyrer, J., Duffy, S. W., & Cuzick, J. (2004). A breast cancer prediction model "
1321
  "incorporating familial and personal risk factors. Statistics in Medicine, 23(7), 1111-1130."
1322
  ]
1323
+
1324
+ def time_horizon_years(self) -> float | None:
1325
+ """Return the time horizon in years for the Tyrer-Cuzick model.
1326
+
1327
+ Returns:
1328
+ The time horizon in years (10 years for Tyrer-Cuzick).
1329
+ """
1330
+ return 10.0
tests/test_probability_aggregation.py ADDED
@@ -0,0 +1,477 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for probability aggregation utilities."""
2
+
3
+ import pytest
4
+
5
+ from sentinel.models import RiskScore
6
+ from sentinel.probability_aggregation import (
7
+ AggregatedRisk,
8
+ aggregate_probabilities,
9
+ categorize_risk,
10
+ get_display_cancer_type,
11
+ normalize_cancer_type,
12
+ separate_score_types,
13
+ )
14
+
15
+
16
+ class TestAggregateProbabilities:
17
+ """Test probability aggregation functionality."""
18
+
19
+ def test_single_model_per_cancer_type(self):
20
+ """Test aggregation with one model per cancer type."""
21
+ scores = [
22
+ RiskScore(
23
+ name="Gail",
24
+ score="1.5%",
25
+ cancer_type="breast",
26
+ probability_percent=1.5,
27
+ time_horizon_years=5.0,
28
+ score_type="probability",
29
+ ),
30
+ RiskScore(
31
+ name="PLCOm2012",
32
+ score="2.3%",
33
+ cancer_type="lung",
34
+ probability_percent=2.3,
35
+ time_horizon_years=6.0,
36
+ score_type="probability",
37
+ ),
38
+ ]
39
+
40
+ aggregated = aggregate_probabilities(scores)
41
+
42
+ assert len(aggregated) == 2
43
+
44
+ # Check breast cancer aggregation
45
+ breast_agg = next(agg for agg in aggregated if agg.cancer_type == "breast")
46
+ assert breast_agg.time_horizon_years == 5.0
47
+ assert breast_agg.avg_probability_percent == 1.5
48
+ assert breast_agg.risk_category == "Moderate" # 1.5% for 5-year horizon
49
+ assert breast_agg.model_count == 1
50
+ assert len(breast_agg.individual_scores) == 1
51
+ assert breast_agg.individual_scores[0].name == "Gail"
52
+
53
+ # Check lung cancer aggregation
54
+ lung_agg = next(agg for agg in aggregated if agg.cancer_type == "lung")
55
+ assert lung_agg.time_horizon_years == 6.0
56
+ assert lung_agg.avg_probability_percent == 2.3
57
+ assert lung_agg.risk_category == "Moderate" # 2.3% for 6-year horizon
58
+ assert lung_agg.model_count == 1
59
+
60
+ def test_multiple_models_same_cancer_same_horizon(self):
61
+ """Test aggregation when multiple models assess same cancer with same time horizon."""
62
+ scores = [
63
+ RiskScore(
64
+ name="Model1",
65
+ score="2.0%",
66
+ cancer_type="breast",
67
+ probability_percent=2.0,
68
+ time_horizon_years=10.0,
69
+ score_type="probability",
70
+ ),
71
+ RiskScore(
72
+ name="Model2",
73
+ score="3.0%",
74
+ cancer_type="breast",
75
+ probability_percent=3.0,
76
+ time_horizon_years=10.0,
77
+ score_type="probability",
78
+ ),
79
+ RiskScore(
80
+ name="Model3",
81
+ score="4.0%",
82
+ cancer_type="breast",
83
+ probability_percent=4.0,
84
+ time_horizon_years=10.0,
85
+ score_type="probability",
86
+ ),
87
+ ]
88
+
89
+ aggregated = aggregate_probabilities(scores)
90
+
91
+ assert len(aggregated) == 1
92
+ agg = aggregated[0]
93
+
94
+ assert agg.cancer_type == "breast"
95
+ assert agg.time_horizon_years == 10.0
96
+ assert agg.avg_probability_percent == pytest.approx(3.0) # (2+3+4)/3
97
+ assert agg.risk_category == "Moderate" # 3.0% for 10-year horizon
98
+ assert agg.model_count == 3
99
+ assert len(agg.individual_scores) == 3
100
+
101
+ def test_multiple_models_same_cancer_different_horizons(self):
102
+ """Test aggregation with same cancer type but different time horizons."""
103
+ scores = [
104
+ RiskScore(
105
+ name="Model1",
106
+ score="1.5%",
107
+ cancer_type="breast",
108
+ probability_percent=1.5,
109
+ time_horizon_years=5.0,
110
+ score_type="probability",
111
+ ),
112
+ RiskScore(
113
+ name="Model2",
114
+ score="3.0%",
115
+ cancer_type="breast",
116
+ probability_percent=3.0,
117
+ time_horizon_years=10.0,
118
+ score_type="probability",
119
+ ),
120
+ RiskScore(
121
+ name="Model3",
122
+ score="15.0%",
123
+ cancer_type="breast",
124
+ probability_percent=15.0,
125
+ time_horizon_years=79.0,
126
+ score_type="probability",
127
+ ),
128
+ ]
129
+
130
+ aggregated = aggregate_probabilities(scores)
131
+
132
+ assert len(aggregated) == 3 # Three different time horizons
133
+
134
+ # Verify each time horizon is separate
135
+ horizons = {agg.time_horizon_years for agg in aggregated}
136
+ assert horizons == {5.0, 10.0, 79.0}
137
+
138
+ # Verify each has single model
139
+ for agg in aggregated:
140
+ assert agg.model_count == 1
141
+
142
+ def test_excludes_non_probability_scores(self):
143
+ """Test that non-probability scores are excluded from aggregation."""
144
+ scores = [
145
+ RiskScore(
146
+ name="Gail",
147
+ score="1.5%",
148
+ cancer_type="breast",
149
+ probability_percent=1.5,
150
+ time_horizon_years=5.0,
151
+ score_type="probability",
152
+ ),
153
+ RiskScore(
154
+ name="PCPT",
155
+ score="No Cancer: 45%, Low Grade: 30%, High Grade: 25%",
156
+ cancer_type="prostate",
157
+ probability_percent=None,
158
+ time_horizon_years=None,
159
+ score_type="categorical",
160
+ ),
161
+ RiskScore(
162
+ name="Model",
163
+ score="N/A: Age out of range",
164
+ cancer_type="lung",
165
+ probability_percent=None,
166
+ time_horizon_years=None,
167
+ score_type="not_applicable",
168
+ ),
169
+ ]
170
+
171
+ aggregated = aggregate_probabilities(scores)
172
+
173
+ assert len(aggregated) == 1
174
+ assert aggregated[0].cancer_type == "breast"
175
+
176
+ def test_empty_list(self):
177
+ """Test aggregation with empty score list."""
178
+ aggregated = aggregate_probabilities([])
179
+ assert aggregated == []
180
+
181
+ def test_all_non_probability_scores(self):
182
+ """Test aggregation when all scores are non-probability."""
183
+ scores = [
184
+ RiskScore(
185
+ name="PCPT",
186
+ score="Results",
187
+ cancer_type="prostate",
188
+ score_type="categorical",
189
+ ),
190
+ RiskScore(
191
+ name="Model",
192
+ score="N/A",
193
+ cancer_type="lung",
194
+ score_type="not_applicable",
195
+ ),
196
+ ]
197
+
198
+ aggregated = aggregate_probabilities(scores)
199
+ assert aggregated == []
200
+
201
+ def test_case_insensitive_cancer_type_grouping(self):
202
+ """Test that cancer types are grouped case-insensitively."""
203
+ scores = [
204
+ RiskScore(
205
+ name="Model1",
206
+ score="1.5%",
207
+ cancer_type="Breast",
208
+ probability_percent=1.5,
209
+ time_horizon_years=5.0,
210
+ score_type="probability",
211
+ ),
212
+ RiskScore(
213
+ name="Model2",
214
+ score="1.8%",
215
+ cancer_type="breast",
216
+ probability_percent=1.8,
217
+ time_horizon_years=5.0,
218
+ score_type="probability",
219
+ ),
220
+ RiskScore(
221
+ name="Model3",
222
+ score="1.7%",
223
+ cancer_type="BREAST",
224
+ probability_percent=1.7,
225
+ time_horizon_years=5.0,
226
+ score_type="probability",
227
+ ),
228
+ ]
229
+
230
+ aggregated = aggregate_probabilities(scores)
231
+
232
+ assert len(aggregated) == 1
233
+ assert aggregated[0].cancer_type == "breast" # normalized to lowercase
234
+ assert aggregated[0].model_count == 3
235
+ assert aggregated[0].avg_probability_percent == pytest.approx(1.6667, abs=0.001)
236
+
237
+
238
+ class TestSeparateScoreTypes:
239
+ """Test score type separation functionality."""
240
+
241
+ def test_separate_all_types(self):
242
+ """Test separation of all three score types."""
243
+ scores = [
244
+ RiskScore(
245
+ name="Gail",
246
+ score="1.5%",
247
+ score_type="probability",
248
+ ),
249
+ RiskScore(
250
+ name="BOADICEA",
251
+ score="2.0%",
252
+ score_type="probability",
253
+ ),
254
+ RiskScore(
255
+ name="PCPT",
256
+ score="No Cancer: 45%",
257
+ score_type="categorical",
258
+ ),
259
+ RiskScore(
260
+ name="Model",
261
+ score="N/A: Age out of range",
262
+ score_type="not_applicable",
263
+ ),
264
+ RiskScore(
265
+ name="Model2",
266
+ score="N/A: Invalid",
267
+ score_type="not_applicable",
268
+ ),
269
+ ]
270
+
271
+ separated = separate_score_types(scores)
272
+
273
+ assert len(separated["probability"]) == 2
274
+ assert len(separated["categorical"]) == 1
275
+ assert len(separated["not_applicable"]) == 2
276
+
277
+ def test_empty_list(self):
278
+ """Test separation with empty list."""
279
+ separated = separate_score_types([])
280
+
281
+ assert separated["probability"] == []
282
+ assert separated["categorical"] == []
283
+ assert separated["not_applicable"] == []
284
+
285
+ def test_only_probabilities(self):
286
+ """Test separation when all scores are probabilities."""
287
+ scores = [
288
+ RiskScore(name="Model1", score="1%", score_type="probability"),
289
+ RiskScore(name="Model2", score="2%", score_type="probability"),
290
+ ]
291
+
292
+ separated = separate_score_types(scores)
293
+
294
+ assert len(separated["probability"]) == 2
295
+ assert separated["categorical"] == []
296
+ assert separated["not_applicable"] == []
297
+
298
+
299
class TestFilterFunctions:
    """Exercise bucket-by-bucket behavior of ``separate_score_types``."""

    def test_separate_score_types_for_probability(self):
        """Probability scores are routed into the 'probability' bucket."""
        mixed = [
            RiskScore(name="Model1", score="1%", score_type="probability"),
            RiskScore(name="Model2", score="Result", score_type="categorical"),
            RiskScore(name="Model3", score="2%", score_type="probability"),
        ]

        buckets = separate_score_types(mixed)

        prob_bucket = buckets["probability"]
        assert len(prob_bucket) == 2
        for entry in prob_bucket:
            assert entry.score_type == "probability"

    def test_separate_score_types_for_categorical(self):
        """Categorical scores are routed into the 'categorical' bucket."""
        mixed = [
            RiskScore(name="Model1", score="1%", score_type="probability"),
            RiskScore(name="Model2", score="Result", score_type="categorical"),
            RiskScore(name="Model3", score="N/A", score_type="not_applicable"),
        ]

        buckets = separate_score_types(mixed)

        cat_bucket = buckets["categorical"]
        assert len(cat_bucket) == 1
        assert cat_bucket[0].score_type == "categorical"

    def test_separate_score_types_for_not_applicable(self):
        """Not-applicable scores are routed into the 'not_applicable' bucket."""
        mixed = [
            RiskScore(name="Model1", score="1%", score_type="probability"),
            RiskScore(
                name="Model2", score="N/A: Reason 1", score_type="not_applicable"
            ),
            RiskScore(
                name="Model3", score="N/A: Reason 2", score_type="not_applicable"
            ),
        ]

        buckets = separate_score_types(mixed)

        na_bucket = buckets["not_applicable"]
        assert len(na_bucket) == 2
        for entry in na_bucket:
            assert entry.score_type == "not_applicable"

    def test_separate_score_types_for_all_types(self):
        """A mixed input populates every bucket with the right counts."""
        mixed = [
            RiskScore(name="Model1", score="1%", score_type="probability"),
            RiskScore(name="Model2", score="2%", score_type="probability"),
            RiskScore(name="Model3", score="Result", score_type="categorical"),
            RiskScore(name="Model4", score="N/A: Age", score_type="not_applicable"),
        ]

        buckets = separate_score_types(mixed)

        # Expected bucket sizes: 2 probability, 1 categorical, 1 not_applicable.
        expected_counts = {"probability": 2, "categorical": 1, "not_applicable": 1}
        for bucket_name, count in expected_counts.items():
            assert len(buckets[bucket_name]) == count
class TestAggregatedRiskDataclass:
    """Construction checks for the AggregatedRisk dataclass."""

    def test_dataclass_creation(self):
        """Field values supplied at construction are stored unchanged."""
        gail_score = RiskScore(
            name="Gail",
            score="1.5%",
            cancer_type="breast",
            probability_percent=1.5,
            time_horizon_years=5.0,
            score_type="probability",
        )

        aggregated = AggregatedRisk(
            cancer_type="breast",
            time_horizon_years=5.0,
            avg_probability_percent=1.5,
            risk_category="Low",
            model_count=1,
            individual_scores=[gail_score],
        )

        # Every scalar field should round-trip exactly.
        expected_fields = {
            "cancer_type": "breast",
            "time_horizon_years": 5.0,
            "avg_probability_percent": 1.5,
            "risk_category": "Low",
            "model_count": 1,
        }
        for attr_name, expected_value in expected_fields.items():
            assert getattr(aggregated, attr_name) == expected_value
        assert len(aggregated.individual_scores) == 1
class TestNormalizeCancerType:
    """Normalization and display formatting of cancer-type strings."""

    def test_normalize_with_cancer_suffix(self):
        """A trailing 'cancer' word is stripped regardless of case."""
        cases = {
            "Breast Cancer": "breast",
            "Lung cancer": "lung",
            "PROSTATE CANCER": "prostate",
        }
        for raw, expected in cases.items():
            assert normalize_cancer_type(raw) == expected

    def test_normalize_without_cancer_suffix(self):
        """Inputs without a 'cancer' suffix are simply lowercased."""
        cases = {
            "Breast": "breast",
            "LUNG": "lung",
            "Prostate": "prostate",
        }
        for raw, expected in cases.items():
            assert normalize_cancer_type(raw) == expected

    def test_normalize_with_whitespace(self):
        """Surrounding whitespace is trimmed before normalization."""
        assert normalize_cancer_type("  Breast Cancer  ") == "breast"
        assert normalize_cancer_type("Lung cancer") == "lung"

    def test_normalize_empty_string(self):
        """The empty string normalizes to itself."""
        assert normalize_cancer_type("") == ""

    def test_display_cancer_type(self):
        """Normalized names are title-cased for display."""
        for normalized, display in (
            ("breast", "Breast"),
            ("lung", "Lung"),
            ("prostate", "Prostate"),
        ):
            assert get_display_cancer_type(normalized) == display
class TestCategorizeRisk:
    """Risk-category labels across probabilities and time horizons."""

    def test_categorize_short_horizon_very_low(self):
        """0.3% over 5 years maps to 'Very Low'."""
        label = categorize_risk(0.3, 5.0)
        assert label == "Very Low"

    def test_categorize_short_horizon_low(self):
        """1.0% over 5 years maps to 'Low'."""
        label = categorize_risk(1.0, 5.0)
        assert label == "Low"

    def test_categorize_short_horizon_moderate(self):
        """2.0% over 5 years maps to 'Moderate'."""
        label = categorize_risk(2.0, 5.0)
        assert label == "Moderate"

    def test_categorize_short_horizon_moderately_high(self):
        """4.0% over 5 years maps to 'Moderately High'."""
        label = categorize_risk(4.0, 5.0)
        assert label == "Moderately High"

    def test_categorize_short_horizon_high(self):
        """6.0% over 5 years maps to 'High'."""
        label = categorize_risk(6.0, 5.0)
        assert label == "High"

    def test_categorize_long_horizon_very_low(self):
        """0.5% over 10 years maps to 'Very Low'."""
        label = categorize_risk(0.5, 10.0)
        assert label == "Very Low"

    def test_categorize_long_horizon_low(self):
        """2.0% over 10 years maps to 'Low'."""
        label = categorize_risk(2.0, 10.0)
        assert label == "Low"

    def test_categorize_long_horizon_moderate(self):
        """5.0% over 10 years maps to 'Moderate'."""
        label = categorize_risk(5.0, 10.0)
        assert label == "Moderate"

    def test_categorize_long_horizon_moderately_high(self):
        """10.0% over 10 years maps to 'Moderately High'."""
        label = categorize_risk(10.0, 10.0)
        assert label == "Moderately High"

    def test_categorize_long_horizon_high(self):
        """20.0% over 10 years maps to 'High'."""
        label = categorize_risk(20.0, 10.0)
        assert label == "High"

    def test_categorize_lifetime_risk(self):
        """Lifetime horizon (79 years) spans all five categories."""
        lifetime_cases = (
            (0.5, "Very Low"),
            (2.0, "Low"),
            (5.0, "Moderate"),
            (12.0, "Moderately High"),
            (20.0, "High"),
        )
        for percent, expected_label in lifetime_cases:
            assert categorize_risk(percent, 79.0) == expected_label
tests/test_risk_models/test_gail_model.py CHANGED
@@ -362,6 +362,6 @@ class TestGailModel:
362
  "Gail Model" in self.model.description()
363
  or "BCRAT" in self.model.description()
364
  )
365
- assert "1.66" in self.model.interpretation()
366
  assert isinstance(self.model.references(), list)
367
  assert len(self.model.references()) > 0
 
362
  "Gail Model" in self.model.description()
363
  or "BCRAT" in self.model.description()
364
  )
365
+ assert "1.67" in self.model.interpretation()
366
  assert isinstance(self.model.references(), list)
367
  assert len(self.model.references()) > 0