Spaces:
Running
feat: SPEC_10 - Domain-Agnostic Refactor + License Fix (#87)
Browse files
* feat: implement SPEC_10 domain-agnostic refactor
* fix: complete SPEC_10 audit - license + HierarchicalOrchestrator domain
License Fixes:
- LICENSE: Fix copyright "Antibody Training Pipeline ESM" → "DeepBoner Contributors"
- pyproject.toml: Add missing license = "Apache-2.0" field
- README.md: Fix frontmatter license: mit → apache-2.0
Domain Threading Fixes:
- hierarchical.py: Add domain param to __init__ and pass to ResearchTeam
- factory.py: Pass domain to HierarchicalOrchestrator
All 237 tests pass. Domain now properly threaded through all orchestrators.
* test: enhance domain handling in orchestrators and judges
- Updated unit tests for `configure_orchestrator` to cover mock-mode and free-tier scenarios, ensuring the domain is correctly passed to handlers.
- Refactored tests for `JudgeHandler` to mock model retrieval, allowing for domain acceptance without API key requirements.
- Improved `AdvancedOrchestrator` tests to mock API key validation and ensure domain handling is consistent across orchestrators.
All tests pass successfully, reinforcing domain threading in the application.
* fix: CodeRabbit review - trailing comma bug + missing assertion
CRITICAL:
- src/app.py:148: Remove trailing comma that made has_anthropic a tuple
instead of a boolean, breaking free-tier detection
Minor:
- test_magentic_agents_domain.py: Add assertion to verify domain-specific
judge system prompt is passed through
- LICENSE +201 -0
- README.md +1 -1
- pyproject.toml +1 -0
- src/agent_factory/judges.py +35 -8
- src/agents/magentic_agents.py +41 -16
- src/agents/search_agent.py +4 -1
- src/agents/tools.py +5 -5
- src/app.py +39 -23
- src/config/__init__.py +0 -0
- src/config/domain.py +176 -0
- src/mcp_tools.py +14 -9
- src/orchestrators/advanced.py +10 -5
- src/orchestrators/factory.py +7 -2
- src/orchestrators/hierarchical.py +7 -3
- src/orchestrators/simple.py +7 -2
- src/prompts/hypothesis.py +10 -1
- src/prompts/judge.py +19 -2
- src/prompts/report.py +15 -4
- src/utils/config.py +4 -0
- tests/e2e/test_simple_mode.py +1 -1
- tests/unit/agent_factory/test_judge_domain.py +72 -0
- tests/unit/agents/test_magentic_agents_domain.py +47 -0
- tests/unit/agents/test_search_agent_domain.py +19 -0
- tests/unit/config/test_domain.py +53 -0
- tests/unit/mcp/test_mcp_tools_domain.py +29 -0
- tests/unit/orchestrators/test_advanced_orchestrator_domain.py +51 -0
- tests/unit/orchestrators/test_factory_domain.py +37 -0
- tests/unit/orchestrators/test_simple_orchestrator_domain.py +47 -0
- tests/unit/prompts/test_hypothesis_prompt_domain.py +16 -0
- tests/unit/prompts/test_judge_prompt_domain.py +31 -0
- tests/unit/prompts/test_report_prompt_domain.py +16 -0
- tests/unit/test_app_domain.py +70 -0
- tests/unit/utils/test_config_domain.py +15 -0
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
http://www.apache.org/licenses/
|
| 4 |
+
|
| 5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 6 |
+
|
| 7 |
+
1. Definitions.
|
| 8 |
+
|
| 9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 11 |
+
|
| 12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 13 |
+
the copyright owner that is granting the License.
|
| 14 |
+
|
| 15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 16 |
+
other entities that control, are controlled by, or are under common
|
| 17 |
+
control with that entity. For the purposes of this definition,
|
| 18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 19 |
+
direction or management of such entity, whether by contract or
|
| 20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 22 |
+
|
| 23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 24 |
+
exercising permissions granted by this License.
|
| 25 |
+
|
| 26 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 27 |
+
including but not limited to software source code, documentation
|
| 28 |
+
source, and configuration files.
|
| 29 |
+
|
| 30 |
+
"Object" form shall mean any form resulting from mechanical
|
| 31 |
+
transformation or translation of a Source form, including but
|
| 32 |
+
not limited to compiled object code, generated documentation,
|
| 33 |
+
and conversions to other media types.
|
| 34 |
+
|
| 35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 36 |
+
Object form, made available under the License, as indicated by a
|
| 37 |
+
copyright notice that is included in or attached to the work
|
| 38 |
+
(an example is provided in the Appendix below).
|
| 39 |
+
|
| 40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 41 |
+
form, that is based on (or derived from) the Work and for which the
|
| 42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 44 |
+
of this License, Derivative Works shall not include works that remain
|
| 45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 46 |
+
the Work and Derivative Works thereof.
|
| 47 |
+
|
| 48 |
+
"Contribution" shall mean any work of authorship, including
|
| 49 |
+
the original version of the Work and any modifications or additions
|
| 50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 54 |
+
means any form of electronic, verbal, or written communication sent
|
| 55 |
+
to the Licensor or its representatives, including but not limited to
|
| 56 |
+
communication on electronic mailing lists, source code control systems,
|
| 57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 59 |
+
excluding communication that is conspicuously marked or otherwise
|
| 60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 61 |
+
|
| 62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 64 |
+
subsequently incorporated within the Work.
|
| 65 |
+
|
| 66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 71 |
+
Work and such Derivative Works in Source or Object form.
|
| 72 |
+
|
| 73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 76 |
+
(except as stated in this section) patent license to make, have made,
|
| 77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 78 |
+
where such license applies only to those patent claims licensable
|
| 79 |
+
by such Contributor that are necessarily infringed by their
|
| 80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 82 |
+
institute patent litigation against any entity (including a
|
| 83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 84 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 85 |
+
or contributory patent infringement, then any patent licenses
|
| 86 |
+
granted to You under this License for that Work shall terminate
|
| 87 |
+
as of the date such litigation is filed.
|
| 88 |
+
|
| 89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 90 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 91 |
+
modifications, and in Source or Object form, provided that You
|
| 92 |
+
meet the following conditions:
|
| 93 |
+
|
| 94 |
+
(a) You must give any other recipients of the Work or
|
| 95 |
+
Derivative Works a copy of this License; and
|
| 96 |
+
|
| 97 |
+
(b) You must cause any modified files to carry prominent notices
|
| 98 |
+
stating that You changed the files; and
|
| 99 |
+
|
| 100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 101 |
+
that You distribute, all copyright, patent, trademark, and
|
| 102 |
+
attribution notices from the Source form of the Work,
|
| 103 |
+
excluding those notices that do not pertain to any part of
|
| 104 |
+
the Derivative Works; and
|
| 105 |
+
|
| 106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 107 |
+
distribution, then any Derivative Works that You distribute must
|
| 108 |
+
include a readable copy of the attribution notices contained
|
| 109 |
+
within such NOTICE file, excluding those notices that do not
|
| 110 |
+
pertain to any part of the Derivative Works, in at least one
|
| 111 |
+
of the following places: within a NOTICE text file distributed
|
| 112 |
+
as part of the Derivative Works; within the Source form or
|
| 113 |
+
documentation, if provided along with the Derivative Works; or,
|
| 114 |
+
within a display generated by the Derivative Works, if and
|
| 115 |
+
wherever such third-party notices normally appear. The contents
|
| 116 |
+
of the NOTICE file are for informational purposes only and
|
| 117 |
+
do not modify the License. You may add Your own attribution
|
| 118 |
+
notices within Derivative Works that You distribute, alongside
|
| 119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 120 |
+
that such additional attribution notices cannot be construed
|
| 121 |
+
as modifying the License.
|
| 122 |
+
|
| 123 |
+
You may add Your own copyright statement to Your modifications and
|
| 124 |
+
may provide additional or different license terms and conditions
|
| 125 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 126 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 127 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 128 |
+
the conditions stated in this License.
|
| 129 |
+
|
| 130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 132 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 133 |
+
this License, without any additional terms or conditions.
|
| 134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 135 |
+
the terms of any separate license agreement you may have executed
|
| 136 |
+
with Licensor regarding such Contributions.
|
| 137 |
+
|
| 138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 140 |
+
except as required for reasonable and customary use in describing the
|
| 141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 142 |
+
|
| 143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 144 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 147 |
+
implied, including, without limitation, any warranties or conditions
|
| 148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 150 |
+
appropriateness of using or redistributing the Work and assume any
|
| 151 |
+
risks associated with Your exercise of permissions under this License.
|
| 152 |
+
|
| 153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 154 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 155 |
+
unless required by applicable law (such as deliberate and grossly
|
| 156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 157 |
+
liable to You for damages, including any direct, indirect, special,
|
| 158 |
+
incidental, or consequential damages of any character arising as a
|
| 159 |
+
result of this License or out of the use or inability to use the
|
| 160 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 161 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 162 |
+
other commercial damages or losses), even if such Contributor
|
| 163 |
+
has been advised of the possibility of such damages.
|
| 164 |
+
|
| 165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 168 |
+
or other liability obligations and/or rights consistent with this
|
| 169 |
+
License. However, in accepting such obligations, You may act only
|
| 170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 171 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 172 |
+
defend, and hold each Contributor harmless for any liability
|
| 173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 174 |
+
of your accepting any such warranty or additional liability.
|
| 175 |
+
|
| 176 |
+
END OF TERMS AND CONDITIONS
|
| 177 |
+
|
| 178 |
+
APPENDIX: How to apply the Apache License to your work.
|
| 179 |
+
|
| 180 |
+
To apply the Apache License to your work, attach the following
|
| 181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 182 |
+
replaced with your own identifying information. (Don't include
|
| 183 |
+
the brackets!) The text should be enclosed in the appropriate
|
| 184 |
+
comment syntax for the file format. We also recommend that a
|
| 185 |
+
file or class name and description of purpose be included on the
|
| 186 |
+
same "printed page" as the copyright notice for easier
|
| 187 |
+
identification within third-party archives.
|
| 188 |
+
|
| 189 |
+
Copyright 2025 DeepBoner Contributors
|
| 190 |
+
|
| 191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 192 |
+
you may not use this file except in compliance with the License.
|
| 193 |
+
You may obtain a copy of the License at
|
| 194 |
+
|
| 195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 196 |
+
|
| 197 |
+
Unless required by applicable law or agreed to in writing, software
|
| 198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 200 |
+
See the License for the specific language governing permissions and
|
| 201 |
+
limitations under the License.
|
|
@@ -8,7 +8,7 @@ sdk_version: "6.0.1"
|
|
| 8 |
python_version: "3.11"
|
| 9 |
app_file: src/app.py
|
| 10 |
pinned: true
|
| 11 |
-
license:
|
| 12 |
short_description: "Deep Research Agent for the Strongest Boners 💪🔬"
|
| 13 |
tags:
|
| 14 |
- mcp-in-action-track-enterprise
|
|
|
|
| 8 |
python_version: "3.11"
|
| 9 |
app_file: src/app.py
|
| 10 |
pinned: true
|
| 11 |
+
license: apache-2.0
|
| 12 |
short_description: "Deep Research Agent for the Strongest Boners 💪🔬"
|
| 13 |
tags:
|
| 14 |
- mcp-in-action-track-enterprise
|
|
@@ -3,6 +3,7 @@ name = "deepboner"
|
|
| 3 |
version = "0.1.0"
|
| 4 |
description = "AI-Native Sexual Health Research Agent"
|
| 5 |
readme = "README.md"
|
|
|
|
| 6 |
requires-python = ">=3.11"
|
| 7 |
dependencies = [
|
| 8 |
# Core
|
|
|
|
| 3 |
version = "0.1.0"
|
| 4 |
description = "AI-Native Sexual Health Research Agent"
|
| 5 |
readme = "README.md"
|
| 6 |
+
license = "Apache-2.0"
|
| 7 |
requires-python = ">=3.11"
|
| 8 |
dependencies = [
|
| 9 |
# Core
|
|
@@ -15,10 +15,11 @@ from pydantic_ai.providers.huggingface import HuggingFaceProvider
|
|
| 15 |
from pydantic_ai.providers.openai import OpenAIProvider
|
| 16 |
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
| 17 |
|
|
|
|
| 18 |
from src.prompts.judge import (
|
| 19 |
-
SYSTEM_PROMPT,
|
| 20 |
format_empty_evidence_prompt,
|
| 21 |
format_user_prompt,
|
|
|
|
| 22 |
select_evidence_for_judge,
|
| 23 |
)
|
| 24 |
from src.utils.config import settings
|
|
@@ -84,18 +85,24 @@ class JudgeHandler:
|
|
| 84 |
Uses PydanticAI to ensure responses match the JudgeAssessment schema.
|
| 85 |
"""
|
| 86 |
|
| 87 |
-
def __init__(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
"""
|
| 89 |
Initialize the JudgeHandler.
|
| 90 |
|
| 91 |
Args:
|
| 92 |
model: Optional PydanticAI model. If None, uses config default.
|
|
|
|
| 93 |
"""
|
| 94 |
self.model = model or get_model()
|
|
|
|
| 95 |
self.agent = Agent(
|
| 96 |
model=self.model,
|
| 97 |
output_type=JudgeAssessment,
|
| 98 |
-
system_prompt=
|
| 99 |
retries=3,
|
| 100 |
)
|
| 101 |
|
|
@@ -126,6 +133,7 @@ class JudgeHandler:
|
|
| 126 |
question=question[:100],
|
| 127 |
evidence_count=len(evidence),
|
| 128 |
iteration=iteration,
|
|
|
|
| 129 |
)
|
| 130 |
|
| 131 |
# Format the prompt based on whether we have evidence
|
|
@@ -138,6 +146,7 @@ class JudgeHandler:
|
|
| 138 |
iteration,
|
| 139 |
max_iterations,
|
| 140 |
total_evidence_count=len(evidence),
|
|
|
|
| 141 |
)
|
| 142 |
else:
|
| 143 |
user_prompt = format_empty_evidence_prompt(question)
|
|
@@ -213,14 +222,20 @@ class HFInferenceJudgeHandler:
|
|
| 213 |
# Rationale: 3 models x 3 retries each = 9 total API attempts before circuit break
|
| 214 |
MAX_CONSECUTIVE_FAILURES: ClassVar[int] = 3
|
| 215 |
|
| 216 |
-
def __init__(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
"""
|
| 218 |
Initialize with HF Inference client.
|
| 219 |
|
| 220 |
Args:
|
| 221 |
model_id: Optional specific model ID. If None, uses FALLBACK_MODELS chain.
|
|
|
|
| 222 |
"""
|
| 223 |
self.model_id = model_id
|
|
|
|
| 224 |
# Will automatically use HF_TOKEN from env if available
|
| 225 |
self.client = InferenceClient()
|
| 226 |
self.call_count = 0
|
|
@@ -269,6 +284,7 @@ class HFInferenceJudgeHandler:
|
|
| 269 |
iteration,
|
| 270 |
max_iterations,
|
| 271 |
total_evidence_count=len(evidence),
|
|
|
|
| 272 |
)
|
| 273 |
else:
|
| 274 |
user_prompt = format_empty_evidence_prompt(question)
|
|
@@ -314,12 +330,13 @@ class HFInferenceJudgeHandler:
|
|
| 314 |
async def _call_with_retry(self, model: str, prompt: str, question: str) -> JudgeAssessment:
|
| 315 |
"""Make API call with retry logic using chat_completion."""
|
| 316 |
loop = asyncio.get_running_loop()
|
|
|
|
| 317 |
|
| 318 |
# Build messages for chat_completion (model-agnostic)
|
| 319 |
messages = [
|
| 320 |
{
|
| 321 |
"role": "system",
|
| 322 |
-
"content": f"""{
|
| 323 |
|
| 324 |
IMPORTANT: Respond with ONLY valid JSON matching this schema:
|
| 325 |
{{
|
|
@@ -420,7 +437,9 @@ IMPORTANT: Respond with ONLY valid JSON matching this schema:
|
|
| 420 |
return None
|
| 421 |
|
| 422 |
def _create_quota_exhausted_assessment(
|
| 423 |
-
self,
|
|
|
|
|
|
|
| 424 |
) -> JudgeAssessment:
|
| 425 |
"""Create an assessment that stops the loop when quota is exhausted."""
|
| 426 |
findings = _extract_titles_from_evidence(
|
|
@@ -455,7 +474,9 @@ IMPORTANT: Respond with ONLY valid JSON matching this schema:
|
|
| 455 |
)
|
| 456 |
|
| 457 |
def _create_forced_synthesis_assessment(
|
| 458 |
-
self,
|
|
|
|
|
|
|
| 459 |
) -> JudgeAssessment:
|
| 460 |
"""Force synthesis after repeated failures to prevent infinite loops."""
|
| 461 |
findings = _extract_titles_from_evidence(
|
|
@@ -524,14 +545,20 @@ class MockJudgeHandler:
|
|
| 524 |
to provide a useful demo experience without requiring API keys.
|
| 525 |
"""
|
| 526 |
|
| 527 |
-
def __init__(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 528 |
"""
|
| 529 |
Initialize with optional mock response.
|
| 530 |
|
| 531 |
Args:
|
| 532 |
mock_response: The assessment to return. If None, extracts from evidence.
|
|
|
|
| 533 |
"""
|
| 534 |
self.mock_response = mock_response
|
|
|
|
| 535 |
self.call_count = 0
|
| 536 |
self.last_question: str | None = None
|
| 537 |
self.last_evidence: list[Evidence] | None = None
|
|
|
|
| 15 |
from pydantic_ai.providers.openai import OpenAIProvider
|
| 16 |
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
| 17 |
|
| 18 |
+
from src.config.domain import ResearchDomain
|
| 19 |
from src.prompts.judge import (
|
|
|
|
| 20 |
format_empty_evidence_prompt,
|
| 21 |
format_user_prompt,
|
| 22 |
+
get_system_prompt,
|
| 23 |
select_evidence_for_judge,
|
| 24 |
)
|
| 25 |
from src.utils.config import settings
|
|
|
|
| 85 |
Uses PydanticAI to ensure responses match the JudgeAssessment schema.
|
| 86 |
"""
|
| 87 |
|
| 88 |
+
def __init__(
|
| 89 |
+
self,
|
| 90 |
+
model: Any = None,
|
| 91 |
+
domain: ResearchDomain | str | None = None,
|
| 92 |
+
) -> None:
|
| 93 |
"""
|
| 94 |
Initialize the JudgeHandler.
|
| 95 |
|
| 96 |
Args:
|
| 97 |
model: Optional PydanticAI model. If None, uses config default.
|
| 98 |
+
domain: Research domain for prompt customization.
|
| 99 |
"""
|
| 100 |
self.model = model or get_model()
|
| 101 |
+
self.domain = domain
|
| 102 |
self.agent = Agent(
|
| 103 |
model=self.model,
|
| 104 |
output_type=JudgeAssessment,
|
| 105 |
+
system_prompt=get_system_prompt(domain),
|
| 106 |
retries=3,
|
| 107 |
)
|
| 108 |
|
|
|
|
| 133 |
question=question[:100],
|
| 134 |
evidence_count=len(evidence),
|
| 135 |
iteration=iteration,
|
| 136 |
+
domain=self.domain,
|
| 137 |
)
|
| 138 |
|
| 139 |
# Format the prompt based on whether we have evidence
|
|
|
|
| 146 |
iteration,
|
| 147 |
max_iterations,
|
| 148 |
total_evidence_count=len(evidence),
|
| 149 |
+
domain=self.domain,
|
| 150 |
)
|
| 151 |
else:
|
| 152 |
user_prompt = format_empty_evidence_prompt(question)
|
|
|
|
| 222 |
# Rationale: 3 models x 3 retries each = 9 total API attempts before circuit break
|
| 223 |
MAX_CONSECUTIVE_FAILURES: ClassVar[int] = 3
|
| 224 |
|
| 225 |
+
def __init__(
|
| 226 |
+
self,
|
| 227 |
+
model_id: str | None = None,
|
| 228 |
+
domain: ResearchDomain | str | None = None,
|
| 229 |
+
) -> None:
|
| 230 |
"""
|
| 231 |
Initialize with HF Inference client.
|
| 232 |
|
| 233 |
Args:
|
| 234 |
model_id: Optional specific model ID. If None, uses FALLBACK_MODELS chain.
|
| 235 |
+
domain: Research domain for prompt customization.
|
| 236 |
"""
|
| 237 |
self.model_id = model_id
|
| 238 |
+
self.domain = domain
|
| 239 |
# Will automatically use HF_TOKEN from env if available
|
| 240 |
self.client = InferenceClient()
|
| 241 |
self.call_count = 0
|
|
|
|
| 284 |
iteration,
|
| 285 |
max_iterations,
|
| 286 |
total_evidence_count=len(evidence),
|
| 287 |
+
domain=self.domain,
|
| 288 |
)
|
| 289 |
else:
|
| 290 |
user_prompt = format_empty_evidence_prompt(question)
|
|
|
|
| 330 |
async def _call_with_retry(self, model: str, prompt: str, question: str) -> JudgeAssessment:
|
| 331 |
"""Make API call with retry logic using chat_completion."""
|
| 332 |
loop = asyncio.get_running_loop()
|
| 333 |
+
system_prompt = get_system_prompt(self.domain)
|
| 334 |
|
| 335 |
# Build messages for chat_completion (model-agnostic)
|
| 336 |
messages = [
|
| 337 |
{
|
| 338 |
"role": "system",
|
| 339 |
+
"content": f"""{system_prompt}
|
| 340 |
|
| 341 |
IMPORTANT: Respond with ONLY valid JSON matching this schema:
|
| 342 |
{{
|
|
|
|
| 437 |
return None
|
| 438 |
|
| 439 |
def _create_quota_exhausted_assessment(
|
| 440 |
+
self,
|
| 441 |
+
question: str,
|
| 442 |
+
evidence: list[Evidence],
|
| 443 |
) -> JudgeAssessment:
|
| 444 |
"""Create an assessment that stops the loop when quota is exhausted."""
|
| 445 |
findings = _extract_titles_from_evidence(
|
|
|
|
| 474 |
)
|
| 475 |
|
| 476 |
def _create_forced_synthesis_assessment(
|
| 477 |
+
self,
|
| 478 |
+
question: str,
|
| 479 |
+
evidence: list[Evidence],
|
| 480 |
) -> JudgeAssessment:
|
| 481 |
"""Force synthesis after repeated failures to prevent infinite loops."""
|
| 482 |
findings = _extract_titles_from_evidence(
|
|
|
|
| 545 |
to provide a useful demo experience without requiring API keys.
|
| 546 |
"""
|
| 547 |
|
| 548 |
+
def __init__(
|
| 549 |
+
self,
|
| 550 |
+
mock_response: JudgeAssessment | None = None,
|
| 551 |
+
domain: ResearchDomain | str | None = None,
|
| 552 |
+
) -> None:
|
| 553 |
"""
|
| 554 |
Initialize with optional mock response.
|
| 555 |
|
| 556 |
Args:
|
| 557 |
mock_response: The assessment to return. If None, extracts from evidence.
|
| 558 |
+
domain: Research domain (ignored in mock but kept for interface compatibility).
|
| 559 |
"""
|
| 560 |
self.mock_response = mock_response
|
| 561 |
+
self.domain = domain
|
| 562 |
self.call_count = 0
|
| 563 |
self.last_question: str | None = None
|
| 564 |
self.last_evidence: list[Evidence] | None = None
|
|
@@ -9,14 +9,19 @@ from src.agents.tools import (
|
|
| 9 |
search_preprints,
|
| 10 |
search_pubmed,
|
| 11 |
)
|
|
|
|
| 12 |
from src.utils.config import settings
|
| 13 |
|
| 14 |
|
| 15 |
-
def create_search_agent(
|
|
|
|
|
|
|
|
|
|
| 16 |
"""Create a search agent with internal LLM and search tools.
|
| 17 |
|
| 18 |
Args:
|
| 19 |
chat_client: Optional custom chat client. If None, uses default.
|
|
|
|
| 20 |
|
| 21 |
Returns:
|
| 22 |
ChatAgent configured for biomedical search
|
|
@@ -25,14 +30,12 @@ def create_search_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgen
|
|
| 25 |
model_id=settings.openai_model, # Use configured model
|
| 26 |
api_key=settings.openai_api_key,
|
| 27 |
)
|
|
|
|
| 28 |
|
| 29 |
return ChatAgent(
|
| 30 |
name="SearchAgent",
|
| 31 |
-
description=
|
| 32 |
-
|
| 33 |
-
"for drug repurposing evidence"
|
| 34 |
-
),
|
| 35 |
-
instructions="""You are a biomedical search specialist. When asked to find evidence:
|
| 36 |
|
| 37 |
1. Analyze the request to determine what to search for
|
| 38 |
2. Extract key search terms (drug names, disease names, mechanisms)
|
|
@@ -43,18 +46,23 @@ def create_search_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgen
|
|
| 43 |
4. Summarize what you found and highlight key evidence
|
| 44 |
|
| 45 |
Be thorough - search multiple databases when appropriate.
|
| 46 |
-
Focus on finding: mechanisms of action, clinical evidence, and specific
|
|
|
|
| 47 |
chat_client=client,
|
| 48 |
tools=[search_pubmed, search_clinical_trials, search_preprints],
|
| 49 |
temperature=1.0, # Explicitly set for reasoning model compatibility (o1/o3)
|
| 50 |
)
|
| 51 |
|
| 52 |
|
| 53 |
-
def create_judge_agent(
|
|
|
|
|
|
|
|
|
|
| 54 |
"""Create a judge agent that evaluates evidence quality.
|
| 55 |
|
| 56 |
Args:
|
| 57 |
chat_client: Optional custom chat client. If None, uses default.
|
|
|
|
| 58 |
|
| 59 |
Returns:
|
| 60 |
ChatAgent configured for evidence assessment
|
|
@@ -63,11 +71,14 @@ def create_judge_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgent
|
|
| 63 |
model_id=settings.openai_model,
|
| 64 |
api_key=settings.openai_api_key,
|
| 65 |
)
|
|
|
|
| 66 |
|
| 67 |
return ChatAgent(
|
| 68 |
name="JudgeAgent",
|
| 69 |
description="Evaluates evidence quality and determines if sufficient for synthesis",
|
| 70 |
-
instructions="""
|
|
|
|
|
|
|
| 71 |
|
| 72 |
1. Review all evidence presented in the conversation
|
| 73 |
2. Score on two dimensions (0-10 each):
|
|
@@ -89,11 +100,15 @@ Be rigorous but fair. Look for:
|
|
| 89 |
)
|
| 90 |
|
| 91 |
|
| 92 |
-
def create_hypothesis_agent(
|
|
|
|
|
|
|
|
|
|
| 93 |
"""Create a hypothesis generation agent.
|
| 94 |
|
| 95 |
Args:
|
| 96 |
chat_client: Optional custom chat client. If None, uses default.
|
|
|
|
| 97 |
|
| 98 |
Returns:
|
| 99 |
ChatAgent configured for hypothesis generation
|
|
@@ -102,11 +117,14 @@ def create_hypothesis_agent(chat_client: OpenAIChatClient | None = None) -> Chat
|
|
| 102 |
model_id=settings.openai_model,
|
| 103 |
api_key=settings.openai_api_key,
|
| 104 |
)
|
|
|
|
| 105 |
|
| 106 |
return ChatAgent(
|
| 107 |
name="HypothesisAgent",
|
| 108 |
-
description=
|
| 109 |
-
instructions="""
|
|
|
|
|
|
|
| 110 |
|
| 111 |
1. Identify the key molecular targets involved
|
| 112 |
2. Map the biological pathways affected
|
|
@@ -126,11 +144,15 @@ Focus on mechanistic plausibility and existing evidence.""",
|
|
| 126 |
)
|
| 127 |
|
| 128 |
|
| 129 |
-
def create_report_agent(
|
|
|
|
|
|
|
|
|
|
| 130 |
"""Create a report synthesis agent.
|
| 131 |
|
| 132 |
Args:
|
| 133 |
chat_client: Optional custom chat client. If None, uses default.
|
|
|
|
| 134 |
|
| 135 |
Returns:
|
| 136 |
ChatAgent configured for report generation
|
|
@@ -139,11 +161,14 @@ def create_report_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgen
|
|
| 139 |
model_id=settings.openai_model,
|
| 140 |
api_key=settings.openai_api_key,
|
| 141 |
)
|
|
|
|
| 142 |
|
| 143 |
return ChatAgent(
|
| 144 |
name="ReportAgent",
|
| 145 |
description="Synthesizes research findings into structured reports",
|
| 146 |
-
instructions="""
|
|
|
|
|
|
|
| 147 |
|
| 148 |
Generate a structured report with these sections:
|
| 149 |
|
|
@@ -164,8 +189,8 @@ Databases searched, queries used, evidence reviewed
|
|
| 164 |
- Clinical trials
|
| 165 |
- Safety profile
|
| 166 |
|
| 167 |
-
##
|
| 168 |
-
List specific
|
| 169 |
|
| 170 |
## Limitations
|
| 171 |
Gaps in evidence, conflicting data, caveats
|
|
|
|
| 9 |
search_preprints,
|
| 10 |
search_pubmed,
|
| 11 |
)
|
| 12 |
+
from src.config.domain import ResearchDomain, get_domain_config
|
| 13 |
from src.utils.config import settings
|
| 14 |
|
| 15 |
|
| 16 |
+
def create_search_agent(
|
| 17 |
+
chat_client: OpenAIChatClient | None = None,
|
| 18 |
+
domain: ResearchDomain | str | None = None,
|
| 19 |
+
) -> ChatAgent:
|
| 20 |
"""Create a search agent with internal LLM and search tools.
|
| 21 |
|
| 22 |
Args:
|
| 23 |
chat_client: Optional custom chat client. If None, uses default.
|
| 24 |
+
domain: Research domain for customization.
|
| 25 |
|
| 26 |
Returns:
|
| 27 |
ChatAgent configured for biomedical search
|
|
|
|
| 30 |
model_id=settings.openai_model, # Use configured model
|
| 31 |
api_key=settings.openai_api_key,
|
| 32 |
)
|
| 33 |
+
config = get_domain_config(domain)
|
| 34 |
|
| 35 |
return ChatAgent(
|
| 36 |
name="SearchAgent",
|
| 37 |
+
description=config.search_agent_description,
|
| 38 |
+
instructions=f"""You are a biomedical search specialist. When asked to find evidence:
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
1. Analyze the request to determine what to search for
|
| 41 |
2. Extract key search terms (drug names, disease names, mechanisms)
|
|
|
|
| 46 |
4. Summarize what you found and highlight key evidence
|
| 47 |
|
| 48 |
Be thorough - search multiple databases when appropriate.
|
| 49 |
+
Focus on finding: mechanisms of action, clinical evidence, and specific findings
|
| 50 |
+
related to {config.name}.""",
|
| 51 |
chat_client=client,
|
| 52 |
tools=[search_pubmed, search_clinical_trials, search_preprints],
|
| 53 |
temperature=1.0, # Explicitly set for reasoning model compatibility (o1/o3)
|
| 54 |
)
|
| 55 |
|
| 56 |
|
| 57 |
+
def create_judge_agent(
|
| 58 |
+
chat_client: OpenAIChatClient | None = None,
|
| 59 |
+
domain: ResearchDomain | str | None = None,
|
| 60 |
+
) -> ChatAgent:
|
| 61 |
"""Create a judge agent that evaluates evidence quality.
|
| 62 |
|
| 63 |
Args:
|
| 64 |
chat_client: Optional custom chat client. If None, uses default.
|
| 65 |
+
domain: Research domain for customization.
|
| 66 |
|
| 67 |
Returns:
|
| 68 |
ChatAgent configured for evidence assessment
|
|
|
|
| 71 |
model_id=settings.openai_model,
|
| 72 |
api_key=settings.openai_api_key,
|
| 73 |
)
|
| 74 |
+
config = get_domain_config(domain)
|
| 75 |
|
| 76 |
return ChatAgent(
|
| 77 |
name="JudgeAgent",
|
| 78 |
description="Evaluates evidence quality and determines if sufficient for synthesis",
|
| 79 |
+
instructions=f"""{config.judge_system_prompt}
|
| 80 |
+
|
| 81 |
+
When asked to evaluate:
|
| 82 |
|
| 83 |
1. Review all evidence presented in the conversation
|
| 84 |
2. Score on two dimensions (0-10 each):
|
|
|
|
| 100 |
)
|
| 101 |
|
| 102 |
|
| 103 |
+
def create_hypothesis_agent(
|
| 104 |
+
chat_client: OpenAIChatClient | None = None,
|
| 105 |
+
domain: ResearchDomain | str | None = None,
|
| 106 |
+
) -> ChatAgent:
|
| 107 |
"""Create a hypothesis generation agent.
|
| 108 |
|
| 109 |
Args:
|
| 110 |
chat_client: Optional custom chat client. If None, uses default.
|
| 111 |
+
domain: Research domain for customization.
|
| 112 |
|
| 113 |
Returns:
|
| 114 |
ChatAgent configured for hypothesis generation
|
|
|
|
| 117 |
model_id=settings.openai_model,
|
| 118 |
api_key=settings.openai_api_key,
|
| 119 |
)
|
| 120 |
+
config = get_domain_config(domain)
|
| 121 |
|
| 122 |
return ChatAgent(
|
| 123 |
name="HypothesisAgent",
|
| 124 |
+
description=config.hypothesis_agent_description,
|
| 125 |
+
instructions=f"""{config.hypothesis_system_prompt}
|
| 126 |
+
|
| 127 |
+
Based on evidence:
|
| 128 |
|
| 129 |
1. Identify the key molecular targets involved
|
| 130 |
2. Map the biological pathways affected
|
|
|
|
| 144 |
)
|
| 145 |
|
| 146 |
|
| 147 |
+
def create_report_agent(
|
| 148 |
+
chat_client: OpenAIChatClient | None = None,
|
| 149 |
+
domain: ResearchDomain | str | None = None,
|
| 150 |
+
) -> ChatAgent:
|
| 151 |
"""Create a report synthesis agent.
|
| 152 |
|
| 153 |
Args:
|
| 154 |
chat_client: Optional custom chat client. If None, uses default.
|
| 155 |
+
domain: Research domain for customization.
|
| 156 |
|
| 157 |
Returns:
|
| 158 |
ChatAgent configured for report generation
|
|
|
|
| 161 |
model_id=settings.openai_model,
|
| 162 |
api_key=settings.openai_api_key,
|
| 163 |
)
|
| 164 |
+
config = get_domain_config(domain)
|
| 165 |
|
| 166 |
return ChatAgent(
|
| 167 |
name="ReportAgent",
|
| 168 |
description="Synthesizes research findings into structured reports",
|
| 169 |
+
instructions=f"""{config.report_system_prompt}
|
| 170 |
+
|
| 171 |
+
When asked to synthesize:
|
| 172 |
|
| 173 |
Generate a structured report with these sections:
|
| 174 |
|
|
|
|
| 189 |
- Clinical trials
|
| 190 |
- Safety profile
|
| 191 |
|
| 192 |
+
## Candidates
|
| 193 |
+
List specific candidates with potential
|
| 194 |
|
| 195 |
## Limitations
|
| 196 |
Gaps in evidence, conflicting data, caveats
|
|
@@ -10,6 +10,7 @@ from agent_framework import (
|
|
| 10 |
Role,
|
| 11 |
)
|
| 12 |
|
|
|
|
| 13 |
from src.orchestrators import SearchHandlerProtocol
|
| 14 |
from src.utils.models import Citation, Evidence, SearchResult
|
| 15 |
|
|
@@ -25,10 +26,12 @@ class SearchAgent(BaseAgent): # type: ignore[misc]
|
|
| 25 |
search_handler: SearchHandlerProtocol,
|
| 26 |
evidence_store: dict[str, list[Evidence]],
|
| 27 |
embedding_service: "EmbeddingService | None" = None,
|
|
|
|
| 28 |
) -> None:
|
|
|
|
| 29 |
super().__init__(
|
| 30 |
name="SearchAgent",
|
| 31 |
-
description=
|
| 32 |
)
|
| 33 |
self._handler = search_handler
|
| 34 |
self._evidence_store = evidence_store
|
|
|
|
| 10 |
Role,
|
| 11 |
)
|
| 12 |
|
| 13 |
+
from src.config.domain import ResearchDomain, get_domain_config
|
| 14 |
from src.orchestrators import SearchHandlerProtocol
|
| 15 |
from src.utils.models import Citation, Evidence, SearchResult
|
| 16 |
|
|
|
|
| 26 |
search_handler: SearchHandlerProtocol,
|
| 27 |
evidence_store: dict[str, list[Evidence]],
|
| 28 |
embedding_service: "EmbeddingService | None" = None,
|
| 29 |
+
domain: ResearchDomain | str | None = None,
|
| 30 |
) -> None:
|
| 31 |
+
config = get_domain_config(domain)
|
| 32 |
super().__init__(
|
| 33 |
name="SearchAgent",
|
| 34 |
+
description=config.search_agent_description,
|
| 35 |
)
|
| 36 |
self._handler = search_handler
|
| 37 |
self._evidence_store = evidence_store
|
|
@@ -17,7 +17,7 @@ _clinicaltrials = ClinicalTrialsTool()
|
|
| 17 |
_europepmc = EuropePMCTool()
|
| 18 |
|
| 19 |
|
| 20 |
-
@ai_function # type: ignore[arg-type, misc]
|
| 21 |
async def search_pubmed(query: str, max_results: int = 10) -> str:
|
| 22 |
"""Search PubMed for biomedical research papers.
|
| 23 |
|
|
@@ -77,12 +77,12 @@ async def search_pubmed(query: str, max_results: int = 10) -> str:
|
|
| 77 |
return "\n".join(output)
|
| 78 |
|
| 79 |
|
| 80 |
-
@ai_function # type: ignore[arg-type, misc]
|
| 81 |
async def search_clinical_trials(query: str, max_results: int = 10) -> str:
|
| 82 |
"""Search ClinicalTrials.gov for clinical studies.
|
| 83 |
|
| 84 |
Use this tool to find ongoing and completed clinical trials
|
| 85 |
-
for
|
| 86 |
|
| 87 |
Args:
|
| 88 |
query: Search terms (e.g., "metformin cancer phase 3")
|
|
@@ -117,7 +117,7 @@ async def search_clinical_trials(query: str, max_results: int = 10) -> str:
|
|
| 117 |
return "\n".join(output)
|
| 118 |
|
| 119 |
|
| 120 |
-
@ai_function # type: ignore[arg-type, misc]
|
| 121 |
async def search_preprints(query: str, max_results: int = 10) -> str:
|
| 122 |
"""Search Europe PMC for preprints and papers.
|
| 123 |
|
|
@@ -157,7 +157,7 @@ async def search_preprints(query: str, max_results: int = 10) -> str:
|
|
| 157 |
return "\n".join(output)
|
| 158 |
|
| 159 |
|
| 160 |
-
@ai_function # type: ignore[arg-type, misc]
|
| 161 |
async def get_bibliography() -> str:
|
| 162 |
"""Get the full list of collected evidence for the bibliography.
|
| 163 |
|
|
|
|
| 17 |
_europepmc = EuropePMCTool()
|
| 18 |
|
| 19 |
|
| 20 |
+
@ai_function # type: ignore[arg-type, misc, untyped-decorator]
|
| 21 |
async def search_pubmed(query: str, max_results: int = 10) -> str:
|
| 22 |
"""Search PubMed for biomedical research papers.
|
| 23 |
|
|
|
|
| 77 |
return "\n".join(output)
|
| 78 |
|
| 79 |
|
| 80 |
+
@ai_function # type: ignore[arg-type, misc, untyped-decorator]
|
| 81 |
async def search_clinical_trials(query: str, max_results: int = 10) -> str:
|
| 82 |
"""Search ClinicalTrials.gov for clinical studies.
|
| 83 |
|
| 84 |
Use this tool to find ongoing and completed clinical trials
|
| 85 |
+
for potential interventions.
|
| 86 |
|
| 87 |
Args:
|
| 88 |
query: Search terms (e.g., "metformin cancer phase 3")
|
|
|
|
| 117 |
return "\n".join(output)
|
| 118 |
|
| 119 |
|
| 120 |
+
@ai_function # type: ignore[arg-type, misc, untyped-decorator]
|
| 121 |
async def search_preprints(query: str, max_results: int = 10) -> str:
|
| 122 |
"""Search Europe PMC for preprints and papers.
|
| 123 |
|
|
|
|
| 157 |
return "\n".join(output)
|
| 158 |
|
| 159 |
|
| 160 |
+
@ai_function # type: ignore[arg-type, misc, untyped-decorator]
|
| 161 |
async def get_bibliography() -> str:
|
| 162 |
"""Get the full list of collected evidence for the bibliography.
|
| 163 |
|
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
"
|
| 2 |
|
| 3 |
import os
|
| 4 |
from collections.abc import AsyncGenerator
|
|
@@ -11,6 +11,7 @@ from pydantic_ai.providers.anthropic import AnthropicProvider
|
|
| 11 |
from pydantic_ai.providers.openai import OpenAIProvider
|
| 12 |
|
| 13 |
from src.agent_factory.judges import HFInferenceJudgeHandler, JudgeHandler, MockJudgeHandler
|
|
|
|
| 14 |
from src.orchestrators import create_orchestrator
|
| 15 |
from src.tools.clinicaltrials import ClinicalTrialsTool
|
| 16 |
from src.tools.europepmc import EuropePMCTool
|
|
@@ -26,6 +27,7 @@ def configure_orchestrator(
|
|
| 26 |
use_mock: bool = False,
|
| 27 |
mode: str = "simple",
|
| 28 |
user_api_key: str | None = None,
|
|
|
|
| 29 |
) -> tuple[Any, str]:
|
| 30 |
"""
|
| 31 |
Create an orchestrator instance.
|
|
@@ -34,6 +36,7 @@ def configure_orchestrator(
|
|
| 34 |
use_mock: If True, use MockJudgeHandler (no API key needed)
|
| 35 |
mode: Orchestrator mode ("simple" or "advanced")
|
| 36 |
user_api_key: Optional user-provided API key (BYOK) - auto-detects provider
|
|
|
|
| 37 |
|
| 38 |
Returns:
|
| 39 |
Tuple of (Orchestrator instance, backend_name)
|
|
@@ -56,7 +59,7 @@ def configure_orchestrator(
|
|
| 56 |
|
| 57 |
# 1. Forced Mock (Unit Testing)
|
| 58 |
if use_mock:
|
| 59 |
-
judge_handler = MockJudgeHandler()
|
| 60 |
backend_info = "Mock (Testing)"
|
| 61 |
|
| 62 |
# 2. Paid API Key (User provided or Env)
|
|
@@ -77,20 +80,20 @@ def configure_orchestrator(
|
|
| 77 |
raise ConfigurationError(
|
| 78 |
"Invalid API key format. Expected sk-... (OpenAI) or sk-ant-... (Anthropic)"
|
| 79 |
)
|
| 80 |
-
judge_handler = JudgeHandler(model=model)
|
| 81 |
|
| 82 |
# 3. Environment API Keys (fallback)
|
| 83 |
elif os.getenv("OPENAI_API_KEY"):
|
| 84 |
-
judge_handler = JudgeHandler(model=None) # Uses env key
|
| 85 |
backend_info = "Paid API (OpenAI from env)"
|
| 86 |
|
| 87 |
elif os.getenv("ANTHROPIC_API_KEY"):
|
| 88 |
-
judge_handler = JudgeHandler(model=None) # Uses env key
|
| 89 |
backend_info = "Paid API (Anthropic from env)"
|
| 90 |
|
| 91 |
# 4. Free Tier (HuggingFace Inference)
|
| 92 |
else:
|
| 93 |
-
judge_handler = HFInferenceJudgeHandler()
|
| 94 |
backend_info = "Free Tier (Llama 3.1 / Mistral)"
|
| 95 |
|
| 96 |
orchestrator = create_orchestrator(
|
|
@@ -99,6 +102,7 @@ def configure_orchestrator(
|
|
| 99 |
config=config,
|
| 100 |
mode=mode, # type: ignore
|
| 101 |
api_key=user_api_key,
|
|
|
|
| 102 |
)
|
| 103 |
|
| 104 |
return orchestrator, backend_info
|
|
@@ -108,6 +112,7 @@ async def research_agent(
|
|
| 108 |
message: str,
|
| 109 |
history: list[dict[str, Any]],
|
| 110 |
mode: str = "simple",
|
|
|
|
| 111 |
api_key: str = "",
|
| 112 |
api_key_state: str = "",
|
| 113 |
) -> AsyncGenerator[str, None]:
|
|
@@ -118,6 +123,7 @@ async def research_agent(
|
|
| 118 |
message: User's research question
|
| 119 |
history: Chat history (Gradio format)
|
| 120 |
mode: Orchestrator mode ("simple" or "advanced")
|
|
|
|
| 121 |
api_key: Optional user-provided API key (BYOK - auto-detects provider)
|
| 122 |
api_key_state: Persistent API key state (survives example clicks)
|
| 123 |
|
|
@@ -132,6 +138,7 @@ async def research_agent(
|
|
| 132 |
# Gradio passes None for missing example columns, overriding defaults
|
| 133 |
api_key_str = api_key or ""
|
| 134 |
api_key_state_str = api_key_state or ""
|
|
|
|
| 135 |
|
| 136 |
# BUG FIX: Prefer freshly-entered key, then persisted state
|
| 137 |
user_api_key = (api_key_str.strip() or api_key_state_str.strip()) or None
|
|
@@ -172,11 +179,12 @@ async def research_agent(
|
|
| 172 |
use_mock=False, # Never use mock in production - HF Inference is the free fallback
|
| 173 |
mode=mode,
|
| 174 |
user_api_key=user_api_key,
|
|
|
|
| 175 |
)
|
| 176 |
|
| 177 |
# Immediate backend info + loading feedback so user knows something is happening
|
| 178 |
yield (
|
| 179 |
-
f"🧠 **Backend**: {backend_name}\n\n"
|
| 180 |
"⏳ **Processing...** Searching PubMed, ClinicalTrials.gov, Europe PMC, OpenAlex...\n"
|
| 181 |
)
|
| 182 |
|
|
@@ -231,34 +239,39 @@ def create_demo() -> tuple[gr.ChatInterface, gr.Accordion]:
|
|
| 231 |
api_key_state = gr.State("")
|
| 232 |
|
| 233 |
# 1. Unwrapped ChatInterface (Fixes Accordion Bug)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
demo = gr.ChatInterface(
|
| 235 |
fn=research_agent,
|
| 236 |
title="🍆 DeepBoner",
|
| 237 |
-
description=
|
| 238 |
-
"*AI-Powered Sexual Health Research Agent — searches PubMed, "
|
| 239 |
-
"ClinicalTrials.gov, Europe PMC & OpenAlex*\n\n"
|
| 240 |
-
"Deep research for sexual wellness, ED treatments, hormone therapy, "
|
| 241 |
-
"libido, and reproductive health - for all genders.\n\n"
|
| 242 |
-
"---\n"
|
| 243 |
-
"*Research tool only — not for medical advice.* \n"
|
| 244 |
-
"**MCP Server Active**: Connect Claude Desktop to `/gradio_api/mcp/`"
|
| 245 |
-
),
|
| 246 |
examples=[
|
| 247 |
[
|
| 248 |
"What drugs improve female libido post-menopause?",
|
| 249 |
"simple",
|
|
|
|
| 250 |
None,
|
| 251 |
None,
|
| 252 |
],
|
| 253 |
[
|
| 254 |
-
"
|
| 255 |
-
"
|
|
|
|
| 256 |
None,
|
| 257 |
None,
|
| 258 |
],
|
| 259 |
[
|
| 260 |
-
"
|
| 261 |
-
"
|
|
|
|
| 262 |
None,
|
| 263 |
None,
|
| 264 |
],
|
|
@@ -271,6 +284,12 @@ def create_demo() -> tuple[gr.ChatInterface, gr.Accordion]:
|
|
| 271 |
label="Orchestrator Mode",
|
| 272 |
info="⚡ Simple: Free/Any | 🔬 Advanced: OpenAI (Deep Research)",
|
| 273 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
gr.Textbox(
|
| 275 |
label="🔑 API Key (Optional)",
|
| 276 |
placeholder="sk-... (OpenAI) or sk-ant-... (Anthropic)",
|
|
@@ -281,9 +300,6 @@ def create_demo() -> tuple[gr.ChatInterface, gr.Accordion]:
|
|
| 281 |
],
|
| 282 |
)
|
| 283 |
|
| 284 |
-
# API key persists because examples include [message, mode, None, None].
|
| 285 |
-
# The explicit None values tell Gradio to NOT overwrite those inputs.
|
| 286 |
-
|
| 287 |
return demo, additional_inputs_accordion
|
| 288 |
|
| 289 |
|
|
|
|
| 1 |
+
"Gradio UI for DeepBoner agent with MCP server support."
|
| 2 |
|
| 3 |
import os
|
| 4 |
from collections.abc import AsyncGenerator
|
|
|
|
| 11 |
from pydantic_ai.providers.openai import OpenAIProvider
|
| 12 |
|
| 13 |
from src.agent_factory.judges import HFInferenceJudgeHandler, JudgeHandler, MockJudgeHandler
|
| 14 |
+
from src.config.domain import ResearchDomain
|
| 15 |
from src.orchestrators import create_orchestrator
|
| 16 |
from src.tools.clinicaltrials import ClinicalTrialsTool
|
| 17 |
from src.tools.europepmc import EuropePMCTool
|
|
|
|
| 27 |
use_mock: bool = False,
|
| 28 |
mode: str = "simple",
|
| 29 |
user_api_key: str | None = None,
|
| 30 |
+
domain: str | ResearchDomain | None = None,
|
| 31 |
) -> tuple[Any, str]:
|
| 32 |
"""
|
| 33 |
Create an orchestrator instance.
|
|
|
|
| 36 |
use_mock: If True, use MockJudgeHandler (no API key needed)
|
| 37 |
mode: Orchestrator mode ("simple" or "advanced")
|
| 38 |
user_api_key: Optional user-provided API key (BYOK) - auto-detects provider
|
| 39 |
+
domain: Research domain (e.g., "general", "sexual_health")
|
| 40 |
|
| 41 |
Returns:
|
| 42 |
Tuple of (Orchestrator instance, backend_name)
|
|
|
|
| 59 |
|
| 60 |
# 1. Forced Mock (Unit Testing)
|
| 61 |
if use_mock:
|
| 62 |
+
judge_handler = MockJudgeHandler(domain=domain)
|
| 63 |
backend_info = "Mock (Testing)"
|
| 64 |
|
| 65 |
# 2. Paid API Key (User provided or Env)
|
|
|
|
| 80 |
raise ConfigurationError(
|
| 81 |
"Invalid API key format. Expected sk-... (OpenAI) or sk-ant-... (Anthropic)"
|
| 82 |
)
|
| 83 |
+
judge_handler = JudgeHandler(model=model, domain=domain)
|
| 84 |
|
| 85 |
# 3. Environment API Keys (fallback)
|
| 86 |
elif os.getenv("OPENAI_API_KEY"):
|
| 87 |
+
judge_handler = JudgeHandler(model=None, domain=domain) # Uses env key
|
| 88 |
backend_info = "Paid API (OpenAI from env)"
|
| 89 |
|
| 90 |
elif os.getenv("ANTHROPIC_API_KEY"):
|
| 91 |
+
judge_handler = JudgeHandler(model=None, domain=domain) # Uses env key
|
| 92 |
backend_info = "Paid API (Anthropic from env)"
|
| 93 |
|
| 94 |
# 4. Free Tier (HuggingFace Inference)
|
| 95 |
else:
|
| 96 |
+
judge_handler = HFInferenceJudgeHandler(domain=domain)
|
| 97 |
backend_info = "Free Tier (Llama 3.1 / Mistral)"
|
| 98 |
|
| 99 |
orchestrator = create_orchestrator(
|
|
|
|
| 102 |
config=config,
|
| 103 |
mode=mode, # type: ignore
|
| 104 |
api_key=user_api_key,
|
| 105 |
+
domain=domain,
|
| 106 |
)
|
| 107 |
|
| 108 |
return orchestrator, backend_info
|
|
|
|
| 112 |
message: str,
|
| 113 |
history: list[dict[str, Any]],
|
| 114 |
mode: str = "simple",
|
| 115 |
+
domain: str = "general",
|
| 116 |
api_key: str = "",
|
| 117 |
api_key_state: str = "",
|
| 118 |
) -> AsyncGenerator[str, None]:
|
|
|
|
| 123 |
message: User's research question
|
| 124 |
history: Chat history (Gradio format)
|
| 125 |
mode: Orchestrator mode ("simple" or "advanced")
|
| 126 |
+
domain: Research domain
|
| 127 |
api_key: Optional user-provided API key (BYOK - auto-detects provider)
|
| 128 |
api_key_state: Persistent API key state (survives example clicks)
|
| 129 |
|
|
|
|
| 138 |
# Gradio passes None for missing example columns, overriding defaults
|
| 139 |
api_key_str = api_key or ""
|
| 140 |
api_key_state_str = api_key_state or ""
|
| 141 |
+
domain_str = domain or "general"
|
| 142 |
|
| 143 |
# BUG FIX: Prefer freshly-entered key, then persisted state
|
| 144 |
user_api_key = (api_key_str.strip() or api_key_state_str.strip()) or None
|
|
|
|
| 179 |
use_mock=False, # Never use mock in production - HF Inference is the free fallback
|
| 180 |
mode=mode,
|
| 181 |
user_api_key=user_api_key,
|
| 182 |
+
domain=domain_str,
|
| 183 |
)
|
| 184 |
|
| 185 |
# Immediate backend info + loading feedback so user knows something is happening
|
| 186 |
yield (
|
| 187 |
+
f"🧠 **Backend**: {backend_name} | **Domain**: {domain_str.title()}\n\n"
|
| 188 |
"⏳ **Processing...** Searching PubMed, ClinicalTrials.gov, Europe PMC, OpenAlex...\n"
|
| 189 |
)
|
| 190 |
|
|
|
|
| 239 |
api_key_state = gr.State("")
|
| 240 |
|
| 241 |
# 1. Unwrapped ChatInterface (Fixes Accordion Bug)
|
| 242 |
+
description = (
|
| 243 |
+
"*AI-Powered Research Agent — searches PubMed, "
|
| 244 |
+
"ClinicalTrials.gov, Europe PMC & OpenAlex*\n\n"
|
| 245 |
+
"Deep research for sexual wellness, ED treatments, hormone therapy, "
|
| 246 |
+
"libido, and reproductive health - for all genders.\n\n"
|
| 247 |
+
"---\n"
|
| 248 |
+
"*Research tool only — not for medical advice.* \n"
|
| 249 |
+
"**MCP Server Active**: Connect Claude Desktop to `/gradio_api/mcp/`"
|
| 250 |
+
)
|
| 251 |
+
|
| 252 |
demo = gr.ChatInterface(
|
| 253 |
fn=research_agent,
|
| 254 |
title="🍆 DeepBoner",
|
| 255 |
+
description=description,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
examples=[
|
| 257 |
[
|
| 258 |
"What drugs improve female libido post-menopause?",
|
| 259 |
"simple",
|
| 260 |
+
"sexual_health",
|
| 261 |
None,
|
| 262 |
None,
|
| 263 |
],
|
| 264 |
[
|
| 265 |
+
"Metformin mechanism for Alzheimer's?",
|
| 266 |
+
"simple",
|
| 267 |
+
"general",
|
| 268 |
None,
|
| 269 |
None,
|
| 270 |
],
|
| 271 |
[
|
| 272 |
+
"Clinical trials for PDE5 inhibitors alternatives?",
|
| 273 |
+
"advanced",
|
| 274 |
+
"sexual_health",
|
| 275 |
None,
|
| 276 |
None,
|
| 277 |
],
|
|
|
|
| 284 |
label="Orchestrator Mode",
|
| 285 |
info="⚡ Simple: Free/Any | 🔬 Advanced: OpenAI (Deep Research)",
|
| 286 |
),
|
| 287 |
+
gr.Dropdown(
|
| 288 |
+
choices=[d.value for d in ResearchDomain],
|
| 289 |
+
value="general",
|
| 290 |
+
label="Research Domain",
|
| 291 |
+
info="Select research focus area (adjusts prompts)",
|
| 292 |
+
),
|
| 293 |
gr.Textbox(
|
| 294 |
label="🔑 API Key (Optional)",
|
| 295 |
placeholder="sk-... (OpenAI) or sk-ant-... (Anthropic)",
|
|
|
|
| 300 |
],
|
| 301 |
)
|
| 302 |
|
|
|
|
|
|
|
|
|
|
| 303 |
return demo, additional_inputs_accordion
|
| 304 |
|
| 305 |
|
|
File without changes
|
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Centralized domain configuration for research agents.
|
| 2 |
+
|
| 3 |
+
This module defines research domains and their associated prompts,
|
| 4 |
+
allowing the agent to operate in domain-agnostic or domain-specific modes.
|
| 5 |
+
|
| 6 |
+
Usage:
|
| 7 |
+
from src.config.domain import get_domain_config, ResearchDomain
|
| 8 |
+
|
| 9 |
+
# Get default (general) config
|
| 10 |
+
config = get_domain_config()
|
| 11 |
+
|
| 12 |
+
# Get specific domain
|
| 13 |
+
config = get_domain_config(ResearchDomain.SEXUAL_HEALTH)
|
| 14 |
+
|
| 15 |
+
# Use in prompts
|
| 16 |
+
system_prompt = config.judge_system_prompt
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
from enum import Enum
|
| 20 |
+
|
| 21 |
+
from pydantic import BaseModel
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class ResearchDomain(str, Enum):
    """Available research domains.

    Members mix in ``str``, so each one compares equal to its raw value
    (``ResearchDomain.GENERAL == "general"``). Iterating the enum yields the
    members in declaration order.

    Lookup by value is forgiving: ``ResearchDomain(" Sexual_Health ")``
    resolves to ``SEXUAL_HEALTH``. Values that match no member after
    normalization still raise ``ValueError``.
    """

    GENERAL = "general"
    DRUG_REPURPOSING = "drug_repurposing"
    SEXUAL_HEALTH = "sexual_health"

    @classmethod
    def _missing_(cls, value: object) -> "ResearchDomain | None":
        """Resolve values that differ from a member only by case or padding.

        ``Enum`` calls this hook only after the exact-value lookup has failed.
        Returning ``None`` lets ``Enum`` raise its usual ``ValueError``.
        """
        if isinstance(value, str):
            normalized = value.strip().lower()
            for member in cls:
                if member.value == normalized:
                    return member
        return None
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class DomainConfig(BaseModel):
    """Bundle of the domain-specific text for one research domain.

    Every field is a required string. Keeping the wording for a domain in a
    single model gives the rest of the codebase one place to read it from.
    """

    # --- Identity ---
    name: str
    description: str

    # --- Report generation ---
    report_title: str
    report_focus: str

    # --- Judge prompts ---
    judge_system_prompt: str
    judge_scoring_prompt: str

    # --- Hypothesis prompts ---
    hypothesis_system_prompt: str

    # --- Report writer prompts ---
    report_system_prompt: str

    # --- Search context ---
    search_description: str
    search_example_query: str

    # --- Agent descriptions (for Magentic mode) ---
    search_agent_description: str
    hypothesis_agent_description: str
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# ─────────────────────────────────────────────────────────────────
|
| 67 |
+
# Domain Definitions
|
| 68 |
+
# ─────────────────────────────────────────────────────────────────
|
| 69 |
+
|
| 70 |
+
# Domain-agnostic wording.
# NOTE(review): prompt continuation lines are kept at column 0 so the prompt
# text carries no indentation — confirm against the committed file.
GENERAL_CONFIG = DomainConfig(
    # Identity
    name="General Research",
    description="General-purpose biomedical research agent",
    # Report generation
    report_title="## Research Analysis",
    report_focus="comprehensive research synthesis",
    # Judge prompts
    judge_system_prompt="""You are an expert research judge.
Your role is to evaluate evidence quality, assess relevance to the research query,
and determine if sufficient evidence exists to synthesize findings.""",
    judge_scoring_prompt="""Score this evidence for research relevance.
Provide ONLY scores and extracted data.""",
    # Hypothesis prompt
    hypothesis_system_prompt="""You are a biomedical research scientist.
Your role is to generate evidence-based hypotheses from the literature,
identifying key mechanisms, targets, and potential therapeutic implications.""",
    # Report writer prompt
    report_system_prompt="""You are a scientific writer specializing in research reports.
Your role is to synthesize evidence into clear, well-structured reports with
proper citations and evidence-based conclusions.""",
    # Search context
    search_description="Searches biomedical literature for relevant evidence",
    search_example_query="metformin aging mechanisms",
    # Agent descriptions
    search_agent_description="Searches PubMed, ClinicalTrials.gov, and Europe PMC for evidence",
    hypothesis_agent_description="Generates mechanistic hypotheses from evidence",
)
|
| 91 |
+
|
| 92 |
+
# Wording specialised for drug repurposing research.
# NOTE(review): prompt continuation lines are kept at column 0 so the prompt
# text carries no indentation — confirm against the committed file.
DRUG_REPURPOSING_CONFIG = DomainConfig(
    # Identity
    name="Drug Repurposing",
    description="Drug repurposing research specialist",
    # Report generation
    report_title="## Drug Repurposing Analysis",
    report_focus="drug repurposing opportunities",
    # Judge prompts
    judge_system_prompt="""You are an expert drug repurposing research judge.
Your role is to evaluate evidence for drug repurposing potential, assess
mechanism plausibility, and determine if compounds warrant further investigation.""",
    judge_scoring_prompt="""Score this evidence for drug repurposing potential.
Provide ONLY scores and extracted data.""",
    # Hypothesis prompt
    hypothesis_system_prompt="""You are a biomedical research scientist specializing in drug repurposing.
Your role is to generate mechanistic hypotheses for how existing drugs might
treat new indications, based on shared pathways and targets.""",
    # Report writer prompt
    report_system_prompt="""You are a scientific writer specializing in drug repurposing research reports.
Your role is to synthesize evidence into actionable drug repurposing recommendations
with clear mechanistic rationale and clinical translation potential.""",
    # Search context
    search_description="Searches biomedical literature for drug repurposing evidence",
    search_example_query="metformin alzheimer repurposing",
    # Agent descriptions
    search_agent_description="Searches PubMed for drug repurposing evidence",
    hypothesis_agent_description="Generates mechanistic hypotheses for drug repurposing",
)
|
| 117 |
+
|
| 118 |
+
# Wording specialised for sexual health research.
# NOTE(review): prompt continuation lines are kept at column 0 so the prompt
# text carries no indentation — confirm against the committed file.
SEXUAL_HEALTH_CONFIG = DomainConfig(
    # Identity
    name="Sexual Health Research",
    description="Sexual health and wellness research specialist",
    # Report generation
    report_title="## Sexual Health Analysis",
    report_focus="sexual health and wellness interventions",
    # Judge prompts
    judge_system_prompt="""You are an expert sexual health research judge.
Your role is to evaluate evidence for sexual health interventions, assess
efficacy and safety data, and determine clinical applicability.""",
    judge_scoring_prompt="""Score this evidence for sexual health relevance.
Provide ONLY scores and extracted data.""",
    # Hypothesis prompt
    hypothesis_system_prompt="""You are a biomedical research scientist specializing in sexual health.
Your role is to generate evidence-based hypotheses for sexual health interventions,
identifying mechanisms of action and potential therapeutic applications.""",
    # Report writer prompt
    report_system_prompt="""You are a scientific writer specializing in sexual health research reports.
Your role is to synthesize evidence into clear recommendations for sexual health
interventions with proper safety considerations.""",
    # Search context
    search_description="Searches biomedical literature for sexual health evidence",
    search_example_query="testosterone therapy female libido",
    # Agent descriptions
    search_agent_description="Searches PubMed for sexual health evidence",
    hypothesis_agent_description="Generates hypotheses for sexual health interventions",
)
|
| 143 |
+
|
| 144 |
+
# ─────────────────────────────────────────────────────────────────
|
| 145 |
+
# Domain Registry
|
| 146 |
+
# ─────────────────────────────────────────────────────────────────
|
| 147 |
+
|
| 148 |
+
# Registry mapping every ResearchDomain member to its DomainConfig.
DOMAIN_CONFIGS: dict[ResearchDomain, DomainConfig] = {
    ResearchDomain.GENERAL: GENERAL_CONFIG,
    ResearchDomain.DRUG_REPURPOSING: DRUG_REPURPOSING_CONFIG,
    ResearchDomain.SEXUAL_HEALTH: SEXUAL_HEALTH_CONFIG,
}

# Domain used when the caller supplies none, or supplies an unknown string.
DEFAULT_DOMAIN = ResearchDomain.GENERAL
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def get_domain_config(domain: ResearchDomain | str | None = None) -> DomainConfig:
    """Look up the configuration for a research domain.

    Args:
        domain: A ``ResearchDomain`` member, its string value, or ``None``.
            ``None`` and strings that ``ResearchDomain`` rejects both select
            ``DEFAULT_DOMAIN``.

    Returns:
        The ``DomainConfig`` registered for the resolved domain.

    Raises:
        KeyError: If ``domain`` is neither ``None`` nor a string and is not a
            key of ``DOMAIN_CONFIGS``.
    """
    if domain is None:
        return DOMAIN_CONFIGS[DEFAULT_DOMAIN]

    # Non-string values are used as registry keys without conversion.
    if not isinstance(domain, str):
        return DOMAIN_CONFIGS[domain]

    # ResearchDomain members are str instances too; converting one is a no-op.
    try:
        resolved = ResearchDomain(domain)
    except ValueError:
        # Unknown names fall back to the default rather than failing.
        resolved = DEFAULT_DOMAIN

    return DOMAIN_CONFIGS[resolved]
|
|
@@ -7,6 +7,7 @@ Each function follows the MCP tool contract:
|
|
| 7 |
- Formatted string returns
|
| 8 |
"""
|
| 9 |
|
|
|
|
| 10 |
from src.tools.clinicaltrials import ClinicalTrialsTool
|
| 11 |
from src.tools.europepmc import EuropePMCTool
|
| 12 |
from src.tools.pubmed import PubMedTool
|
|
@@ -17,27 +18,29 @@ _trials = ClinicalTrialsTool()
|
|
| 17 |
_europepmc = EuropePMCTool()
|
| 18 |
|
| 19 |
|
| 20 |
-
async def search_pubmed(query: str, max_results: int = 10) -> str:
|
| 21 |
"""Search PubMed for peer-reviewed biomedical literature.
|
| 22 |
|
| 23 |
Searches NCBI PubMed database for scientific papers matching your query.
|
| 24 |
Returns titles, authors, abstracts, and citation information.
|
| 25 |
|
| 26 |
Args:
|
| 27 |
-
query: Search query (e.g., "metformin alzheimer"
|
| 28 |
max_results: Maximum results to return (1-50, default 10)
|
|
|
|
| 29 |
|
| 30 |
Returns:
|
| 31 |
Formatted search results with paper titles, authors, dates, and abstracts
|
| 32 |
"""
|
| 33 |
max_results = max(1, min(50, max_results)) # Clamp to valid range
|
|
|
|
| 34 |
|
| 35 |
results = await _pubmed.search(query, max_results)
|
| 36 |
|
| 37 |
if not results:
|
| 38 |
return f"No PubMed results found for: {query}"
|
| 39 |
|
| 40 |
-
formatted = [f"## PubMed Results for: {query}\n"]
|
| 41 |
for i, evidence in enumerate(results, 1):
|
| 42 |
formatted.append(f"### {i}. {evidence.citation.title}")
|
| 43 |
formatted.append(f"**Authors**: {', '.join(evidence.citation.authors[:3])}")
|
|
@@ -109,15 +112,16 @@ async def search_europepmc(query: str, max_results: int = 10) -> str:
|
|
| 109 |
return "\n".join(formatted)
|
| 110 |
|
| 111 |
|
| 112 |
-
async def search_all_sources(query: str, max_per_source: int = 5) -> str:
|
| 113 |
"""Search all biomedical sources simultaneously.
|
| 114 |
|
| 115 |
Performs parallel search across PubMed, ClinicalTrials.gov, and Europe PMC.
|
| 116 |
-
This is the most comprehensive search option for
|
| 117 |
|
| 118 |
Args:
|
| 119 |
query: Search query (e.g., "metformin alzheimer", "aspirin cancer prevention")
|
| 120 |
max_per_source: Maximum results per source (1-20, default 5)
|
|
|
|
| 121 |
|
| 122 |
Returns:
|
| 123 |
Combined results from all sources with source labels
|
|
@@ -125,9 +129,10 @@ async def search_all_sources(query: str, max_per_source: int = 5) -> str:
|
|
| 125 |
import asyncio
|
| 126 |
|
| 127 |
max_per_source = max(1, min(20, max_per_source))
|
|
|
|
| 128 |
|
| 129 |
# Run all searches in parallel
|
| 130 |
-
pubmed_task = search_pubmed(query, max_per_source)
|
| 131 |
trials_task = search_clinical_trials(query, max_per_source)
|
| 132 |
europepmc_task = search_europepmc(query, max_per_source)
|
| 133 |
|
|
@@ -135,7 +140,7 @@ async def search_all_sources(query: str, max_per_source: int = 5) -> str:
|
|
| 135 |
pubmed_task, trials_task, europepmc_task, return_exceptions=True
|
| 136 |
)
|
| 137 |
|
| 138 |
-
formatted = [f"# Comprehensive Search: {query}\n"]
|
| 139 |
|
| 140 |
# Add each result section (handle exceptions gracefully)
|
| 141 |
if isinstance(pubmed_results, str):
|
|
@@ -161,10 +166,10 @@ async def analyze_hypothesis(
|
|
| 161 |
condition: str,
|
| 162 |
evidence_summary: str,
|
| 163 |
) -> str:
|
| 164 |
-
"""Perform statistical analysis of
|
| 165 |
|
| 166 |
Executes AI-generated Python code in a secure Modal sandbox to analyze
|
| 167 |
-
the statistical evidence for a
|
| 168 |
|
| 169 |
Args:
|
| 170 |
drug: The drug being evaluated (e.g., "metformin")
|
|
|
|
| 7 |
- Formatted string returns
|
| 8 |
"""
|
| 9 |
|
| 10 |
+
from src.config.domain import get_domain_config
|
| 11 |
from src.tools.clinicaltrials import ClinicalTrialsTool
|
| 12 |
from src.tools.europepmc import EuropePMCTool
|
| 13 |
from src.tools.pubmed import PubMedTool
|
|
|
|
| 18 |
_europepmc = EuropePMCTool()
|
| 19 |
|
| 20 |
|
| 21 |
+
async def search_pubmed(query: str, max_results: int = 10, domain: str = "general") -> str:
|
| 22 |
"""Search PubMed for peer-reviewed biomedical literature.
|
| 23 |
|
| 24 |
Searches NCBI PubMed database for scientific papers matching your query.
|
| 25 |
Returns titles, authors, abstracts, and citation information.
|
| 26 |
|
| 27 |
Args:
|
| 28 |
+
query: Search query (e.g., "metformin alzheimer")
|
| 29 |
max_results: Maximum results to return (1-50, default 10)
|
| 30 |
+
domain: Research domain (general, drug_repurposing, sexual_health)
|
| 31 |
|
| 32 |
Returns:
|
| 33 |
Formatted search results with paper titles, authors, dates, and abstracts
|
| 34 |
"""
|
| 35 |
max_results = max(1, min(50, max_results)) # Clamp to valid range
|
| 36 |
+
config = get_domain_config(domain)
|
| 37 |
|
| 38 |
results = await _pubmed.search(query, max_results)
|
| 39 |
|
| 40 |
if not results:
|
| 41 |
return f"No PubMed results found for: {query}"
|
| 42 |
|
| 43 |
+
formatted = [f"## PubMed Results for: {query} ({config.name})\n"]
|
| 44 |
for i, evidence in enumerate(results, 1):
|
| 45 |
formatted.append(f"### {i}. {evidence.citation.title}")
|
| 46 |
formatted.append(f"**Authors**: {', '.join(evidence.citation.authors[:3])}")
|
|
|
|
| 112 |
return "\n".join(formatted)
|
| 113 |
|
| 114 |
|
| 115 |
+
async def search_all_sources(query: str, max_per_source: int = 5, domain: str = "general") -> str:
|
| 116 |
"""Search all biomedical sources simultaneously.
|
| 117 |
|
| 118 |
Performs parallel search across PubMed, ClinicalTrials.gov, and Europe PMC.
|
| 119 |
+
This is the most comprehensive search option for biomedical research.
|
| 120 |
|
| 121 |
Args:
|
| 122 |
query: Search query (e.g., "metformin alzheimer", "aspirin cancer prevention")
|
| 123 |
max_per_source: Maximum results per source (1-20, default 5)
|
| 124 |
+
domain: Research domain (general, drug_repurposing, sexual_health)
|
| 125 |
|
| 126 |
Returns:
|
| 127 |
Combined results from all sources with source labels
|
|
|
|
| 129 |
import asyncio
|
| 130 |
|
| 131 |
max_per_source = max(1, min(20, max_per_source))
|
| 132 |
+
config = get_domain_config(domain)
|
| 133 |
|
| 134 |
# Run all searches in parallel
|
| 135 |
+
pubmed_task = search_pubmed(query, max_per_source, domain)
|
| 136 |
trials_task = search_clinical_trials(query, max_per_source)
|
| 137 |
europepmc_task = search_europepmc(query, max_per_source)
|
| 138 |
|
|
|
|
| 140 |
pubmed_task, trials_task, europepmc_task, return_exceptions=True
|
| 141 |
)
|
| 142 |
|
| 143 |
+
formatted = [f"# Comprehensive Search: {query} ({config.name})\n"]
|
| 144 |
|
| 145 |
# Add each result section (handle exceptions gracefully)
|
| 146 |
if isinstance(pubmed_results, str):
|
|
|
|
| 166 |
condition: str,
|
| 167 |
evidence_summary: str,
|
| 168 |
) -> str:
|
| 169 |
+
"""Perform statistical analysis of research hypothesis using Modal.
|
| 170 |
|
| 171 |
Executes AI-generated Python code in a secure Modal sandbox to analyze
|
| 172 |
+
the statistical evidence for a research hypothesis.
|
| 173 |
|
| 174 |
Args:
|
| 175 |
drug: The drug being evaluated (e.g., "metformin")
|
|
@@ -36,6 +36,7 @@ from src.agents.magentic_agents import (
|
|
| 36 |
create_search_agent,
|
| 37 |
)
|
| 38 |
from src.agents.state import init_magentic_state
|
|
|
|
| 39 |
from src.orchestrators.base import OrchestratorProtocol
|
| 40 |
from src.utils.config import settings
|
| 41 |
from src.utils.llm_factory import check_magentic_requirements
|
|
@@ -68,6 +69,7 @@ class AdvancedOrchestrator(OrchestratorProtocol):
|
|
| 68 |
chat_client: OpenAIChatClient | None = None,
|
| 69 |
api_key: str | None = None,
|
| 70 |
timeout_seconds: float = 600.0,
|
|
|
|
| 71 |
) -> None:
|
| 72 |
"""Initialize orchestrator.
|
| 73 |
|
|
@@ -76,6 +78,7 @@ class AdvancedOrchestrator(OrchestratorProtocol):
|
|
| 76 |
chat_client: Optional shared chat client for agents
|
| 77 |
api_key: Optional OpenAI API key (for BYOK)
|
| 78 |
timeout_seconds: Maximum workflow duration (default: 10 minutes)
|
|
|
|
| 79 |
"""
|
| 80 |
# Validate requirements only if no key provided
|
| 81 |
if not chat_client and not api_key:
|
|
@@ -83,6 +86,8 @@ class AdvancedOrchestrator(OrchestratorProtocol):
|
|
| 83 |
|
| 84 |
self._max_rounds = max_rounds
|
| 85 |
self._timeout_seconds = timeout_seconds
|
|
|
|
|
|
|
| 86 |
self._chat_client: OpenAIChatClient | None
|
| 87 |
|
| 88 |
if chat_client:
|
|
@@ -104,10 +109,10 @@ class AdvancedOrchestrator(OrchestratorProtocol):
|
|
| 104 |
def _build_workflow(self) -> Any:
|
| 105 |
"""Build the workflow with ChatAgent participants."""
|
| 106 |
# Create agents with internal LLMs
|
| 107 |
-
search_agent = create_search_agent(self._chat_client)
|
| 108 |
-
judge_agent = create_judge_agent(self._chat_client)
|
| 109 |
-
hypothesis_agent = create_hypothesis_agent(self._chat_client)
|
| 110 |
-
report_agent = create_report_agent(self._chat_client)
|
| 111 |
|
| 112 |
# Manager chat client (orchestrates the agents)
|
| 113 |
manager_client = self._chat_client or OpenAIChatClient(
|
|
@@ -156,7 +161,7 @@ class AdvancedOrchestrator(OrchestratorProtocol):
|
|
| 156 |
|
| 157 |
workflow = self._build_workflow()
|
| 158 |
|
| 159 |
-
task = f"""Research
|
| 160 |
|
| 161 |
Workflow:
|
| 162 |
1. SearchAgent: Find evidence from PubMed, ClinicalTrials.gov, and Europe PMC
|
|
|
|
| 36 |
create_search_agent,
|
| 37 |
)
|
| 38 |
from src.agents.state import init_magentic_state
|
| 39 |
+
from src.config.domain import ResearchDomain, get_domain_config
|
| 40 |
from src.orchestrators.base import OrchestratorProtocol
|
| 41 |
from src.utils.config import settings
|
| 42 |
from src.utils.llm_factory import check_magentic_requirements
|
|
|
|
| 69 |
chat_client: OpenAIChatClient | None = None,
|
| 70 |
api_key: str | None = None,
|
| 71 |
timeout_seconds: float = 600.0,
|
| 72 |
+
domain: ResearchDomain | str | None = None,
|
| 73 |
) -> None:
|
| 74 |
"""Initialize orchestrator.
|
| 75 |
|
|
|
|
| 78 |
chat_client: Optional shared chat client for agents
|
| 79 |
api_key: Optional OpenAI API key (for BYOK)
|
| 80 |
timeout_seconds: Maximum workflow duration (default: 10 minutes)
|
| 81 |
+
domain: Research domain for customization
|
| 82 |
"""
|
| 83 |
# Validate requirements only if no key provided
|
| 84 |
if not chat_client and not api_key:
|
|
|
|
| 86 |
|
| 87 |
self._max_rounds = max_rounds
|
| 88 |
self._timeout_seconds = timeout_seconds
|
| 89 |
+
self.domain = domain
|
| 90 |
+
self.domain_config = get_domain_config(domain)
|
| 91 |
self._chat_client: OpenAIChatClient | None
|
| 92 |
|
| 93 |
if chat_client:
|
|
|
|
| 109 |
def _build_workflow(self) -> Any:
|
| 110 |
"""Build the workflow with ChatAgent participants."""
|
| 111 |
# Create agents with internal LLMs
|
| 112 |
+
search_agent = create_search_agent(self._chat_client, domain=self.domain)
|
| 113 |
+
judge_agent = create_judge_agent(self._chat_client, domain=self.domain)
|
| 114 |
+
hypothesis_agent = create_hypothesis_agent(self._chat_client, domain=self.domain)
|
| 115 |
+
report_agent = create_report_agent(self._chat_client, domain=self.domain)
|
| 116 |
|
| 117 |
# Manager chat client (orchestrates the agents)
|
| 118 |
manager_client = self._chat_client or OpenAIChatClient(
|
|
|
|
| 161 |
|
| 162 |
workflow = self._build_workflow()
|
| 163 |
|
| 164 |
+
task = f"""Research {self.domain_config.report_focus} for: {query}
|
| 165 |
|
| 166 |
Workflow:
|
| 167 |
1. SearchAgent: Find evidence from PubMed, ClinicalTrials.gov, and Europe PMC
|
|
@@ -13,6 +13,7 @@ from typing import TYPE_CHECKING, Literal
|
|
| 13 |
|
| 14 |
import structlog
|
| 15 |
|
|
|
|
| 16 |
from src.orchestrators.base import (
|
| 17 |
JudgeHandlerProtocol,
|
| 18 |
OrchestratorProtocol,
|
|
@@ -58,6 +59,7 @@ def create_orchestrator(
|
|
| 58 |
config: OrchestratorConfig | None = None,
|
| 59 |
mode: Literal["simple", "magentic", "advanced", "hierarchical"] | None = None,
|
| 60 |
api_key: str | None = None,
|
|
|
|
| 61 |
) -> OrchestratorProtocol:
|
| 62 |
"""
|
| 63 |
Create an orchestrator instance.
|
|
@@ -73,6 +75,7 @@ def create_orchestrator(
|
|
| 73 |
mode: "simple", "magentic", "advanced", or "hierarchical"
|
| 74 |
Note: "magentic" is an alias for "advanced" (kept for backwards compatibility)
|
| 75 |
api_key: Optional API key for advanced mode (OpenAI)
|
|
|
|
| 76 |
|
| 77 |
Returns:
|
| 78 |
Orchestrator instance implementing OrchestratorProtocol
|
|
@@ -83,19 +86,20 @@ def create_orchestrator(
|
|
| 83 |
"""
|
| 84 |
effective_config = config or OrchestratorConfig()
|
| 85 |
effective_mode = _determine_mode(mode, api_key)
|
| 86 |
-
logger.info("Creating orchestrator", mode=effective_mode)
|
| 87 |
|
| 88 |
if effective_mode == "advanced":
|
| 89 |
orchestrator_cls = _get_advanced_orchestrator_class()
|
| 90 |
return orchestrator_cls(
|
| 91 |
max_rounds=effective_config.max_iterations,
|
| 92 |
api_key=api_key,
|
|
|
|
| 93 |
)
|
| 94 |
|
| 95 |
if effective_mode == "hierarchical":
|
| 96 |
from src.orchestrators.hierarchical import HierarchicalOrchestrator
|
| 97 |
|
| 98 |
-
return HierarchicalOrchestrator(config=effective_config)
|
| 99 |
|
| 100 |
# Simple mode requires handlers
|
| 101 |
if search_handler is None or judge_handler is None:
|
|
@@ -105,6 +109,7 @@ def create_orchestrator(
|
|
| 105 |
search_handler=search_handler,
|
| 106 |
judge_handler=judge_handler,
|
| 107 |
config=effective_config,
|
|
|
|
| 108 |
)
|
| 109 |
|
| 110 |
|
|
|
|
| 13 |
|
| 14 |
import structlog
|
| 15 |
|
| 16 |
+
from src.config.domain import ResearchDomain
|
| 17 |
from src.orchestrators.base import (
|
| 18 |
JudgeHandlerProtocol,
|
| 19 |
OrchestratorProtocol,
|
|
|
|
| 59 |
config: OrchestratorConfig | None = None,
|
| 60 |
mode: Literal["simple", "magentic", "advanced", "hierarchical"] | None = None,
|
| 61 |
api_key: str | None = None,
|
| 62 |
+
domain: ResearchDomain | str | None = None,
|
| 63 |
) -> OrchestratorProtocol:
|
| 64 |
"""
|
| 65 |
Create an orchestrator instance.
|
|
|
|
| 75 |
mode: "simple", "magentic", "advanced", or "hierarchical"
|
| 76 |
Note: "magentic" is an alias for "advanced" (kept for backwards compatibility)
|
| 77 |
api_key: Optional API key for advanced mode (OpenAI)
|
| 78 |
+
domain: Research domain for customization (default: General)
|
| 79 |
|
| 80 |
Returns:
|
| 81 |
Orchestrator instance implementing OrchestratorProtocol
|
|
|
|
| 86 |
"""
|
| 87 |
effective_config = config or OrchestratorConfig()
|
| 88 |
effective_mode = _determine_mode(mode, api_key)
|
| 89 |
+
logger.info("Creating orchestrator", mode=effective_mode, domain=domain)
|
| 90 |
|
| 91 |
if effective_mode == "advanced":
|
| 92 |
orchestrator_cls = _get_advanced_orchestrator_class()
|
| 93 |
return orchestrator_cls(
|
| 94 |
max_rounds=effective_config.max_iterations,
|
| 95 |
api_key=api_key,
|
| 96 |
+
domain=domain,
|
| 97 |
)
|
| 98 |
|
| 99 |
if effective_mode == "hierarchical":
|
| 100 |
from src.orchestrators.hierarchical import HierarchicalOrchestrator
|
| 101 |
|
| 102 |
+
return HierarchicalOrchestrator(config=effective_config, domain=domain)
|
| 103 |
|
| 104 |
# Simple mode requires handlers
|
| 105 |
if search_handler is None or judge_handler is None:
|
|
|
|
| 109 |
search_handler=search_handler,
|
| 110 |
judge_handler=judge_handler,
|
| 111 |
config=effective_config,
|
| 112 |
+
domain=domain,
|
| 113 |
)
|
| 114 |
|
| 115 |
|
|
@@ -18,6 +18,7 @@ import structlog
|
|
| 18 |
|
| 19 |
from src.agents.judge_agent_llm import LLMSubIterationJudge
|
| 20 |
from src.agents.magentic_agents import create_search_agent
|
|
|
|
| 21 |
from src.middleware.sub_iteration import SubIterationMiddleware, SubIterationTeam
|
| 22 |
from src.orchestrators.base import OrchestratorProtocol
|
| 23 |
from src.state import init_magentic_state
|
|
@@ -37,8 +38,8 @@ class ResearchTeam(SubIterationTeam):
|
|
| 37 |
sub-iteration middleware framework.
|
| 38 |
"""
|
| 39 |
|
| 40 |
-
def __init__(self) -> None:
|
| 41 |
-
self.agent = create_search_agent()
|
| 42 |
|
| 43 |
async def execute(self, task: str) -> str:
|
| 44 |
"""Execute a research task.
|
|
@@ -71,16 +72,19 @@ class HierarchicalOrchestrator(OrchestratorProtocol):
|
|
| 71 |
self,
|
| 72 |
config: OrchestratorConfig | None = None,
|
| 73 |
timeout_seconds: float = DEFAULT_TIMEOUT_SECONDS,
|
|
|
|
| 74 |
) -> None:
|
| 75 |
"""Initialize the hierarchical orchestrator.
|
| 76 |
|
| 77 |
Args:
|
| 78 |
config: Optional configuration (uses defaults if not provided)
|
| 79 |
timeout_seconds: Maximum workflow duration (default: 5 minutes)
|
|
|
|
| 80 |
"""
|
| 81 |
self.config = config or OrchestratorConfig()
|
| 82 |
self._timeout_seconds = timeout_seconds
|
| 83 |
-
self.
|
|
|
|
| 84 |
self.judge = LLMSubIterationJudge()
|
| 85 |
self.middleware = SubIterationMiddleware(
|
| 86 |
self.team, self.judge, max_iterations=self.config.max_iterations
|
|
|
|
| 18 |
|
| 19 |
from src.agents.judge_agent_llm import LLMSubIterationJudge
|
| 20 |
from src.agents.magentic_agents import create_search_agent
|
| 21 |
+
from src.config.domain import ResearchDomain
|
| 22 |
from src.middleware.sub_iteration import SubIterationMiddleware, SubIterationTeam
|
| 23 |
from src.orchestrators.base import OrchestratorProtocol
|
| 24 |
from src.state import init_magentic_state
|
|
|
|
| 38 |
sub-iteration middleware framework.
|
| 39 |
"""
|
| 40 |
|
| 41 |
+
def __init__(self, domain: ResearchDomain | str | None = None) -> None:
|
| 42 |
+
self.agent = create_search_agent(domain=domain)
|
| 43 |
|
| 44 |
async def execute(self, task: str) -> str:
|
| 45 |
"""Execute a research task.
|
|
|
|
| 72 |
self,
|
| 73 |
config: OrchestratorConfig | None = None,
|
| 74 |
timeout_seconds: float = DEFAULT_TIMEOUT_SECONDS,
|
| 75 |
+
domain: ResearchDomain | str | None = None,
|
| 76 |
) -> None:
|
| 77 |
"""Initialize the hierarchical orchestrator.
|
| 78 |
|
| 79 |
Args:
|
| 80 |
config: Optional configuration (uses defaults if not provided)
|
| 81 |
timeout_seconds: Maximum workflow duration (default: 5 minutes)
|
| 82 |
+
domain: Research domain for customization
|
| 83 |
"""
|
| 84 |
self.config = config or OrchestratorConfig()
|
| 85 |
self._timeout_seconds = timeout_seconds
|
| 86 |
+
self.domain = domain
|
| 87 |
+
self.team = ResearchTeam(domain=domain)
|
| 88 |
self.judge = LLMSubIterationJudge()
|
| 89 |
self.middleware = SubIterationMiddleware(
|
| 90 |
self.team, self.judge, max_iterations=self.config.max_iterations
|
|
@@ -16,6 +16,7 @@ from typing import TYPE_CHECKING, Any, ClassVar
|
|
| 16 |
|
| 17 |
import structlog
|
| 18 |
|
|
|
|
| 19 |
from src.orchestrators.base import JudgeHandlerProtocol, SearchHandlerProtocol
|
| 20 |
from src.utils.config import settings
|
| 21 |
from src.utils.models import (
|
|
@@ -61,6 +62,7 @@ class Orchestrator:
|
|
| 61 |
config: OrchestratorConfig | None = None,
|
| 62 |
enable_analysis: bool = False,
|
| 63 |
enable_embeddings: bool = True,
|
|
|
|
| 64 |
):
|
| 65 |
"""
|
| 66 |
Initialize the orchestrator.
|
|
@@ -71,6 +73,7 @@ class Orchestrator:
|
|
| 71 |
config: Optional configuration (uses defaults if not provided)
|
| 72 |
enable_analysis: Whether to perform statistical analysis (if Modal available)
|
| 73 |
enable_embeddings: Whether to use semantic search for ranking/dedup
|
|
|
|
| 74 |
"""
|
| 75 |
self.search = search_handler
|
| 76 |
self.judge = judge_handler
|
|
@@ -78,6 +81,8 @@ class Orchestrator:
|
|
| 78 |
self.history: list[dict[str, Any]] = []
|
| 79 |
self._enable_analysis = enable_analysis and settings.modal_available
|
| 80 |
self._enable_embeddings = enable_embeddings
|
|
|
|
|
|
|
| 81 |
|
| 82 |
# Lazy-load services (typed for IDE support)
|
| 83 |
self._analyzer: StatisticalAnalyzer | None = None
|
|
@@ -473,7 +478,7 @@ class Orchestrator:
|
|
| 473 |
]
|
| 474 |
)
|
| 475 |
|
| 476 |
-
return f"""
|
| 477 |
|
| 478 |
### Question
|
| 479 |
{query}
|
|
@@ -561,7 +566,7 @@ class Orchestrator:
|
|
| 561 |
)
|
| 562 |
comb_strength = "Sufficient" if combined_score >= 12 else "Partial"
|
| 563 |
|
| 564 |
-
return f"""
|
| 565 |
|
| 566 |
### Research Question
|
| 567 |
{query}
|
|
|
|
| 16 |
|
| 17 |
import structlog
|
| 18 |
|
| 19 |
+
from src.config.domain import ResearchDomain, get_domain_config
|
| 20 |
from src.orchestrators.base import JudgeHandlerProtocol, SearchHandlerProtocol
|
| 21 |
from src.utils.config import settings
|
| 22 |
from src.utils.models import (
|
|
|
|
| 62 |
config: OrchestratorConfig | None = None,
|
| 63 |
enable_analysis: bool = False,
|
| 64 |
enable_embeddings: bool = True,
|
| 65 |
+
domain: ResearchDomain | str | None = None,
|
| 66 |
):
|
| 67 |
"""
|
| 68 |
Initialize the orchestrator.
|
|
|
|
| 73 |
config: Optional configuration (uses defaults if not provided)
|
| 74 |
enable_analysis: Whether to perform statistical analysis (if Modal available)
|
| 75 |
enable_embeddings: Whether to use semantic search for ranking/dedup
|
| 76 |
+
domain: Research domain for customization
|
| 77 |
"""
|
| 78 |
self.search = search_handler
|
| 79 |
self.judge = judge_handler
|
|
|
|
| 81 |
self.history: list[dict[str, Any]] = []
|
| 82 |
self._enable_analysis = enable_analysis and settings.modal_available
|
| 83 |
self._enable_embeddings = enable_embeddings
|
| 84 |
+
self.domain = domain
|
| 85 |
+
self.domain_config = get_domain_config(domain)
|
| 86 |
|
| 87 |
# Lazy-load services (typed for IDE support)
|
| 88 |
self._analyzer: StatisticalAnalyzer | None = None
|
|
|
|
| 478 |
]
|
| 479 |
)
|
| 480 |
|
| 481 |
+
return f"""{self.domain_config.report_title}
|
| 482 |
|
| 483 |
### Question
|
| 484 |
{query}
|
|
|
|
| 566 |
)
|
| 567 |
comb_strength = "Sufficient" if combined_score >= 12 else "Partial"
|
| 568 |
|
| 569 |
+
return f"""{self.domain_config.report_title}
|
| 570 |
|
| 571 |
### Research Question
|
| 572 |
{query}
|
|
@@ -2,13 +2,18 @@
|
|
| 2 |
|
| 3 |
from typing import TYPE_CHECKING
|
| 4 |
|
|
|
|
| 5 |
from src.utils.text_utils import select_diverse_evidence, truncate_at_sentence
|
| 6 |
|
| 7 |
if TYPE_CHECKING:
|
| 8 |
from src.services.embedding_protocol import EmbeddingServiceProtocol
|
| 9 |
from src.utils.models import Evidence
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Your role is to generate mechanistic hypotheses based on evidence.
|
| 14 |
|
|
@@ -29,6 +34,10 @@ Example hypothesis format:
|
|
| 29 |
Be specific. Use actual gene/protein names when possible."""
|
| 30 |
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
async def format_hypothesis_prompt(
|
| 33 |
query: str, evidence: list["Evidence"], embeddings: "EmbeddingServiceProtocol | None" = None
|
| 34 |
) -> str:
|
|
|
|
| 2 |
|
| 3 |
from typing import TYPE_CHECKING
|
| 4 |
|
| 5 |
+
from src.config.domain import ResearchDomain, get_domain_config
|
| 6 |
from src.utils.text_utils import select_diverse_evidence, truncate_at_sentence
|
| 7 |
|
| 8 |
if TYPE_CHECKING:
|
| 9 |
from src.services.embedding_protocol import EmbeddingServiceProtocol
|
| 10 |
from src.utils.models import Evidence
|
| 11 |
|
| 12 |
+
|
| 13 |
+
def get_system_prompt(domain: ResearchDomain | str | None = None) -> str:
|
| 14 |
+
"""Get the system prompt for the hypothesis agent."""
|
| 15 |
+
config = get_domain_config(domain)
|
| 16 |
+
return f"""{config.hypothesis_system_prompt}
|
| 17 |
|
| 18 |
Your role is to generate mechanistic hypotheses based on evidence.
|
| 19 |
|
|
|
|
| 34 |
Be specific. Use actual gene/protein names when possible."""
|
| 35 |
|
| 36 |
|
| 37 |
+
# Keep SYSTEM_PROMPT for backwards compatibility
|
| 38 |
+
SYSTEM_PROMPT = get_system_prompt()
|
| 39 |
+
|
| 40 |
+
|
| 41 |
async def format_hypothesis_prompt(
|
| 42 |
query: str, evidence: list["Evidence"], embeddings: "EmbeddingServiceProtocol | None" = None
|
| 43 |
) -> str:
|
|
@@ -1,8 +1,13 @@
|
|
| 1 |
"""Judge prompts for evidence assessment."""
|
| 2 |
|
|
|
|
| 3 |
from src.utils.models import Evidence
|
| 4 |
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
Your task is to SCORE evidence from biomedical literature. You do NOT decide whether to
|
| 8 |
continue searching or synthesize - that decision is made by the orchestration system
|
|
@@ -62,6 +67,16 @@ When suggesting next_search_queries:
|
|
| 62 |
- Refine existing terms, don't explore random medical associations
|
| 63 |
"""
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
MAX_EVIDENCE_FOR_JUDGE = 30 # Keep under token limits
|
| 66 |
|
| 67 |
|
|
@@ -99,6 +114,7 @@ def format_user_prompt(
|
|
| 99 |
iteration: int = 0,
|
| 100 |
max_iterations: int = 10,
|
| 101 |
total_evidence_count: int | None = None,
|
|
|
|
| 102 |
) -> str:
|
| 103 |
"""
|
| 104 |
Format user prompt with selected evidence and iteration context.
|
|
@@ -108,6 +124,7 @@ def format_user_prompt(
|
|
| 108 |
"""
|
| 109 |
total_count = total_evidence_count or len(evidence)
|
| 110 |
max_content_len = 1500
|
|
|
|
| 111 |
|
| 112 |
def format_single_evidence(i: int, e: Evidence) -> str:
|
| 113 |
content = e.content
|
|
@@ -137,7 +154,7 @@ def format_user_prompt(
|
|
| 137 |
|
| 138 |
## Your Task
|
| 139 |
|
| 140 |
-
|
| 141 |
DO NOT decide "synthesize" vs "continue" - that decision is made by the system.
|
| 142 |
|
| 143 |
## REMINDER: Original Question (stay focused)
|
|
|
|
| 1 |
"""Judge prompts for evidence assessment."""
|
| 2 |
|
| 3 |
+
from src.config.domain import ResearchDomain, get_domain_config
|
| 4 |
from src.utils.models import Evidence
|
| 5 |
|
| 6 |
+
|
| 7 |
+
def get_system_prompt(domain: ResearchDomain | str | None = None) -> str:
|
| 8 |
+
"""Get the system prompt for the judge agent."""
|
| 9 |
+
config = get_domain_config(domain)
|
| 10 |
+
return f"""{config.judge_system_prompt}
|
| 11 |
|
| 12 |
Your task is to SCORE evidence from biomedical literature. You do NOT decide whether to
|
| 13 |
continue searching or synthesize - that decision is made by the orchestration system
|
|
|
|
| 67 |
- Refine existing terms, don't explore random medical associations
|
| 68 |
"""
|
| 69 |
|
| 70 |
+
|
| 71 |
+
def get_scoring_prompt(domain: ResearchDomain | str | None = None) -> str:
|
| 72 |
+
"""Get the scoring instructions for the judge."""
|
| 73 |
+
config = get_domain_config(domain)
|
| 74 |
+
return config.judge_scoring_prompt
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# Keep SYSTEM_PROMPT for backwards compatibility
|
| 78 |
+
SYSTEM_PROMPT = get_system_prompt()
|
| 79 |
+
|
| 80 |
MAX_EVIDENCE_FOR_JUDGE = 30 # Keep under token limits
|
| 81 |
|
| 82 |
|
|
|
|
| 114 |
iteration: int = 0,
|
| 115 |
max_iterations: int = 10,
|
| 116 |
total_evidence_count: int | None = None,
|
| 117 |
+
domain: ResearchDomain | str | None = None,
|
| 118 |
) -> str:
|
| 119 |
"""
|
| 120 |
Format user prompt with selected evidence and iteration context.
|
|
|
|
| 124 |
"""
|
| 125 |
total_count = total_evidence_count or len(evidence)
|
| 126 |
max_content_len = 1500
|
| 127 |
+
scoring_prompt = get_scoring_prompt(domain)
|
| 128 |
|
| 129 |
def format_single_evidence(i: int, e: Evidence) -> str:
|
| 130 |
content = e.content
|
|
|
|
| 154 |
|
| 155 |
## Your Task
|
| 156 |
|
| 157 |
+
{scoring_prompt}
|
| 158 |
DO NOT decide "synthesize" vs "continue" - that decision is made by the system.
|
| 159 |
|
| 160 |
## REMINDER: Original Question (stay focused)
|
|
@@ -2,13 +2,18 @@
|
|
| 2 |
|
| 3 |
from typing import TYPE_CHECKING, Any
|
| 4 |
|
|
|
|
| 5 |
from src.utils.text_utils import select_diverse_evidence, truncate_at_sentence
|
| 6 |
|
| 7 |
if TYPE_CHECKING:
|
| 8 |
from src.services.embedding_protocol import EmbeddingServiceProtocol
|
| 9 |
from src.utils.models import Evidence, MechanismHypothesis
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Your role is to synthesize evidence and hypotheses into a clear, structured report.
|
| 14 |
|
|
@@ -36,8 +41,10 @@ The `hypotheses_tested` field MUST be a LIST of objects, each with these fields:
|
|
| 36 |
|
| 37 |
Example:
|
| 38 |
hypotheses_tested: [
|
| 39 |
-
{"hypothesis": "Metformin -> AMPK -> reduced inflammation",
|
| 40 |
-
|
|
|
|
|
|
|
| 41 |
]
|
| 42 |
|
| 43 |
The `references` field MUST be a LIST of objects, each with these fields:
|
|
@@ -48,7 +55,7 @@ The `references` field MUST be a LIST of objects, each with these fields:
|
|
| 48 |
|
| 49 |
Example:
|
| 50 |
references: [
|
| 51 |
-
{"title": "Metformin and Cancer", "authors": "Smith et al.", "source": "pubmed", "url": "https://pubmed.ncbi.nlm.nih.gov/12345678/"}
|
| 52 |
]
|
| 53 |
|
| 54 |
─────────────────────────────────────────────────────────────────────────────
|
|
@@ -68,6 +75,10 @@ VIOLATION OF THESE RULES PRODUCES DANGEROUS MISINFORMATION.
|
|
| 68 |
─────────────────────────────────────────────────────────────────────────────"""
|
| 69 |
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
async def format_report_prompt(
|
| 72 |
query: str,
|
| 73 |
evidence: list["Evidence"],
|
|
|
|
| 2 |
|
| 3 |
from typing import TYPE_CHECKING, Any
|
| 4 |
|
| 5 |
+
from src.config.domain import ResearchDomain, get_domain_config
|
| 6 |
from src.utils.text_utils import select_diverse_evidence, truncate_at_sentence
|
| 7 |
|
| 8 |
if TYPE_CHECKING:
|
| 9 |
from src.services.embedding_protocol import EmbeddingServiceProtocol
|
| 10 |
from src.utils.models import Evidence, MechanismHypothesis
|
| 11 |
|
| 12 |
+
|
| 13 |
+
def get_system_prompt(domain: ResearchDomain | str | None = None) -> str:
|
| 14 |
+
"""Get the system prompt for the report agent."""
|
| 15 |
+
config = get_domain_config(domain)
|
| 16 |
+
return f"""{config.report_system_prompt}
|
| 17 |
|
| 18 |
Your role is to synthesize evidence and hypotheses into a clear, structured report.
|
| 19 |
|
|
|
|
| 41 |
|
| 42 |
Example:
|
| 43 |
hypotheses_tested: [
|
| 44 |
+
{{"hypothesis": "Metformin -> AMPK -> reduced inflammation",
|
| 45 |
+
"supported": 3, "contradicted": 1}},
|
| 46 |
+
{{"hypothesis": "Aspirin inhibits COX-2 pathway",
|
| 47 |
+
"supported": 5, "contradicted": 0}}
|
| 48 |
]
|
| 49 |
|
| 50 |
The `references` field MUST be a LIST of objects, each with these fields:
|
|
|
|
| 55 |
|
| 56 |
Example:
|
| 57 |
references: [
|
| 58 |
+
{{"title": "Metformin and Cancer", "authors": "Smith et al.", "source": "pubmed", "url": "https://pubmed.ncbi.nlm.nih.gov/12345678/"}}
|
| 59 |
]
|
| 60 |
|
| 61 |
─────────────────────────────────────────────────────────────────────────────
|
|
|
|
| 75 |
─────────────────────────────────────────────────────────────────────────────"""
|
| 76 |
|
| 77 |
|
| 78 |
+
# Keep SYSTEM_PROMPT for backwards compatibility
|
| 79 |
+
SYSTEM_PROMPT = get_system_prompt()
|
| 80 |
+
|
| 81 |
+
|
| 82 |
async def format_report_prompt(
|
| 83 |
query: str,
|
| 84 |
evidence: list["Evidence"],
|
|
@@ -7,6 +7,7 @@ import structlog
|
|
| 7 |
from pydantic import Field
|
| 8 |
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 9 |
|
|
|
|
| 10 |
from src.utils.exceptions import ConfigurationError
|
| 11 |
|
| 12 |
|
|
@@ -20,6 +21,9 @@ class Settings(BaseSettings):
|
|
| 20 |
extra="ignore",
|
| 21 |
)
|
| 22 |
|
|
|
|
|
|
|
|
|
|
| 23 |
# LLM Configuration
|
| 24 |
openai_api_key: str | None = Field(default=None, description="OpenAI API key")
|
| 25 |
anthropic_api_key: str | None = Field(default=None, description="Anthropic API key")
|
|
|
|
| 7 |
from pydantic import Field
|
| 8 |
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 9 |
|
| 10 |
+
from src.config.domain import ResearchDomain
|
| 11 |
from src.utils.exceptions import ConfigurationError
|
| 12 |
|
| 13 |
|
|
|
|
| 21 |
extra="ignore",
|
| 22 |
)
|
| 23 |
|
| 24 |
+
# Domain configuration
|
| 25 |
+
research_domain: ResearchDomain = ResearchDomain.GENERAL
|
| 26 |
+
|
| 27 |
# LLM Configuration
|
| 28 |
openai_api_key: str | None = Field(default=None, description="OpenAI API key")
|
| 29 |
anthropic_api_key: str | None = Field(default=None, description="Anthropic API key")
|
|
@@ -56,7 +56,7 @@ async def test_simple_mode_structure_validation(mock_search_handler, mock_judge_
|
|
| 56 |
report = complete_event.message
|
| 57 |
|
| 58 |
# Check markdown structure
|
| 59 |
-
assert "##
|
| 60 |
assert "### Citations" in report
|
| 61 |
assert "### Key Findings" in report
|
| 62 |
|
|
|
|
| 56 |
report = complete_event.message
|
| 57 |
|
| 58 |
# Check markdown structure
|
| 59 |
+
assert "## Research Analysis" in report
|
| 60 |
assert "### Citations" in report
|
| 61 |
assert "### Key Findings" in report
|
| 62 |
|
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for JudgeHandler domain support."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import MagicMock, patch
|
| 4 |
+
|
| 5 |
+
from src.agent_factory.judges import JudgeHandler
|
| 6 |
+
from src.config.domain import ResearchDomain
|
| 7 |
+
from src.utils.models import AssessmentDetails, JudgeAssessment
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class TestJudgeHandlerDomain:
    """Checks that JudgeHandler keeps its domain and forwards it to the prompt builder."""

    @patch("src.agent_factory.judges.get_model")
    @patch("src.agent_factory.judges.Agent")
    def test_judge_handler_accepts_domain(self, mock_agent_cls, mock_get_model):
        """The ``domain`` given to the constructor is readable as ``handler.domain``."""
        # get_model is patched so construction does not require an API key.
        mock_get_model.return_value = MagicMock()

        handler = JudgeHandler(domain=ResearchDomain.SEXUAL_HEALTH)

        assert handler.domain == ResearchDomain.SEXUAL_HEALTH

    @patch("src.agent_factory.judges.get_model")
    @patch("src.agent_factory.judges.Agent")
    @patch("src.agent_factory.judges.format_user_prompt")
    @patch("src.agent_factory.judges.select_evidence_for_judge")
    async def test_judge_handler_passes_domain_to_prompt(
        self, mock_select, mock_format, mock_agent_cls, mock_get_model
    ):
        """``assess`` calls ``format_user_prompt`` exactly once with ``domain`` as a keyword."""
        mock_get_model.return_value = MagicMock()
        mock_agent_instance = MagicMock()
        mock_agent_cls.return_value = mock_agent_instance

        mock_assessment = JudgeAssessment(
            details=AssessmentDetails(
                mechanism_score=0,
                mechanism_reasoning="Insufficient evidence to determine mechanism.",
                clinical_evidence_score=0,
                clinical_reasoning="Insufficient evidence to determine clinical viability.",
                drug_candidates=[],
                key_findings=[],
            ),
            sufficient=False,
            confidence=0.0,
            recommendation="continue",
            next_search_queries=[],
            reasoning="Insufficient evidence collected so far to form a conclusion.",
        )

        # run() needs an awaitable result, so an async side effect is used.
        async def mock_run(*args, **kwargs):
            return MagicMock(output=mock_assessment)

        mock_agent_instance.run.side_effect = mock_run

        # The selection must be non-empty: per the original author's note, an
        # empty selection is routed to format_empty_evidence_prompt instead of
        # format_user_prompt, which is the function under observation here.
        evidence = [MagicMock()]
        mock_select.return_value = evidence

        handler = JudgeHandler(domain=ResearchDomain.DRUG_REPURPOSING)
        await handler.assess("query", evidence)

        # The domain is expected as a keyword argument, not positionally.
        mock_format.assert_called_once()
        call_kwargs = mock_format.call_args.kwargs
        assert call_kwargs.get("domain") == ResearchDomain.DRUG_REPURPOSING
|
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for Magentic Agents domain support."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import patch
|
| 4 |
+
|
| 5 |
+
from src.agents.magentic_agents import (
|
| 6 |
+
create_hypothesis_agent,
|
| 7 |
+
create_judge_agent,
|
| 8 |
+
create_report_agent,
|
| 9 |
+
create_search_agent,
|
| 10 |
+
)
|
| 11 |
+
from src.config.domain import SEXUAL_HEALTH_CONFIG, ResearchDomain
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class TestMagenticAgentsDomain:
    """Each Magentic agent factory must hand domain-specific text to ChatAgent."""

    @patch("src.agents.magentic_agents.ChatAgent")
    @patch("src.agents.magentic_agents.OpenAIChatClient")
    def test_create_search_agent_uses_domain(self, mock_client, mock_agent_cls):
        """The search agent's ``description`` contains the domain's search description."""
        create_search_agent(domain=ResearchDomain.SEXUAL_HEALTH)

        # Inspect the keyword arguments of the ChatAgent constructor call.
        agent_kwargs = mock_agent_cls.call_args.kwargs
        expected_text = SEXUAL_HEALTH_CONFIG.search_agent_description
        assert expected_text in agent_kwargs["description"]
        # Instructions are not checked here; only the description is asserted.

    @patch("src.agents.magentic_agents.ChatAgent")
    @patch("src.agents.magentic_agents.OpenAIChatClient")
    def test_create_judge_agent_uses_domain(self, mock_client, mock_agent_cls):
        """The judge agent's ``instructions`` contain the domain's judge system prompt."""
        create_judge_agent(domain=ResearchDomain.SEXUAL_HEALTH)

        agent_kwargs = mock_agent_cls.call_args.kwargs
        expected_text = SEXUAL_HEALTH_CONFIG.judge_system_prompt
        assert expected_text in agent_kwargs["instructions"]

    @patch("src.agents.magentic_agents.ChatAgent")
    @patch("src.agents.magentic_agents.OpenAIChatClient")
    def test_create_hypothesis_agent_uses_domain(self, mock_client, mock_agent_cls):
        """The hypothesis agent's ``description`` contains the domain's hypothesis description."""
        create_hypothesis_agent(domain=ResearchDomain.SEXUAL_HEALTH)

        agent_kwargs = mock_agent_cls.call_args.kwargs
        expected_text = SEXUAL_HEALTH_CONFIG.hypothesis_agent_description
        assert expected_text in agent_kwargs["description"]

    @patch("src.agents.magentic_agents.ChatAgent")
    @patch("src.agents.magentic_agents.OpenAIChatClient")
    def test_create_report_agent_uses_domain(self, mock_client, mock_agent_cls):
        """The report agent's ``instructions`` contain the domain's report system prompt."""
        create_report_agent(domain=ResearchDomain.SEXUAL_HEALTH)

        agent_kwargs = mock_agent_cls.call_args.kwargs
        expected_text = SEXUAL_HEALTH_CONFIG.report_system_prompt
        assert expected_text in agent_kwargs["instructions"]
|
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for Search Agent domain support."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import MagicMock
|
| 4 |
+
|
| 5 |
+
from src.agents.search_agent import SearchAgent
|
| 6 |
+
from src.config.domain import SEXUAL_HEALTH_CONFIG, ResearchDomain
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class TestSearchAgentDomain:
    """Checks that SearchAgent picks up its description from the chosen domain."""

    def test_search_agent_accepts_domain(self):
        """A SearchAgent built with a domain exposes that domain's search description."""
        handler_stub = MagicMock()
        evidence_store = {"current": []}

        search_agent = SearchAgent(
            search_handler=handler_stub,
            evidence_store=evidence_store,
            domain=ResearchDomain.SEXUAL_HEALTH,
        )

        # The description must match the domain config exactly, not just contain it.
        expected_description = SEXUAL_HEALTH_CONFIG.search_agent_description
        assert search_agent.description == expected_description
|
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for domain configuration."""
|
| 2 |
+
|
| 3 |
+
from src.config.domain import (
|
| 4 |
+
ResearchDomain,
|
| 5 |
+
get_domain_config,
|
| 6 |
+
)
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class TestResearchDomain:
    """Pins the string values of the ResearchDomain enum members."""

    def test_enum_values(self):
        """Each member's ``.value`` is the expected lowercase identifier."""
        expected_pairs = (
            (ResearchDomain.GENERAL, "general"),
            (ResearchDomain.DRUG_REPURPOSING, "drug_repurposing"),
            (ResearchDomain.SEXUAL_HEALTH, "sexual_health"),
        )
        for member, raw_value in expected_pairs:
            assert member.value == raw_value
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class TestGetDomainConfig:
    """Exercises get_domain_config with no argument, enum members and strings."""

    def test_default_returns_general(self):
        """Calling with no argument yields the config named "General Research"."""
        resolved = get_domain_config()
        assert resolved.name == "General Research"

    def test_explicit_general(self):
        """The GENERAL domain's report title mentions "Research Analysis"."""
        resolved = get_domain_config(ResearchDomain.GENERAL)
        assert "Research Analysis" in resolved.report_title

    def test_drug_repurposing(self):
        """The DRUG_REPURPOSING config names the domain in its title and judge prompt."""
        resolved = get_domain_config(ResearchDomain.DRUG_REPURPOSING)
        assert "Drug Repurposing" in resolved.report_title
        # The judge prompt is compared case-insensitively.
        lowered_prompt = resolved.judge_system_prompt.lower()
        assert "drug repurposing" in lowered_prompt

    def test_sexual_health(self):
        """The SEXUAL_HEALTH config's report title mentions "Sexual Health"."""
        resolved = get_domain_config(ResearchDomain.SEXUAL_HEALTH)
        assert "Sexual Health" in resolved.report_title

    def test_accepts_string(self):
        """A plain string domain identifier is accepted in place of the enum."""
        resolved = get_domain_config("drug_repurposing")
        assert "Drug Repurposing" in resolved.name

    def test_invalid_string_returns_default(self):
        """An unrecognised string yields the "General Research" config."""
        resolved = get_domain_config("invalid_domain")
        assert resolved.name == "General Research"

    def test_all_domains_have_required_fields(self):
        """Every enum member resolves to a config whose core fields are truthy."""
        required_fields = (
            "name",
            "report_title",
            "judge_system_prompt",
            "hypothesis_system_prompt",
            "report_system_prompt",
        )
        for domain in ResearchDomain:
            resolved = get_domain_config(domain)
            for field in required_fields:
                field_value = getattr(resolved, field)
                assert field_value, f"{domain} missing {field}"
|
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for MCP Tools domain support."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import MagicMock, patch
|
| 4 |
+
|
| 5 |
+
from src.mcp_tools import search_pubmed
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class TestMCPToolsDomain:
    """Checks that the search_pubmed tool accepts a ``domain`` argument."""

    @patch("src.mcp_tools._pubmed.search")
    async def test_search_pubmed_accepts_domain(self, mock_search):
        """Covers both the empty-result message and the domain-labelled heading."""
        # Scenario 1: the backend finds nothing.
        mock_search.return_value = []

        empty_output = await search_pubmed("query", domain="sexual_health")

        assert "No PubMed results" in empty_output

        # Scenario 2: the backend returns one evidence item.
        fake_citation = MagicMock(
            title="Test Title",
            authors=["Author"],
            date="2024",
            url="http://url",
        )
        fake_evidence = MagicMock(citation=fake_citation, content="content")
        mock_search.return_value = [fake_evidence]

        populated_output = await search_pubmed("query", domain="sexual_health")

        # The heading must carry the query plus the domain's display label.
        assert "## PubMed Results for: query (Sexual Health Research)" in populated_output
|
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for Advanced Orchestrator domain support."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import MagicMock, patch
|
| 4 |
+
|
| 5 |
+
from src.config.domain import ResearchDomain
|
| 6 |
+
from src.orchestrators.advanced import AdvancedOrchestrator
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class TestAdvancedOrchestratorDomain:
    """Checks that AdvancedOrchestrator stores its domain and passes it to agent factories."""

    @patch("src.orchestrators.advanced.check_magentic_requirements")
    @patch("src.orchestrators.advanced.OpenAIChatClient")
    def test_advanced_orchestrator_accepts_domain(self, mock_client, mock_check):
        """The ``domain`` given to the constructor is readable as ``orch.domain``."""
        # The chat client is mocked so no API key validation takes place.
        mock_client.return_value = MagicMock()

        orchestrator = AdvancedOrchestrator(
            domain=ResearchDomain.SEXUAL_HEALTH, api_key="sk-test"
        )

        assert orchestrator.domain == ResearchDomain.SEXUAL_HEALTH

    @patch("src.orchestrators.advanced.check_magentic_requirements")
    @patch("src.orchestrators.advanced.create_search_agent")
    @patch("src.orchestrators.advanced.create_judge_agent")
    @patch("src.orchestrators.advanced.create_hypothesis_agent")
    @patch("src.orchestrators.advanced.create_report_agent")
    @patch("src.orchestrators.advanced.MagenticBuilder")
    @patch("src.orchestrators.advanced.OpenAIChatClient")
    def test_build_workflow_uses_domain(
        self,
        mock_client,
        mock_builder,
        mock_create_report,
        mock_create_hypothesis,
        mock_create_judge,
        mock_create_search,
        mock_check,
    ):
        """``_build_workflow`` calls every agent factory with the orchestrator's domain."""
        mock_client.return_value = MagicMock()
        orchestrator = AdvancedOrchestrator(
            domain=ResearchDomain.SEXUAL_HEALTH, api_key="sk-test"
        )

        # The private builder is invoked directly to observe the factory calls.
        orchestrator._build_workflow()

        # Checked in the same order as before: search, judge, hypothesis, report.
        agent_factories = (
            mock_create_search,
            mock_create_judge,
            mock_create_hypothesis,
            mock_create_report,
        )
        for factory in agent_factories:
            factory.assert_called_with(
                orchestrator._chat_client, domain=ResearchDomain.SEXUAL_HEALTH
            )
|
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for Orchestrator Factory domain support."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import ANY, MagicMock, patch
|
| 4 |
+
|
| 5 |
+
from src.config.domain import ResearchDomain
|
| 6 |
+
from src.orchestrators.factory import create_orchestrator
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class TestFactoryDomain:
    """Checks that create_orchestrator forwards ``domain`` to the class it instantiates."""

    @patch("src.orchestrators.factory.Orchestrator")
    def test_create_simple_uses_domain(self, mock_simple_cls):
        """Simple mode constructs Orchestrator with the handlers and the domain."""
        handlers = {
            "search_handler": MagicMock(),
            "judge_handler": MagicMock(),
        }

        create_orchestrator(
            **handlers,
            mode="simple",
            domain=ResearchDomain.SEXUAL_HEALTH,
        )

        # config is whatever the factory supplies; only its presence is required.
        mock_simple_cls.assert_called_with(
            **handlers,
            config=ANY,
            domain=ResearchDomain.SEXUAL_HEALTH,
        )

    @patch("src.orchestrators.factory._get_advanced_orchestrator_class")
    def test_create_advanced_uses_domain(self, mock_get_cls):
        """Advanced mode passes ``domain`` as a keyword to the advanced orchestrator class."""
        advanced_cls = MagicMock()
        mock_get_cls.return_value = advanced_cls

        create_orchestrator(mode="advanced", domain=ResearchDomain.SEXUAL_HEALTH)

        passed_domain = advanced_cls.call_args.kwargs["domain"]
        assert passed_domain == ResearchDomain.SEXUAL_HEALTH
|
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for Orchestrator (Simple) domain support."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import MagicMock
|
| 4 |
+
|
| 5 |
+
from src.config.domain import SEXUAL_HEALTH_CONFIG, ResearchDomain
|
| 6 |
+
from src.orchestrators.simple import Orchestrator
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class TestSimpleOrchestratorDomain:
    """Checks domain storage and domain-titled reports on the simple Orchestrator."""

    @staticmethod
    def _sexual_health_orchestrator():
        """Build an Orchestrator with mocked handlers and the SEXUAL_HEALTH domain."""
        return Orchestrator(
            search_handler=MagicMock(),
            judge_handler=MagicMock(),
            domain=ResearchDomain.SEXUAL_HEALTH,
        )

    def test_orchestrator_accepts_domain(self):
        """The orchestrator exposes both the domain and its resolved config."""
        orchestrator = self._sexual_health_orchestrator()

        assert orchestrator.domain == ResearchDomain.SEXUAL_HEALTH
        assert orchestrator.domain_config.name == SEXUAL_HEALTH_CONFIG.name

    def test_orchestrator_uses_domain_title_in_synthesis(self):
        """Full and partial synthesis reports both carry the domain heading."""
        orchestrator = self._sexual_health_orchestrator()

        # Stand-in assessment exposing the attributes assigned by the original test.
        assessment = MagicMock(confidence=0.5, reasoning="test")
        assessment.details.configure_mock(
            drug_candidates=[],
            key_findings=[],
            mechanism_score=5,
            clinical_evidence_score=5,
        )

        full_report = orchestrator._generate_synthesis("query", [], assessment)
        assert "## Sexual Health Analysis" in full_report

        partial_report = orchestrator._generate_partial_synthesis("query", [])
        assert "## Sexual Health Analysis" in partial_report
|
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for hypothesis prompt domain support."""
|
| 2 |
+
|
| 3 |
+
from src.config.domain import DRUG_REPURPOSING_CONFIG, GENERAL_CONFIG, ResearchDomain
|
| 4 |
+
from src.prompts.hypothesis import get_system_prompt
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class TestHypothesisPromptDomain:
    """Checks that the hypothesis system prompt embeds the domain-specific text."""

    def test_get_system_prompt_default(self):
        """With no domain, the prompt embeds the GENERAL text plus the fixed role line."""
        rendered = get_system_prompt()

        domain_text = GENERAL_CONFIG.hypothesis_system_prompt
        assert domain_text in rendered
        assert "Your role is to generate mechanistic hypotheses" in rendered

    def test_get_system_prompt_domain(self):
        """With DRUG_REPURPOSING, the prompt embeds that domain's text plus the role line."""
        rendered = get_system_prompt(ResearchDomain.DRUG_REPURPOSING)

        domain_text = DRUG_REPURPOSING_CONFIG.hypothesis_system_prompt
        assert domain_text in rendered
        assert "Your role is to generate mechanistic hypotheses" in rendered
|
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for judge prompt domain support."""
|
| 2 |
+
|
| 3 |
+
from src.config.domain import DRUG_REPURPOSING_CONFIG, GENERAL_CONFIG, ResearchDomain
|
| 4 |
+
from src.prompts.judge import format_user_prompt, get_scoring_prompt, get_system_prompt
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class TestJudgePromptDomain:
    """Checks that the judge's system, scoring and user prompts follow the domain."""

    def test_get_system_prompt_default(self):
        """With no domain, the system prompt embeds the GENERAL text plus the task line."""
        rendered = get_system_prompt()

        domain_text = GENERAL_CONFIG.judge_system_prompt
        assert domain_text in rendered
        assert "Your task is to SCORE evidence" in rendered

    def test_get_system_prompt_domain(self):
        """With DRUG_REPURPOSING, the system prompt embeds that domain's text and the task line."""
        rendered = get_system_prompt(ResearchDomain.DRUG_REPURPOSING)

        domain_text = DRUG_REPURPOSING_CONFIG.judge_system_prompt
        assert domain_text in rendered
        assert "Your task is to SCORE evidence" in rendered

    def test_get_scoring_prompt_default(self):
        """With no domain, the scoring prompt equals the GENERAL scoring prompt."""
        scoring = get_scoring_prompt()
        assert GENERAL_CONFIG.judge_scoring_prompt == scoring

    def test_format_user_prompt_default(self):
        """With no domain, the user prompt uses GENERAL scoring text and omits drug repurposing."""
        rendered = format_user_prompt("query", [])

        assert GENERAL_CONFIG.judge_scoring_prompt in rendered
        lowered = rendered.lower()
        assert "drug repurposing" not in lowered

    def test_format_user_prompt_with_domain(self):
        """With DRUG_REPURPOSING, the user prompt uses that domain's scoring text."""
        rendered = format_user_prompt("query", [], domain=ResearchDomain.DRUG_REPURPOSING)

        assert DRUG_REPURPOSING_CONFIG.judge_scoring_prompt in rendered
        # The rendered prompt must mention the domain, compared case-insensitively.
        lowered = rendered.lower()
        assert "drug repurposing" in lowered
|
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for report prompt domain support."""
|
| 2 |
+
|
| 3 |
+
from src.config.domain import DRUG_REPURPOSING_CONFIG, GENERAL_CONFIG, ResearchDomain
|
| 4 |
+
from src.prompts.report import get_system_prompt
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class TestReportPromptDomain:
    """Checks that the report system prompt embeds the domain-specific text."""

    def test_get_system_prompt_default(self):
        """With no domain, the prompt embeds the GENERAL text plus the fixed role line."""
        rendered = get_system_prompt()

        domain_text = GENERAL_CONFIG.report_system_prompt
        assert domain_text in rendered
        assert "Your role is to synthesize evidence" in rendered

    def test_get_system_prompt_domain(self):
        """With DRUG_REPURPOSING, the prompt embeds that domain's text plus the role line."""
        rendered = get_system_prompt(ResearchDomain.DRUG_REPURPOSING)

        domain_text = DRUG_REPURPOSING_CONFIG.report_system_prompt
        assert domain_text in rendered
        assert "Your role is to synthesize evidence" in rendered
|
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for App domain support."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import ANY, MagicMock, patch
|
| 4 |
+
|
| 5 |
+
from src.app import configure_orchestrator, research_agent
|
| 6 |
+
from src.config.domain import ResearchDomain
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class TestAppDomain:
    """Checks that the app layer threads ``domain`` down to handlers and the orchestrator."""

    @patch("src.app.create_orchestrator")
    @patch("src.app.MockJudgeHandler")
    def test_configure_orchestrator_passes_domain_mock_mode(self, mock_judge, mock_create):
        """Test domain is passed when using mock mode (unit test path)."""
        configure_orchestrator(use_mock=True, mode="simple", domain=ResearchDomain.SEXUAL_HEALTH)

        # MockJudgeHandler should receive domain
        mock_judge.assert_called_with(domain=ResearchDomain.SEXUAL_HEALTH)
        mock_create.assert_called_with(
            search_handler=ANY,
            judge_handler=ANY,
            config=ANY,
            mode="simple",
            api_key=None,
            domain=ResearchDomain.SEXUAL_HEALTH,
        )

    @patch.dict("os.environ", {}, clear=True)
    @patch("src.app.create_orchestrator")
    @patch("src.app.HFInferenceJudgeHandler")
    def test_configure_orchestrator_passes_domain_free_tier(self, mock_hf_judge, mock_create):
        """Test domain is passed when using free tier (no API keys)."""
        configure_orchestrator(use_mock=False, mode="simple", domain=ResearchDomain.SEXUAL_HEALTH)

        # HFInferenceJudgeHandler should receive domain (no API keys = free tier)
        mock_hf_judge.assert_called_with(domain=ResearchDomain.SEXUAL_HEALTH)
        mock_create.assert_called_with(
            search_handler=ANY,
            judge_handler=ANY,
            config=ANY,
            mode="simple",
            api_key=None,
            domain=ResearchDomain.SEXUAL_HEALTH,
        )

    @patch("src.app.configure_orchestrator")
    async def test_research_agent_passes_domain(self, mock_config):
        """``research_agent`` forwards ``domain`` to ``configure_orchestrator``."""

        async def async_gen(*args, **kwargs):
            """Empty async generator standing in for ``orchestrator.run``.

            Accepts both positional and keyword arguments so the stub does not
            depend on how ``run`` happens to be invoked.
            """
            return
            yield  # unreachable; its presence makes this an async generator

        mock_orch = MagicMock()
        mock_orch.run = async_gen

        mock_config.return_value = (mock_orch, "Test Backend")

        # Drain the generator returned by research_agent so its body executes.
        gen = research_agent(
            message="query", history=[], mode="simple", domain=ResearchDomain.SEXUAL_HEALTH
        )

        async for _ in gen:
            pass

        mock_config.assert_called_with(
            use_mock=False, mode="simple", user_api_key=None, domain=ResearchDomain.SEXUAL_HEALTH
        )
|
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for research domain configuration settings."""
|
| 2 |
+
|
| 3 |
+
from src.config.domain import ResearchDomain
|
| 4 |
+
from src.utils.config import Settings
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def test_research_domain_default():
    """A Settings object built with no overrides reports the GENERAL domain."""
    loaded = Settings()
    expected_domain = ResearchDomain.GENERAL
    assert loaded.research_domain == expected_domain
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def test_research_domain_from_env(monkeypatch):
    """Setting RESEARCH_DOMAIN in the environment selects the matching enum member."""
    # The variable is set before Settings is constructed.
    monkeypatch.setenv("RESEARCH_DOMAIN", "drug_repurposing")

    loaded = Settings()

    expected_domain = ResearchDomain.DRUG_REPURPOSING
    assert loaded.research_domain == expected_domain
|