blackccpie committed on
Commit
b51b727
·
1 Parent(s): a4b6466

feat : now managing image query tool.

Browse files
Files changed (4) hide show
  1. agent.py +6 -62
  2. agent_ui.py +8 -2
  3. other_tools.py +82 -0
  4. web_tools.py +91 -0
agent.py CHANGED
@@ -22,68 +22,10 @@
22
 
23
  import os
24
 
25
- from smolagents import CodeAgent, InferenceClientModel, Tool
26
- from tavily import TavilyClient
27
 
28
- # Define a custom tool for Tavily search
29
- from smolagents import Tool
30
- from tavily import TavilyClient
31
-
32
- tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
33
-
34
- class TavilySearchTool(Tool):
35
- name = "tavily_search"
36
- description = "Search the web using Tavily."
37
- inputs = {
38
- "query": {
39
- "type": "string",
40
- "description": "The search query string.",
41
- }
42
- }
43
- output_type = "string"
44
-
45
- def forward(self, query: str):
46
- response = tavily_client.search(query)
47
- return response
48
-
49
- class TavilyExtractTool(Tool):
50
- name = "tavily_extract"
51
- description = "Extract information from web pages using Tavily."
52
- inputs = {
53
- "url": {
54
- "type": "string",
55
- "description": "The URL of the web page to extract information from.",
56
- }
57
- }
58
- output_type = "string"
59
-
60
- def forward(self, url: str):
61
- response = tavily_client.extract(url)
62
- return response
63
-
64
- class TavilyImageURLSearchTool(Tool):
65
- name = "tavily_image_search"
66
- description = "Search for most relevant image URL on the web using Tavily."
67
- inputs = {
68
- "query": {
69
- "type": "string",
70
- "description": "The search query string.",
71
- }
72
- }
73
- output_type = "string"
74
-
75
- def forward(self, query: str):
76
- response = tavily_client.search(query, include_images=True)
77
-
78
- images = response.get("images", [])
79
-
80
- if images:
81
- # Return the URL of the first image
82
- first_image = images[0]
83
- if isinstance(first_image, dict):
84
- return first_image.get("url")
85
- return first_image
86
- return "none"
87
 
88
  class SmolAlbert(CodeAgent):
89
  """
@@ -93,6 +35,7 @@ class SmolAlbert(CodeAgent):
93
  #model_id = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
94
  #model_id = "Qwen/Qwen3-30B-A3B-Thinking-2507"
95
  model_id = "Qwen/Qwen3-235B-A22B-Instruct-2507"
 
96
  provider = "auto"
97
 
98
  def __init__(self):
@@ -103,12 +46,13 @@ class SmolAlbert(CodeAgent):
103
  search_tool = TavilySearchTool()
104
  image_search_tool = TavilyImageURLSearchTool()
105
  extract_tool = TavilyExtractTool()
 
106
  model = InferenceClientModel(
107
  model_id=self.model_id,
108
  provider=self.provider,
109
  token=os.getenv("HF_API_KEY"))
110
  self.agent = CodeAgent(
111
- tools=[search_tool, image_search_tool, extract_tool],
112
  model=model,
113
  stream_outputs=True,
114
  instructions=(
 
22
 
23
  import os
24
 
25
+ from smolagents import CodeAgent, InferenceClientModel
 
26
 
27
+ from other_tools import ImageQueryTool
28
+ from web_tools import TavilySearchTool, TavilyExtractTool, TavilyImageURLSearchTool
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  class SmolAlbert(CodeAgent):
31
  """
 
35
  #model_id = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
36
  #model_id = "Qwen/Qwen3-30B-A3B-Thinking-2507"
37
  model_id = "Qwen/Qwen3-235B-A22B-Instruct-2507"
38
+ #model_id = "google/gemma-3-27b-it"
39
  provider = "auto"
40
 
41
  def __init__(self):
 
46
  search_tool = TavilySearchTool()
47
  image_search_tool = TavilyImageURLSearchTool()
48
  extract_tool = TavilyExtractTool()
49
+ image_query_tool = ImageQueryTool()
50
  model = InferenceClientModel(
51
  model_id=self.model_id,
52
  provider=self.provider,
53
  token=os.getenv("HF_API_KEY"))
54
  self.agent = CodeAgent(
55
+ tools=[search_tool, image_search_tool, extract_tool, image_query_tool],
56
  model=model,
57
  stream_outputs=True,
58
  instructions=(
agent_ui.py CHANGED
@@ -333,8 +333,14 @@ class AgentUI:
333
  # Detect final answer messages and append to quiet
334
  # HACK : FinalAnswerStep messages are produced by _process_final_answer_step and use "**Final answer:**" text
335
  if "final answer" in content_text.lower():
336
- # Replace pending with final answer in Quiet
337
- final_msg = gr.ChatMessage(role=MessageRole.ASSISTANT, content=content_text, metadata={"status": "done"})
 
 
 
 
 
 
338
  if quiet_pending_idx is not None:
339
  quiet_messages[quiet_pending_idx] = final_msg
340
  quiet_pending_idx = None
 
333
  # Detect final answer messages and append to quiet
334
  # HACK : FinalAnswerStep messages are produced by _process_final_answer_step and use "**Final answer:**" text
335
  if "final answer" in content_text.lower():
336
+ # Remove everything before and including the "Final answer:" label (and any leading/trailing whitespace/newlines)
337
+ answer_only = re.sub(
338
+ r"(?s)^.*?\*\*Final answer:\*\*\s*[\n]*", # (?s) allows . to match newlines
339
+ "",
340
+ content_text,
341
+ flags=re.IGNORECASE,
342
+ )
343
+ final_msg = gr.ChatMessage(role=MessageRole.ASSISTANT, content=answer_only, metadata={"status": "done"})
344
  if quiet_pending_idx is not None:
345
  quiet_messages[quiet_pending_idx] = final_msg
346
  quiet_pending_idx = None
other_tools.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # The MIT License
2
+
3
+ # Copyright (c) 2025 Albert Murienne
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ import os
24
+ from pyexpat.errors import messages
25
+
26
+ from smolagents import InferenceClientModel, Tool
27
+
28
class ImageQueryTool(Tool):
    """
    Tool answering a free-form question about an image referenced by URL.

    The question and the image URL are sent together, as a single user
    message, to the model created in the constructor; the content of the
    model's reply is returned.
    """
    name = "image_query"
    description = "Ask a question about an image given its URL."
    inputs = {
        "image_url": {
            "type": "string",
            "description": "The URL of the image to analyze.",
        },
        "question": {
            "type": "string",
            "description": "The question to ask about the image.",
        },
    }
    output_type = "string"

    def __init__(self):
        """
        Set up the tool and the inference model it queries.

        The access token is read from the HF_API_KEY environment variable.
        """
        super().__init__()
        hf_token = os.getenv("HF_API_KEY")
        self.model = InferenceClientModel(
            model_id="google/gemma-3-27b-it",
            provider="auto",
            token=hf_token,
        )

    def forward(self, image_url: str, question: str):
        """
        Ask `question` about the image located at `image_url`.

        Returns the `content` attribute of the model's reply.
        """
        text_part = {"type": "text", "text": question}
        image_part = {"type": "image_url", "image_url": {"url": image_url}}
        # Named `chat_messages` so it does not shadow the module-level
        # `messages` import.
        chat_messages = [{"role": "user", "content": [text_part, image_part]}]
        return self.model(chat_messages).content
75
+
76
if __name__ == "__main__":
    # Manual smoke test: ask the tool a question about a sample image and
    # print whatever it answers.
    demo_tool = ImageQueryTool()
    answer = demo_tool.forward(
        image_url="https://upload.wikimedia.org/wikipedia/commons/9/99/Black_square.jpg",
        question="What is this?",
    )
    print("Response:", answer)
web_tools.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # The MIT License
2
+
3
+ # Copyright (c) 2025 Albert Murienne
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ import os
24
+
25
+ from smolagents import Tool
26
+ from tavily import TavilyClient
27
+
28
+ tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
29
+
30
class TavilySearchTool(Tool):
    """
    Web search tool backed by the module-level Tavily client.

    The query is forwarded to `tavily_client.search` and the client's
    response is returned unchanged.
    """
    name = "tavily_search"
    description = "Search the web using Tavily."
    inputs = {
        "query": {
            "type": "string",
            "description": "The search query string.",
        },
    }
    output_type = "string"

    def forward(self, query: str):
        """Run a Tavily search for `query` and return the client's response."""
        # NOTE(review): output_type is declared as "string" while the client
        # response is returned as-is - confirm that callers expect this.
        return tavily_client.search(query)
47
+
48
class TavilyExtractTool(Tool):
    """
    Page extraction tool backed by the module-level Tavily client.

    The URL is forwarded to `tavily_client.extract` and the client's
    response is returned unchanged.
    """
    name = "tavily_extract"
    description = "Extract information from web pages using Tavily."
    inputs = {
        "url": {
            "type": "string",
            "description": "The URL of the web page to extract information from.",
        },
    }
    output_type = "string"

    def forward(self, url: str):
        """Ask Tavily to extract the page at `url` and return the client's response."""
        # NOTE(review): output_type is declared as "string" while the client
        # response is returned as-is - confirm that callers expect this.
        return tavily_client.extract(url)
65
+
66
class TavilyImageURLSearchTool(Tool):
    """
    A tool to search for image URLs using the Tavily API.

    Runs a Tavily search with images enabled and returns the first image
    reported in the response, or the literal string "none" when no usable
    image URL is available.
    """
    name = "tavily_image_search"
    description = "Search for most relevant image URL on the web using Tavily."
    inputs = {
        "query": {
            "type": "string",
            "description": "The search query string.",
        }
    }
    output_type = "string"

    def forward(self, query: str):
        """
        Return the first image found for `query`.

        Image entries given as dicts are reduced to their "url" value; any
        other entry is returned as-is. The string "none" is returned when
        the response lists no images or the first dict entry has no usable
        "url".
        """
        response = tavily_client.search(query, include_images=True)

        images = response.get("images", [])
        if not images:
            return "none"

        first_image = images[0]
        if isinstance(first_image, dict):
            # A dict entry without a usable "url" would otherwise make the
            # tool return None; use the same "none" sentinel as the
            # no-result case so the output stays a string.
            return first_image.get("url") or "none"
        return first_image