Spaces:

andythebest
/

multi_model_detection

Sleeping

App Files Community

andythebest committed on Aug 24

Commit

4412caa

verified ·

1 Parent(s): 4fa6b76

Update gemini_ai.py

Browse files

Files changed (1) hide show

gemini_ai.py +124 -119

gemini_ai.py CHANGED Viewed

@@ -1,119 +1,124 @@
-#!pip install -q -U google-generativeai
-import google.generativeai as genai
-import PIL.Image
-import image_converter as img_converter
-import random
-import os
-import ast
-import target_object
-#基本設定都放這邊----------------------------------------
-#
-#
-# 設定圖檔位置 (此處僅為範例，純文字查詢時可忽略)
-image_path = r'input_images/雜圖混合02.jpg'
-# 要使用的模型種類，免費版一分鐘只能跑最多十筆
-gemini_model = 'gemini-2.5-flash'
-#要求AI扮演的角色和提示詞，這裡的提示詞會用來引導AI進行圖片分類
-給AI的提示詞 = """您現在扮演一位圖片分類大師，擅長解讀圖片中的一些抽象涵義並加以分類。
-請在各大類中選最近似的一樣，輸出結果如範例:"物理環境[辦公室],技術應用[人工智慧,虛擬實境,其他],資訊設備[其他]"。
-若您覺得，該圖片完全不具要辨識的特徵，請回覆"XXX[NIL]",XXX為該類別,加上NIL表示該類未再提供的選項內。
-以下是我們要請您分辨的種類，會以JSON標示:"""
-#--------------------------------------------------------
-## 替換冒號和逗號為換行符號
-def replace_colon_comma_with_newline(input_string):
-  processed_string = input_string.replace(':', '\n').replace('：', '\n').replace('],', ']\n')
-  return processed_string
-def getApiToken():
-    try:
-        my_api_key = os.getenv('my_api_key')
-        my_list = ast.literal_eval(my_api_key) # Convert string to list因為存在環境變數中是字串格式
-        return  random.choice(my_list)
-    except Exception as e:
-        return ""
-# function，輸入是文字或是圖檔的位置
-def analyze_content_with_gemini(input_content, 辨識目標物=None):
-    """
-    透過 Gemini API 辨識內容，可處理純文字或圖片。
-    Args:
-        input_content (str or PIL.Image.Image):
-            如果輸入是字串，則代表要辨識的文字訊息或圖片路徑。
-            如果輸入是 PIL.Image.Image 物件，則直接使用該圖片。
-        user_prompt (str, optional):
-            使用者提供的自訂 prompt。如果為 None，則使用預設的 prompt。
-            Defaults to None.
-    Returns:
-        str: 辨識結果的文字描述。
-    """
-    my_api_key = getApiToken()  # 從環境變數中獲取API金鑰
-    genai.configure(api_key=my_api_key)
-    # 根據 user_prompt 決定要使用的 prompt
-    prompt_to_use = 給AI的提示詞+辨識目標物 if 辨識目標物 and 辨識目標物.strip() else 給AI的提示詞+ str(target_object.target_JSON)
-    # print("-"*50)
-    # print(prompt_to_use)
-    try:
-        # 判斷輸入的類型
-        if isinstance(input_content, str):
-            # 如果輸入是字串，嘗試判斷是否為圖片路徑
-            if input_content.lower().endswith(('.png', '.jpg', '.jpeg', '.gif','.webp')):
-                if input_content.lower().endswith(('.webp')):
-                    input_content = img_converter.convert_webp_to_jpg(input_content)  # 如果是 webp 圖片，先轉換為 jpg
-                model = genai.GenerativeModel(gemini_model)
-                image_obj = PIL.Image.open(input_content)
-                response = model.generate_content([prompt_to_use, image_obj])
-            else:
-                # 純文字輸入
-                model = genai.GenerativeModel(gemini_model)
-                response = model.generate_content(input_content) # 純文字直接使用輸入內容當 prompt
-        elif isinstance(input_content, PIL.Image.Image):
-            model = genai.GenerativeModel(gemini_model)
-            response = model.generate_content([prompt_to_use, input_content])
-        else:
-            return "錯誤：輸入必須是文字、圖片路徑（字串）或 PIL.Image 物件。"
-        return replace_colon_comma_with_newline(response.text)
-    except Exception as e:
-        return f"發生錯誤：{e}"
-if __name__ == '__main__':
-    # --- 程式碼使用範例 ---
-    # 範例 1：傳送純文字訊息
-    # print("正在處理純文字訊息...")
-    # text_message = "你好，請簡要說明一下Python是什麼？"
-    # response_text = analyze_content_with_gemini(text_message)
-    # print("回應結果：")
-    # print(response_text)
-    # print("-" * 20)
-    # 範例 2：傳送圖片路徑
-    # 請確保 image_path 指向有效的圖片檔案
-    print("正在處理圖片訊息...")
-    我要辨識的物體 = ""
-    我要辨識的物體 = '{"物件類別": ["人", "老虎", "獅子", "牛","書架", "PC", "窗戶", "冷氣機","其他", "雞", "車子", "企鵝","長頸鹿"]}'
-    #我要辨識的物體 = '{"物件類別": ["人", "老虎", "獅子", "牛","書架", "PC", "窗戶", "冷氣機","其他", "雞", "車子"]}'
-    response_image = analyze_content_with_gemini(image_path, 我要辨識的物體)
-    print("回應結果：")
-    print(response_image)
-    print("-" * 20)

+#!pip install -q -U google-generativeai
+import google.generativeai as genai
+import PIL.Image
+import image_converter as img_converter
+import random
+import os
+import ast
+import target_object
+#基本設定都放這邊----------------------------------------
+#
+#
+# 設定圖檔位置 (此處僅為範例，純文字查詢時可忽略)
+image_path = r'G:\Python\tools\input_images\1411135045-張華桀.jpg'
+# 要使用的模型種類，免費版一分鐘只能跑最多十筆
+gemini_model = 'gemini-2.5-flash'
+#要求AI扮演的角色和提示詞，這裡的提示詞會用來引導AI進行圖片分類
+# 給AI的提示詞 = """您現在扮演一位圖片分類大師，擅長解讀圖片中的一些抽象涵義並加以分類。
+# 請在各大類中選最近似的一樣，輸出結果如範例:"物理環境[辦公室],技術應用[人工智慧,虛擬實境,其他],資訊設備[其他]"。
+# 若您覺得，該圖片完全不具要辨識的特徵，請回覆"XXX[NIL]",XXX為該類別,加上NIL表示該類未再提供的選項內。
+# 以下是我們要請您分辨的種類，會以JSON標示:"""
+給AI的提示詞 = """您現在扮演一位圖片分類大師，擅長解讀圖片中的一些抽象涵義並加以分類。
+請在各大類中選最近似的一樣，輸出結果如範例:[物理環境_辦公室,技術應用_人工智慧,技術應用_大數據分析,社交關係_獨立工作(1人),資訊設備_電腦,資訊設備_鍵盤,資訊設備_滑鼠,資訊設備_手機,物體_桌子,物體_椅子,角色_工作人員]。
+若您覺得，該圖片完全不具要辨識的特徵，請回覆[NIL]。
+以下是我們要請您分辨的種類，會以JSON標示:"""
+#--------------------------------------------------------
+## 替換冒號和逗號為換行符號
+def replace_colon_comma_with_newline(input_string):
+  processed_string = input_string.replace(':', '\n').replace('：', '\n').replace('],', ']\n')
+  return processed_string
+def getApiToken():
+    try:
+        my_api_key = os.getenv('my_api_key')
+        my_list = ast.literal_eval(my_api_key) # Convert string to list因為存在環境變數中是字串格式
+        return  random.choice(my_list)
+    except Exception as e:
+        return ""
+# function，輸入是文字或是圖檔的位置
+def analyze_content_with_gemini(input_content, 辨識目標物=None):
+    """
+    透過 Gemini API 辨識內容，可處理純文字或圖片。
+    Args:
+        input_content (str or PIL.Image.Image):
+            如果輸入是字串，則代表要辨識的文字訊息或圖片路徑。
+            如果輸入是 PIL.Image.Image 物件，則直接使用該圖片。
+        user_prompt (str, optional):
+            使用者提供的自訂 prompt。如果為 None，則使用預設的 prompt。
+            Defaults to None.
+    Returns:
+        str: 辨識結果的文字描述。
+    """
+    my_api_key = getApiToken()  # 從環境變數中獲取API金鑰
+    genai.configure(api_key=my_api_key)
+    # 根據 user_prompt 決定要使用的 prompt
+    prompt_to_use = 給AI的提示詞+辨識目標物 if 辨識目標物 and 辨識目標物.strip() else 給AI的提示詞+ str(target_object.target_JSON)
+    # print("-"*50)
+    # print(prompt_to_use)
+    try:
+        # 判斷輸入的類型
+        if isinstance(input_content, str):
+            # 如果輸入是字串，嘗試判斷是否為圖片路徑
+            if input_content.lower().endswith(('.png', '.jpg', '.jpeg', '.gif','.webp')):
+                if input_content.lower().endswith(('.webp')):
+                    input_content = img_converter.convert_webp_to_jpg(input_content)  # 如果是 webp 圖片，先轉換為 jpg
+                model = genai.GenerativeModel(gemini_model)
+                image_obj = PIL.Image.open(input_content)
+                response = model.generate_content([prompt_to_use, image_obj])
+            else:
+                # 純文字輸入
+                model = genai.GenerativeModel(gemini_model)
+                response = model.generate_content(input_content) # 純文字直接使用輸入內容當 prompt
+        elif isinstance(input_content, PIL.Image.Image):
+            model = genai.GenerativeModel(gemini_model)
+            response = model.generate_content([prompt_to_use, input_content])
+        else:
+            return "錯誤：輸入必須是文字、圖片路徑（字串）或 PIL.Image 物件。"
+        return replace_colon_comma_with_newline(response.text)
+    except Exception as e:
+        return f"發生錯誤：{e}"
+if __name__ == '__main__':
+    # --- 程式碼使用範例 ---
+    # 範例 1：傳送純文字訊息
+    # print("正在處理純文字訊息...")
+    # text_message = "你好，請簡要說明一下Python是什麼？"
+    # response_text = analyze_content_with_gemini(text_message)
+    # print("回應結果：")
+    # print(response_text)
+    # print("-" * 20)
+    # 範例 2：傳送圖片路徑
+    # 請確保 image_path 指向有效的圖片檔案
+    print("正在處理圖片訊息...")
+    我要辨識的物體 = ""
+    #我要辨識的物體 = '{"物件類別": ["人", "老虎", "獅子", "牛","書架", "PC", "窗戶", "冷氣機","其他", "雞", "車子", "企鵝","長頸鹿"]}'
+    #我要辨識的物體 = '{"物件類別": ["人", "老虎", "獅子", "牛","書架", "PC", "窗戶", "冷氣機","其他", "雞", "車子"]}'
+    response_image = analyze_content_with_gemini(image_path, 我要辨識的物體)
+    print("回應結果：")
+    print(response_image)
+    print("-" * 20)