Spaces:
Sleeping
Sleeping
File size: 12,398 Bytes
5e4ea30 9f6e700 5e4ea30 abf83b1 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 abf83b1 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 abf83b1 5e4ea30 abf83b1 5e4ea30 abf83b1 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 1c5a36b 4fa6b76 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 9f6e700 5e4ea30 abf83b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 |
# -*- coding: utf-8 -*-
"""
系統需求:
- gradio: 用於建立 Web UI
- opencv-python: 用於圖片處理
- ultralytics: YOLOv8 官方函式庫
- Pillow: 圖片處理基礎庫
- transformers: (可選,若YOLO模型需要)
"""
import gradio as gr
import os
import cv2
from ultralytics import YOLO
import shutil
import zipfile
import uuid # 匯入 uuid 以生成唯一的執行 ID
from pathlib import Path # 匯入 Path 以更方便地操作路徑
import gemini_ai as genai
from datetime import datetime
import mongo_lib as mongo
def create_zip_archive(files, zip_filename):
    """
    Compress a list of files into a single flat zip archive.

    Every file is stored under its base name only (no directory components).
    When two input files share a base name, the later ones receive a numeric
    suffix (``name_1.ext``, ``name_2.ext``, ...) so that no entry shadows
    another one inside the archive. Paths that do not exist are skipped with
    a printed warning.

    Args:
        files (list): Paths of the files to add to the archive.
        zip_filename (str): Path of the zip file to create.

    Returns:
        str: ``zip_filename``, unchanged.
    """
    used_names = set()
    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for file_path in files:
            if not os.path.exists(file_path):
                print(f"警告: 檔案 '{file_path}' 不存在,無法加入壓縮檔。")
                continue
            # Store only the file name, never the full path.
            arcname = os.path.basename(file_path)
            if arcname in used_names:
                # Same base name seen before: pick the first free "_N" suffix
                # instead of writing a duplicate entry.
                stem, ext = os.path.splitext(arcname)
                counter = 1
                while f"{stem}_{counter}{ext}" in used_names:
                    counter += 1
                arcname = f"{stem}_{counter}{ext}"
            used_names.add(arcname)
            zipf.write(file_path, arcname)
    return zip_filename
def _unique_stem(stem, used_stems):
    """Return ``stem``, or ``stem_N`` with the first free N if it is taken, and record it in ``used_stems``."""
    candidate = stem
    suffix = 1
    while candidate in used_stems:
        candidate = f"{stem}_{suffix}"
        suffix += 1
    used_stems.add(candidate)
    return candidate


def _detect_objects(loaded_models, image_path, conf_threshold):
    """
    Run every loaded model on one image and keep boxes at or above the threshold.

    Args:
        loaded_models (list): ``(model_path, model)`` pairs.
        image_path (Path): Image to analyse.
        conf_threshold (float): Minimum confidence for a box to be kept.

    Returns:
        tuple: ``(detections, report_lines)`` where ``detections`` is a list of
        dicts with keys ``model_name``, ``class_name``, ``confidence`` and
        ``bbox``, and ``report_lines`` is the per-model text report.
    """
    detections = []
    report_lines = []
    for model_path_str, model_obj in loaded_models:
        model_name = Path(model_path_str).name
        report_lines.append(f"--- 模型: {model_name} ---")
        results = model_obj(str(image_path), verbose=False, device="cpu")[0]
        kept = 0
        if results.boxes:
            for box in results.boxes:
                conf = float(box.conf[0])
                if conf < conf_threshold:
                    continue
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                cls_name = model_obj.names[int(box.cls[0])]
                detections.append({
                    'model_name': model_name,
                    'class_name': cls_name,
                    'confidence': conf,
                    'bbox': (x1, y1, x2, y2),
                })
                report_lines.append(f" - {cls_name} (信賴度: {conf:.2f}) [座標: {x1},{y1},{x2},{y2}]")
                kept += 1
        if kept == 0:
            # Also covers the case where boxes exist but all fall below the
            # threshold, so a model header is never left without any line.
            report_lines.append(" 未偵測到任何物件。")
    return detections, report_lines


def _draw_detections(image, detections, color_map):
    """Draw one rectangle and one ``class confidence`` label per detection onto ``image`` (in place)."""
    for det in detections:
        x1, y1, x2, y2 = det['bbox']
        color = color_map.get(det['model_name'], (200, 200, 200))
        label = f"{det['class_name']} {det['confidence']:.2f}"
        cv2.rectangle(annotated := image, (x1, y1), (x2, y2), color, 2) if False else cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        # Normally the label sits just above the box; when the box touches the
        # top edge that position would be off-image, so move it inside the box.
        label_y = y1 - 10 if y1 - 10 >= 12 else y1 + 18
        cv2.putText(image, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)


def gradio_multi_model_detection(
    image_files,
    model_files,
    conf_threshold,
    enable_mllm,
    mllm_prompt,
    progress=gr.Progress(track_tqdm=True)
):
    """
    Main Gradio handler: run YOLO (and optionally MLLM) analysis on uploaded images.

    This is a generator; every ``yield`` streams a partial UI update. For each
    readable image it runs all loaded models, writes an annotated JPEG and a
    text report (plus an MLLM report when enabled) into a per-run directory,
    logs a detail record, and finally zips all result files.

    Args:
        image_files (list): Uploaded image files from the Gradio File component.
        model_files (list): Uploaded model files; when empty, ``yolov8n.pt`` is used.
        conf_threshold (float): Minimum confidence for a detection to be kept.
        enable_mllm (bool): Whether to run the MLLM analysis for each image.
        mllm_prompt (str): Optional custom MLLM prompt; blank means "use the default".
        progress (gr.Progress): Gradio progress tracker.

    Yields:
        dict: Mapping from the module-level output components
        (``output_status``, ``output_gallery``, ``output_text``,
        ``download_button``) to their new values.
    """
    global_datetime = datetime.now()
    # Master log record: one per invocation, written before any validation.
    # NOTE(review): image_files / model_files are stored as received from
    # Gradio (file objects, possibly None) - confirm mongo_lib can serialize them.
    document = {"log_style":"master",
                "create_datetime": str(global_datetime),
                "image_files": image_files,
                "model_files": model_files,
                "conf_threshold":conf_threshold,
                "enable_mllm":enable_mllm,
                "mllm_prompt":mllm_prompt
                }
    mongo.insert_mongodb_log("multi_model_detection",document)

    if not image_files:
        yield {
            output_status: gr.update(value="錯誤:請至少上傳一張圖片。"),
            output_gallery: None,
            output_text: None,
            download_button: None
        }
        return

    # --- 1. Setup: every run gets its own output sub-directory ---
    run_id = str(uuid.uuid4())
    base_output_dir = Path('gradio_detection_results')
    run_output_dir = base_output_dir / f"run_{run_id[:8]}"
    run_output_dir.mkdir(parents=True, exist_ok=True)
    image_paths = [file.name for file in image_files]
    model_paths = [file.name for file in model_files] if model_files else []

    # --- 2. Load models ---
    yield {output_status: gr.update(value="正在載入模型...")}
    loaded_models = []
    if not model_paths:
        # No model uploaded: fall back to the default model.
        default_model_path = 'yolov8n.pt'
        try:
            model = YOLO(default_model_path)
            loaded_models.append((default_model_path, model))
        except Exception as e:
            yield {output_status: gr.update(value=f"錯誤: 無法載入預設模型 '{default_model_path}' - {e}")}
            return
    else:
        for model_path in model_paths:
            # Best effort: a model that fails to load is skipped, not fatal.
            try:
                model = YOLO(model_path)
                loaded_models.append((model_path, model))
            except Exception as e:
                print(f"警告: 無法載入模型 '{model_path}' - {e},將跳過此模型。")
    if not loaded_models:
        yield {output_status: gr.update(value="錯誤: 沒有任何模型成功載入。")}
        return

    # One fixed box colour per model; the table does not depend on the image,
    # so it is built once here.
    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255), (0, 255, 255)]
    color_map = {Path(p).name: colors[idx % len(colors)] for idx, (p, _) in enumerate(loaded_models)}

    # --- 3. Process images one by one ---
    total_images = len(image_paths)
    annotated_image_paths = []
    all_result_files = []
    # Text results of every processed image, joined for the final text box.
    all_texts = []
    # Output names are derived from the image stem; track used stems so two
    # uploads with the same file name do not overwrite each other's results.
    used_stems = set()
    skipped_images = 0

    for i, image_path_str in enumerate(image_paths):
        image_path = Path(image_path_str)
        progress(i / total_images, desc=f"處理中: {image_path.name}")
        yield {
            output_status: gr.update(value=f"處理中... ({i+1}/{total_images}) - {image_path.name}"),
            output_gallery: gr.update(value=annotated_image_paths)
        }
        original_image = cv2.imread(str(image_path))
        if original_image is None:
            print(f"警告: 無法讀取圖片 '{image_path}',跳過。")
            skipped_images += 1
            continue
        annotated_image = original_image.copy()
        image_base_name = _unique_stem(image_path.stem, used_stems)

        # --- 3a. YOLO object detection ---
        detections, model_lines = _detect_objects(loaded_models, image_path, conf_threshold)
        yolo_output_content = [f"--- 檔案: {image_path.name} ---", *model_lines]
        _draw_detections(annotated_image, detections, color_map)

        # Save the annotated image.
        output_image_path = run_output_dir / f"{image_base_name}_yolo_detected.jpg"
        cv2.imwrite(str(output_image_path), annotated_image)
        annotated_image_paths.append(str(output_image_path))
        all_result_files.append(str(output_image_path))

        # Save the YOLO text report.
        output_yolo_txt_path = run_output_dir / f"{image_base_name}_yolo_objects.txt"
        output_yolo_txt_path.write_text("\n".join(yolo_output_content), encoding='utf-8')
        all_result_files.append(str(output_yolo_txt_path))

        # --- 3b. MLLM analysis (if enabled) ---
        mllm_result_content = ""
        if enable_mllm:
            # Best effort: a failed MLLM call is reported in the result text
            # instead of aborting the whole run.
            try:
                prompt_to_use = mllm_prompt if mllm_prompt and mllm_prompt.strip() else None
                mllm_str = genai.analyze_content_with_gemini(str(image_path), prompt_to_use)
                mllm_result_content = f"--- MLLM 分析結果 ---\n{mllm_str}"
            except Exception as e:
                mllm_result_content = f"--- MLLM 分析失敗 ---\n原因: {e}"
            output_mllm_txt_path = run_output_dir / f"{image_base_name}_mllm_result.txt"
            output_mllm_txt_path.write_text(mllm_result_content, encoding='utf-8')
            all_result_files.append(str(output_mllm_txt_path))

        # Detail log record: one per processed image.
        document = {"log_style":"detail",
                    "create_datetime": str(global_datetime),
                    "image_path": str(image_path),
                    "yolo_result": yolo_output_content,
                    "enable_mllm": enable_mllm,
                    "mllm_prompt": mllm_prompt,
                    "mllm_result": mllm_result_content}
        mongo.insert_mongodb_log("multi_model_detection",document)

        # Collect this image's text for the combined output.
        all_texts.append("\n".join(yolo_output_content))
        if enable_mllm:
            # Use the in-memory text rather than re-reading the file just written.
            all_texts.append(mllm_result_content)

    # --- 4. Package results and publish the final state ---
    progress(1, desc="打包結果中...")
    zip_filename = run_output_dir / f"run_{run_id[:8]}_results.zip"
    created_zip_path = create_zip_archive(all_result_files, str(zip_filename))
    final_status = f"處理完成!共 {total_images} 張圖片。結果儲存於: {run_output_dir.absolute()}"
    if skipped_images:
        # Make it visible that some uploads could not be read.
        final_status += f" (其中 {skipped_images} 張無法讀取已略過)"
    combined_text_output = "\n\n".join(all_texts)
    yield {
        output_status: gr.update(value=final_status),
        download_button: gr.update(value=created_zip_path, visible=True),
        output_text: gr.update(value=combined_text_output),
        # Make sure the final gallery state is pushed as well.
        output_gallery: gr.update(value=annotated_image_paths)
    }
def toggle_mllm_prompt(is_enabled):
    """
    Show or hide the MLLM prompt input according to the checkbox state.

    Args:
        is_enabled (bool): Current value of the "enable MLLM" checkbox.

    Returns:
        A Gradio update that sets the target component's ``visible`` flag
        to ``is_enabled``.
    """
    visibility_update = gr.update(visible=is_enabled)
    return visibility_update
# --- Gradio interface: layout, event wiring and launch ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 智慧影像分析工具 (YOLO + MLLM)")
    gr.Markdown("上傳圖片與YOLO模型進行物件偵測,並可選用MLLM進行進階圖像理解。 ver.250824.1")

    with gr.Row():
        # Left column: inputs and settings.
        with gr.Column(scale=1):
            image_input = gr.File(
                label="上傳圖片",
                file_count="multiple",
                file_types=["image"],
            )
            # When no model is uploaded, the handler falls back to yolov8n.pt.
            model_input = gr.File(
                label="上傳YOLO模型 (.pt)",
                file_count="multiple",
                file_types=[".pt"],
            )
            with gr.Accordion("進階設定", open=False):
                conf_slider = gr.Slider(
                    minimum=0.1,
                    maximum=1,
                    value=0.40,
                    step=0.05,
                    label="信賴度閾值",
                )
                mllm_enabled_checkbox = gr.Checkbox(
                    label="開啟MLLM辨識",
                    value=False,
                )
                # Hidden until the checkbox above is ticked.
                mllm_prompt_input = gr.Textbox(
                    label="自訂 MLLM Prompt (選填)",
                    placeholder="例如:請描述圖中人物的穿著與場景。",
                    visible=False,
                )
            run_button = gr.Button("開始辨識", variant="primary")

        # Right column: outputs.
        with gr.Column(scale=2):
            output_gallery = gr.Gallery(
                label="辨識結果預覽",
                height=500,
                object_fit="contain",
                allow_preview=True,
            )
            output_text = gr.Textbox(
                label="詳細辨識資訊",
                lines=15,
                placeholder="辨識完成後,所有結果將顯示於此。",
            )
            output_status = gr.Textbox(
                label="執行狀態",
                interactive=False,
            )
            # Hidden until a result archive is available.
            download_button = gr.File(
                label="下載所有結果 (.zip)",
                file_count="single",
                visible=False,
            )

    # --- Event wiring ---
    # "Start detection" button: stream updates from the detection generator.
    run_button.click(
        fn=gradio_multi_model_detection,
        inputs=[
            image_input,
            model_input,
            conf_slider,
            mllm_enabled_checkbox,
            mllm_prompt_input,
        ],
        outputs=[
            output_gallery,
            output_status,
            download_button,
            output_text,
        ],
    )
    # "Enable MLLM" checkbox: toggle visibility of the custom prompt box.
    mllm_enabled_checkbox.change(
        fn=toggle_mllm_prompt,
        inputs=[mllm_enabled_checkbox],
        outputs=[mllm_prompt_input],
    )

# Start the Gradio app when run as a script.
if __name__ == "__main__":
    # Alternative: demo.launch(share=True)
    demo.launch(debug=True)
|