Spaces:

andythebest
/

multi_model_detection

Sleeping

File size: 12,398 Bytes

5e4ea30
 
 
 
 
 
 
 
 
9f6e700
 
 
 
 
5e4ea30
 
 
 
 
abf83b1
 
 
 
 
9f6e700
5e4ea30
9f6e700
5e4ea30
9f6e700
 
5e4ea30
 
9f6e700
 
5e4ea30
9f6e700
5e4ea30
 
 
 
 
 
 
 
9f6e700
5e4ea30
 
 
 
 
 
 
 
 
 
9f6e700
5e4ea30
 
 
 
 
 
 
 
 
 
 
abf83b1
 
 
 
 
 
 
 
 
 
 
 
 
 
5e4ea30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f6e700
5e4ea30
 
 
9f6e700
 
 
 
5e4ea30
 
9f6e700
5e4ea30
9f6e700
 
5e4ea30
9f6e700
5e4ea30
 
9f6e700
 
5e4ea30
 
9f6e700
5e4ea30
 
9f6e700
5e4ea30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f6e700
5e4ea30
9f6e700
5e4ea30
9f6e700
5e4ea30
9f6e700
5e4ea30
 
 
 
9f6e700
5e4ea30
 
 
9f6e700
5e4ea30
9f6e700
 
5e4ea30
9f6e700
 
5e4ea30
 
 
9f6e700
5e4ea30
9f6e700
5e4ea30
9f6e700
5e4ea30
 
 
9f6e700
 
5e4ea30
 
9f6e700
5e4ea30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
abf83b1
5e4ea30
 
 
 
 
 
 
 
 
 
 
abf83b1
 
 
 
 
 
 
 
 
 
 
5e4ea30
 
 
 
 
 
 
 
 
 
 
 
 
 
abf83b1
 
 
5e4ea30
 
 
 
 
 
 
 
 
9f6e700
5e4ea30
9f6e700
5e4ea30
9f6e700
5e4ea30
 
 
1c5a36b
4fa6b76
 
9f6e700
 
5e4ea30
 
9f6e700
5e4ea30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f6e700
 
5e4ea30
 
 
 
 
 
 
 
 
9f6e700
 
5e4ea30
 
 
 
abf83b1

# -*- coding: utf-8 -*-
"""
系統需求:
- gradio: 用於建立 Web UI
- opencv-python: 用於圖片處理
- ultralytics: YOLOv8 官方函式庫
- Pillow: 圖片處理基礎庫
- transformers: (可選，若YOLO模型需要)
"""

import gradio as gr
import os
import cv2
from ultralytics import YOLO
import shutil
import zipfile
import uuid  # 匯入 uuid 以生成唯一的執行 ID
from pathlib import Path # 匯入 Path 以更方便地操作路徑
import gemini_ai as genai
from datetime import datetime
import mongo_lib as mongo




def create_zip_archive(files, zip_filename):
    """
    Compress a list of files into a single flat zip archive.

    Every file is stored under its base name only (no directory components).
    Paths that do not exist are skipped with a printed warning. A path that
    is listed more than once is stored only once, and different paths that
    share a base name are stored under distinct member names
    ("name.ext", "name_1.ext", ...) so that no entry shadows another one
    when the archive is extracted.

    Args:
        files (list): Paths of the files to add to the archive.
        zip_filename (str): Path of the zip file to create.

    Returns:
        str: The path of the created zip file (``zip_filename`` as given).
    """
    seen_paths = set()   # normalized source paths already written
    used_names = set()   # member names already present in the archive
    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for file in files:
            if not os.path.exists(file):
                print(f"警告: 檔案 '{file}' 不存在，無法加入壓縮檔。")
                continue

            # The same file listed twice would otherwise create a duplicate entry.
            path_key = os.path.normcase(os.path.abspath(file))
            if path_key in seen_paths:
                continue
            seen_paths.add(path_key)

            # Store only the file name, not the full path.
            arcname = os.path.basename(file)
            if arcname in used_names:
                # A different file with the same base name: pick a free name
                # instead of writing a second member with an identical name.
                stem, ext = os.path.splitext(arcname)
                counter = 1
                while f"{stem}_{counter}{ext}" in used_names:
                    counter += 1
                arcname = f"{stem}_{counter}{ext}"
            used_names.add(arcname)

            zipf.write(file, arcname)
    return zip_filename

def gradio_multi_model_detection(
    image_files,
    model_files,
    conf_threshold,
    enable_mllm,
    mllm_prompt,
    progress=gr.Progress(track_tqdm=True)
):
    """
    Main Gradio handler: run every loaded YOLO model on every uploaded image.

    This is a generator; each ``yield`` streams a partial UI update. For each
    readable image it writes an annotated JPEG and a text report (plus an
    optional MLLM report) into a per-run directory under
    ``gradio_detection_results``, logs a record through
    ``mongo.insert_mongodb_log``, and finally zips all result files.

    Args:
        image_files (list): Uploaded image files from the Gradio File
            component; each item's ``.name`` attribute is used as its path.
        model_files (list): Uploaded model files (same ``.name`` convention).
            When empty or None, the default ``yolov8n.pt`` model is used.
        conf_threshold (float): Minimum confidence for a detection to be kept.
        enable_mllm (bool): Whether to run the MLLM analysis for each image.
        mllm_prompt (str): Custom MLLM prompt; a blank value is passed to the
            analyzer as None.
        progress (gr.Progress): Gradio progress tracker.

    Yields:
        dict: Mapping from the module-level output components
        (``output_status``, ``output_gallery``, ``output_text``,
        ``download_button``) to their new values.
    """
    global_datetime = datetime.now()

    # Write the master log record for this run.
    # NOTE(review): image_files / model_files are logged as the raw objects
    # received from Gradio; whether they serialize cleanly depends on
    # mongo_lib - confirm.
    document = {"log_style":"master",
                "create_datetime": str(global_datetime),
                "image_files": image_files,
                "model_files": model_files,
                "conf_threshold":conf_threshold,
                "enable_mllm":enable_mllm,
                "mllm_prompt":mllm_prompt
                }

    mongo.insert_mongodb_log("multi_model_detection",document)  # keep a log for later auditing

    if not image_files:
        yield {
            output_status: gr.update(value="錯誤：請至少上傳一張圖片。"),
            output_gallery: None,
            output_text: None,
            download_button: None
        }
        return

    # --- 1. Initial setup ---
    # Create a unique sub-directory for this run.
    run_id = str(uuid.uuid4())
    base_output_dir = Path('gradio_detection_results')
    run_output_dir = base_output_dir / f"run_{run_id[:8]}"
    run_output_dir.mkdir(parents=True, exist_ok=True)

    image_paths = [file.name for file in image_files]
    model_paths = [file.name for file in model_files] if model_files else []

    # --- 2. Load models ---
    yield {output_status: gr.update(value="正在載入模型...")}
    loaded_models = []
    if not model_paths:
        # No model uploaded: fall back to the default model.
        default_model_path = 'yolov8n.pt'
        try:
            model = YOLO(default_model_path)
            loaded_models.append((default_model_path, model))
        except Exception as e:
            yield {output_status: gr.update(value=f"錯誤: 無法載入預設模型 '{default_model_path}' - {e}")}
            return
    else:
        for model_path in model_paths:
            try:
                model = YOLO(model_path)
                loaded_models.append((model_path, model))
            except Exception as e:
                # A model that fails to load is skipped; the others still run.
                print(f"警告: 無法載入模型 '{model_path}' - {e}，將跳過此模型。")
                continue

    if not loaded_models:
        yield {output_status: gr.update(value="錯誤: 沒有任何模型成功載入。")}
        return

    # One box colour per model (cycled). This does not depend on the image,
    # so it is built once instead of once per image.
    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255), (0, 255, 255)]
    color_map = {Path(p).name: colors[idx % len(colors)] for idx, (p, _) in enumerate(loaded_models)}

    # --- 3. Process the images one by one ---
    total_images = len(image_paths)
    annotated_image_paths = []
    all_result_files = []
    # all_texts collects the textual results of every image.
    all_texts = []
    # Output stems already used in this run, so that two uploads sharing a
    # stem (e.g. "a.jpg" and "a.png") do not overwrite each other's results.
    used_stems = set()

    for i, image_path_str in enumerate(image_paths):
        image_path = Path(image_path_str)
        progress(i / total_images, desc=f"處理中: {image_path.name}")
        yield {
            output_status: gr.update(value=f"處理中... ({i+1}/{total_images}) - {image_path.name}"),
            output_gallery: gr.update(value=annotated_image_paths)
        }

        original_image = cv2.imread(str(image_path))
        if original_image is None:
            print(f"警告: 無法讀取圖片 '{image_path}'，跳過。")
            continue

        annotated_image = original_image.copy()

        # Pick an output stem that is unique within this run.
        image_base_name = image_path.stem
        if image_base_name in used_stems:
            suffix = 1
            while f"{image_path.stem}_{suffix}" in used_stems:
                suffix += 1
            image_base_name = f"{image_path.stem}_{suffix}"
        used_stems.add(image_base_name)

        # --- 3a. YOLO object detection ---
        yolo_output_content = [f"--- 檔案: {image_path.name} ---"]
        all_detections_for_image = []

        for model_path_str, model_obj in loaded_models:
            model_name = Path(model_path_str).name
            yolo_output_content.append(f"--- 模型: {model_name} ---")
            # Pass the threshold to the model itself. Without conf=,
            # Ultralytics applies its own default cut-off (0.25 per its docs),
            # so a lower slider value would never surface more detections.
            results = model_obj(str(image_path), conf=conf_threshold, verbose=False, device="cpu")[0]

            kept_for_model = 0
            if results.boxes:
                for box in results.boxes:
                    conf = float(box.conf[0])
                    if conf < conf_threshold:
                        continue
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    cls_id = int(box.cls[0])
                    cls_name = model_obj.names[cls_id]

                    detection_info = {'model_name': model_name, 'class_name': cls_name, 'confidence': conf, 'bbox': (x1, y1, x2, y2)}
                    all_detections_for_image.append(detection_info)
                    yolo_output_content.append(f"  - {cls_name} (信賴度: {conf:.2f}) [座標: {x1},{y1},{x2},{y2}]")
                    kept_for_model += 1

            # Also report "nothing detected" when every box was filtered out,
            # so each model header is always followed by at least one line.
            if kept_for_model == 0:
                yolo_output_content.append("  未偵測到任何物件。")

        # Draw the detection boxes.
        for det in all_detections_for_image:
            x1, y1, x2, y2 = det['bbox']
            color = color_map.get(det['model_name'], (200, 200, 200))
            label = f"{det['class_name']} {det['confidence']:.2f}"
            cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
            # Keep the label baseline inside the image for boxes that touch
            # the top edge; otherwise the text would be drawn off-canvas.
            label_origin = (x1, max(y1 - 10, 15))
            cv2.putText(annotated_image, label, label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Save the annotated image.
        output_image_path = run_output_dir / f"{image_base_name}_yolo_detected.jpg"
        cv2.imwrite(str(output_image_path), annotated_image)
        annotated_image_paths.append(str(output_image_path))
        all_result_files.append(str(output_image_path))

        # Save the YOLO detection report.
        output_yolo_txt_path = run_output_dir / f"{image_base_name}_yolo_objects.txt"
        output_yolo_txt_path.write_text("\n".join(yolo_output_content), encoding='utf-8')
        all_result_files.append(str(output_yolo_txt_path))

        # --- 3b. MLLM analysis (if enabled) ---
        mllm_result_content = ""
        if enable_mllm:
            try:
                # A blank prompt is passed as None.
                prompt_to_use = mllm_prompt if mllm_prompt and mllm_prompt.strip() else None
                mllm_str = genai.analyze_content_with_gemini(str(image_path), prompt_to_use)
                mllm_result_content = f"--- MLLM 分析結果 ---\n{mllm_str}"
            except Exception as e:
                # Best effort: an MLLM failure is recorded in the report
                # instead of aborting the whole run.
                mllm_result_content = f"--- MLLM 分析失敗 ---\n原因: {e}"

            output_mllm_txt_path = run_output_dir / f"{image_base_name}_mllm_result.txt"
            output_mllm_txt_path.write_text(mllm_result_content, encoding='utf-8')
            all_result_files.append(str(output_mllm_txt_path))

        # Write the per-image detail log record.
        document = {"log_style":"detail",
                    "create_datetime": str(global_datetime),
                    "image_path": str(image_path),
                    "yolo_result": yolo_output_content,
                    "enable_mllm": enable_mllm,
                    "mllm_prompt": mllm_prompt,
                    "mllm_result": mllm_result_content}

        mongo.insert_mongodb_log("multi_model_detection",document)  # keep a log for later auditing

        # Add this image's results to the combined text output. The MLLM text
        # is reused from memory rather than read back from the file just written.
        all_texts.append("\n".join(yolo_output_content))
        if enable_mllm:
            all_texts.append(mllm_result_content)

    # --- 4. Finish: package everything and publish the final result ---
    progress(1, desc="打包結果中...")
    zip_filename = run_output_dir / f"run_{run_id[:8]}_results.zip"
    created_zip_path = create_zip_archive(all_result_files, str(zip_filename))

    final_status = f"處理完成！共 {total_images} 張圖片。結果儲存於: {run_output_dir.absolute()}"
    combined_text_output = "\n\n".join(all_texts)

    yield {
        output_status: gr.update(value=final_status),
        download_button: gr.update(value=created_zip_path, visible=True),
        output_text: gr.update(value=combined_text_output),
        output_gallery: gr.update(value=annotated_image_paths)  # make sure the final gallery is refreshed too
    }

def toggle_mllm_prompt(is_enabled):
    """
    Show or hide the MLLM prompt textbox so it follows the checkbox state.

    Args:
        is_enabled: Current checkbox value, used directly as the visibility flag.

    Returns:
        The ``gr.update(...)`` object carrying the new ``visible`` setting.
    """
    visibility_update = gr.update(visible=is_enabled)
    return visibility_update

# --- Gradio Interface ---
# Builds the Blocks layout: inputs in the left column, results in the right
# column. The output components created here are referenced by name as the
# keys of the dicts yielded by gradio_multi_model_detection.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 智慧影像分析工具 (YOLO + MLLM)")
    gr.Markdown("上傳圖片與YOLO模型進行物件偵測，並可選用MLLM進行進階圖像理解。   ver.250824.1")
    # Disabled debugging aid (would display the mongo_uri environment variable):
    # mongo_uri = os.getenv('mongo_uri')
    # gr.Markdown(mongo_uri)

    with gr.Row():
        with gr.Column(scale=1):
            # Input components
            image_input = gr.File(label="上傳圖片", file_count="multiple", file_types=["image"])
            # Disabled variant of the model uploader that passed an extra `info=` hint:
            #model_input = gr.File(label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"], info="若不提供，將使用預設的 yolov8n.pt 模型。")
            # When no model is uploaded, the handler falls back to yolov8n.pt.
            model_input = gr.File(label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"])
            
            with gr.Accordion("進階設定", open=False):
                conf_slider = gr.Slider(minimum=0.1, maximum=1, value=0.40, step=0.05, label="信賴度閾值")
                mllm_enabled_checkbox = gr.Checkbox(label="開啟MLLM辨識", value=False)
                # Hidden until the checkbox above is ticked (see toggle_mllm_prompt).
                mllm_prompt_input = gr.Textbox(label="自訂 MLLM Prompt (選填)", placeholder="例如：請描述圖中人物的穿著與場景。", visible=False)

            run_button = gr.Button("開始辨識", variant="primary")

        with gr.Column(scale=2):
            # Output components
            output_gallery = gr.Gallery(label="辨識結果預覽", height=500, object_fit="contain", allow_preview=True)
            output_text = gr.Textbox(label="詳細辨識資訊", lines=15, placeholder="辨識完成後，所有結果將顯示於此。")
            output_status = gr.Textbox(label="執行狀態", interactive=False)
            # Starts hidden; the handler's final yield sets visible=True together with the zip path.
            download_button = gr.File(label="下載所有結果 (.zip)", file_count="single", visible=False)

    # --- Event bindings ---
    
    # Clicking the run button starts the detection handler.
    run_button.click(
        fn=gradio_multi_model_detection,
        inputs=[image_input, model_input, conf_slider, mllm_enabled_checkbox, mllm_prompt_input],
        outputs=[output_gallery, output_status, download_button, output_text]
    )

    # Ticking / unticking the MLLM checkbox shows / hides the prompt textbox.
    mllm_enabled_checkbox.change(
        fn=toggle_mllm_prompt,
        inputs=mllm_enabled_checkbox,
        outputs=mllm_prompt_input
    )

# Launch the Gradio app when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)
    # Disabled alternative launch call:
    #demo.launch(share=True)