Spaces:

andythebest
/

multi_model_detection

Sleeping

App Files Files Community

multi_model_detection / main.py

andythebest

Update main.py

1c5a36b verified 4 months ago

raw

history blame contribute delete

12.4 kB

	# -*- coding: utf-8 -*-
	"""
	系統需求:
	- gradio: 用於建立 Web UI
	- opencv-python: 用於圖片處理
	- ultralytics: YOLOv8 官方函式庫
	- Pillow: 圖片處理基礎庫
	- transformers: (可選，若YOLO模型需要)
	"""

	import gradio as gr
	import os
	import cv2
	from ultralytics import YOLO
	import shutil
	import zipfile
	import uuid # 匯入 uuid 以生成唯一的執行 ID
	from pathlib import Path # 匯入 Path 以更方便地操作路徑
	import gemini_ai as genai
	from datetime import datetime
	import mongo_lib as mongo




	def create_zip_archive(files, zip_filename):
	    """
	    Compress a list of files into a single, flat zip archive.

	    Each file is stored under its base name only (no directory part).
	    A path that is listed more than once is written only once, and distinct
	    files that share a base name receive a numeric suffix (``name_1.ext``,
	    ``name_2.ext``, ...) so that no archive member shadows another one when
	    the archive is extracted. Paths that do not exist are reported with a
	    printed warning and skipped.

	    Args:
	        files (list): Paths of the files to add to the archive.
	        zip_filename (str): Path of the zip file to create.

	    Returns:
	        str: ``zip_filename``, the path of the archive that was written.
	    """
	    seen_paths = set()   # resolved paths already written to the archive
	    used_names = set()   # member names already taken inside the archive

	    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
	        for path in files:
	            if not os.path.exists(path):
	                print(f"警告: 檔案 '{path}' 不存在，無法加入壓縮檔。")
	                continue

	            # Writing the same file twice would only create a duplicate member.
	            resolved = os.path.realpath(path)
	            if resolved in seen_paths:
	                continue
	            seen_paths.add(resolved)

	            # Store only the file name, never the full path; disambiguate
	            # when another file already claimed that name.
	            base_name = os.path.basename(path)
	            stem, ext = os.path.splitext(base_name)
	            arcname = base_name
	            counter = 1
	            while arcname in used_names:
	                arcname = f"{stem}_{counter}{ext}"
	                counter += 1
	            used_names.add(arcname)

	            zipf.write(path, arcname)
	    return zip_filename

	def gradio_multi_model_detection(
	    image_files,
	    model_files,
	    conf_threshold,
	    enable_mllm,
	    mllm_prompt,
	    progress=gr.Progress(track_tqdm=True)
	):
	    """
	    Main Gradio handler: run every loaded YOLO model on every uploaded image.

	    Implemented as a generator so the UI can be updated while work is in
	    progress. For each readable image it writes an annotated JPEG and a text
	    report (plus an optional MLLM report) into a per-run directory under
	    ``gradio_detection_results``, logs a record through ``mongo``, and at the
	    end bundles all result files into a zip archive.

	    Args:
	        image_files (list): Image files returned by the Gradio File component;
	            each item must expose its path as ``.name``.
	        model_files (list): Model files returned by the Gradio File component.
	            When empty or None, the default ``yolov8n.pt`` model is used.
	        conf_threshold (float): Detections whose confidence is below this
	            value are discarded.
	        enable_mllm (bool): Whether to also run the MLLM analysis.
	        mllm_prompt (str): Optional user prompt for the MLLM; a blank prompt
	            is passed to the helper as ``None``.
	        progress (gr.Progress): Gradio progress tracker.

	    Yields:
	        dict: Mapping of Gradio output components to their new values.
	    """
	    global_datetime = datetime.now()

	    # Master log record: one per invocation, written before input validation
	    # so that rejected requests are recorded as well.
	    # NOTE(review): the raw Gradio file objects are logged as-is; confirm that
	    # mongo.insert_mongodb_log can serialize them.
	    document = {
	        "log_style": "master",
	        "create_datetime": str(global_datetime),
	        "image_files": image_files,
	        "model_files": model_files,
	        "conf_threshold": conf_threshold,
	        "enable_mllm": enable_mllm,
	        "mllm_prompt": mllm_prompt,
	    }
	    mongo.insert_mongodb_log("multi_model_detection", document)

	    if not image_files:
	        yield {
	            output_status: gr.update(value="錯誤：請至少上傳一張圖片。"),
	            output_gallery: None,
	            output_text: None,
	            download_button: None
	        }
	        return

	    # --- 1. Initial setup ---
	    # Every run gets its own sub-directory, named after a fresh UUID.
	    run_id = str(uuid.uuid4())
	    base_output_dir = Path('gradio_detection_results')
	    run_output_dir = base_output_dir / f"run_{run_id[:8]}"
	    run_output_dir.mkdir(parents=True, exist_ok=True)

	    image_paths = [file.name for file in image_files]
	    model_paths = [file.name for file in model_files] if model_files else []

	    # --- 2. Load models ---
	    yield {output_status: gr.update(value="正在載入模型...")}
	    loaded_models = []
	    if not model_paths:
	        # No model uploaded: fall back to the default model.
	        default_model_path = 'yolov8n.pt'
	        try:
	            model = YOLO(default_model_path)
	            loaded_models.append((default_model_path, model))
	        except Exception as e:
	            yield {output_status: gr.update(value=f"錯誤: 無法載入預設模型 '{default_model_path}' - {e}")}
	            return
	    else:
	        for model_path in model_paths:
	            try:
	                model = YOLO(model_path)
	                loaded_models.append((model_path, model))
	            except Exception as e:
	                # A model that fails to load is skipped; the others still run.
	                print(f"警告: 無法載入模型 '{model_path}' - {e}，將跳過此模型。")

	    if not loaded_models:
	        yield {output_status: gr.update(value="錯誤: 沒有任何模型成功載入。")}
	        return

	    # One box colour per model; the palette wraps around when there are more
	    # models than colours. Built once because it does not depend on the image.
	    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255), (0, 255, 255)]
	    color_map = {Path(p).name: colors[idx % len(colors)] for idx, (p, _) in enumerate(loaded_models)}

	    # --- 3. Process the images one by one ---
	    total_images = len(image_paths)
	    annotated_image_paths = []
	    all_result_files = []
	    # Collected report text of every image, shown in the UI at the end.
	    all_texts = []
	    # Base names already used for result files in this run.
	    used_base_names = set()

	    for i, image_path_str in enumerate(image_paths):
	        image_path = Path(image_path_str)
	        progress(i / total_images, desc=f"處理中: {image_path.name}")
	        yield {
	            output_status: gr.update(value=f"處理中... ({i+1}/{total_images}) - {image_path.name}"),
	            output_gallery: gr.update(value=annotated_image_paths)
	        }

	        original_image = cv2.imread(str(image_path))
	        if original_image is None:
	            print(f"警告: 無法讀取圖片 '{image_path}'，跳過。")
	            continue

	        annotated_image = original_image.copy()

	        # Uploads can share a stem (same file name from different folders, or
	        # the same name with different extensions). Give each image its own
	        # base name so one image's result files never overwrite another's.
	        image_base_name = image_path.stem
	        suffix = 1
	        while image_base_name in used_base_names:
	            image_base_name = f"{image_path.stem}_{suffix}"
	            suffix += 1
	        used_base_names.add(image_base_name)

	        # --- 3a. YOLO object detection ---
	        yolo_output_content = [f"--- 檔案: {image_path.name} ---"]
	        all_detections_for_image = []

	        for model_path_str, model_obj in loaded_models:
	            model_name = Path(model_path_str).name
	            yolo_output_content.append(f"--- 模型: {model_name} ---")
	            results = model_obj(str(image_path), verbose=False, device="cpu")[0]

	            detections_before = len(all_detections_for_image)
	            if results.boxes:
	                for box in results.boxes:
	                    conf = float(box.conf[0])
	                    if conf >= conf_threshold:
	                        x1, y1, x2, y2 = map(int, box.xyxy[0])
	                        cls_id = int(box.cls[0])
	                        cls_name = model_obj.names[cls_id]

	                        detection_info = {'model_name': model_name, 'class_name': cls_name, 'confidence': conf, 'bbox': (x1, y1, x2, y2)}
	                        all_detections_for_image.append(detection_info)
	                        yolo_output_content.append(f" - {cls_name} (信賴度: {conf:.2f}) [座標: {x1},{y1},{x2},{y2}]")

	            # Report "nothing detected" both when the model returned no boxes
	            # and when every box fell below the confidence threshold.
	            if len(all_detections_for_image) == detections_before:
	                yolo_output_content.append(" 未偵測到任何物件。")

	        # Draw the kept detections of all models onto the copy of the image.
	        for det in all_detections_for_image:
	            x1, y1, x2, y2 = det['bbox']
	            color = color_map.get(det['model_name'], (200, 200, 200))
	            label = f"{det['class_name']} {det['confidence']:.2f}"
	            cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
	            cv2.putText(annotated_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

	        # Save the annotated image.
	        output_image_path = run_output_dir / f"{image_base_name}_yolo_detected.jpg"
	        cv2.imwrite(str(output_image_path), annotated_image)
	        annotated_image_paths.append(str(output_image_path))
	        all_result_files.append(str(output_image_path))

	        # Save the textual YOLO report.
	        output_yolo_txt_path = run_output_dir / f"{image_base_name}_yolo_objects.txt"
	        output_yolo_txt_path.write_text("\n".join(yolo_output_content), encoding='utf-8')
	        all_result_files.append(str(output_yolo_txt_path))

	        # --- 3b. MLLM analysis (only when enabled) ---
	        output_mllm_txt_path = None
	        mllm_result_content = ""
	        if enable_mllm:
	            try:
	                prompt_to_use = mllm_prompt if mllm_prompt and mllm_prompt.strip() else None
	                mllm_str = genai.analyze_content_with_gemini(str(image_path), prompt_to_use)
	                mllm_result_content = f"--- MLLM 分析結果 ---\n{mllm_str}"
	            except Exception as e:
	                # Best effort: a failed analysis is reported in the output
	                # instead of aborting the whole run.
	                mllm_result_content = f"--- MLLM 分析失敗 ---\n原因: {e}"

	            output_mllm_txt_path = run_output_dir / f"{image_base_name}_mllm_result.txt"
	            output_mllm_txt_path.write_text(mllm_result_content, encoding='utf-8')
	            all_result_files.append(str(output_mllm_txt_path))

	        # Detail log record: one per processed image.
	        document = {
	            "log_style": "detail",
	            "create_datetime": str(global_datetime),
	            "image_path": str(image_path),
	            "yolo_result": yolo_output_content,
	            "enable_mllm": enable_mllm,
	            "mllm_prompt": mllm_prompt,
	            "mllm_result": mllm_result_content,
	        }
	        mongo.insert_mongodb_log("multi_model_detection", document)

	        # Add this image's reports to the combined text output. The MLLM text
	        # is taken from memory rather than re-read from the file just written.
	        all_texts.append("\n".join(yolo_output_content))
	        if output_mllm_txt_path is not None:
	            all_texts.append(mllm_result_content)

	    # --- 4. Finish: bundle the results and publish the final state ---
	    progress(1, desc="打包結果中...")
	    zip_filename = run_output_dir / f"run_{run_id[:8]}_results.zip"
	    created_zip_path = create_zip_archive(all_result_files, str(zip_filename))

	    final_status = f"處理完成！共 {total_images} 張圖片。結果儲存於: {run_output_dir.absolute()}"
	    combined_text_output = "\n\n".join(all_texts)

	    yield {
	        output_status: gr.update(value=final_status),
	        download_button: gr.update(value=created_zip_path, visible=True),
	        output_text: gr.update(value=combined_text_output),
	        # Refresh the gallery once more so the last image is included.
	        output_gallery: gr.update(value=annotated_image_paths)
	    }

	def toggle_mllm_prompt(is_enabled):
	    """
	    Show or hide the MLLM prompt textbox according to the checkbox state.

	    Args:
	        is_enabled (bool): Current value of the MLLM checkbox.

	    Returns:
	        The Gradio update whose ``visible`` flag is set to ``is_enabled``.
	    """
	    visibility_update = gr.update(visible=is_enabled)
	    return visibility_update

	# --- Gradio interface ---
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# 智慧影像分析工具 (YOLO + MLLM)")
	    gr.Markdown("上傳圖片與YOLO模型進行物件偵測，並可選用MLLM進行進階圖像理解。 ver.250824.1")
	    # Debug aid (disabled):
	    # mongo_uri = os.getenv('mongo_uri')
	    # gr.Markdown(mongo_uri)

	    with gr.Row():
	        # Left column: inputs and settings.
	        with gr.Column(scale=1):
	            image_input = gr.File(
	                label="上傳圖片",
	                file_count="multiple",
	                file_types=["image"],
	            )
	            # Disabled variant that also passed an ``info`` hint:
	            # model_input = gr.File(label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"], info="若不提供，將使用預設的 yolov8n.pt 模型。")
	            model_input = gr.File(
	                label="上傳YOLO模型 (.pt)",
	                file_count="multiple",
	                file_types=[".pt"],
	            )

	            with gr.Accordion("進階設定", open=False):
	                conf_slider = gr.Slider(
	                    minimum=0.1,
	                    maximum=1,
	                    value=0.40,
	                    step=0.05,
	                    label="信賴度閾值",
	                )
	                mllm_enabled_checkbox = gr.Checkbox(
	                    label="開啟MLLM辨識",
	                    value=False,
	                )
	                # Hidden until the checkbox above is ticked.
	                mllm_prompt_input = gr.Textbox(
	                    label="自訂 MLLM Prompt (選填)",
	                    placeholder="例如：請描述圖中人物的穿著與場景。",
	                    visible=False,
	                )

	            run_button = gr.Button("開始辨識", variant="primary")

	        # Right column: outputs.
	        with gr.Column(scale=2):
	            output_gallery = gr.Gallery(
	                label="辨識結果預覽",
	                height=500,
	                object_fit="contain",
	                allow_preview=True,
	            )
	            output_text = gr.Textbox(
	                label="詳細辨識資訊",
	                lines=15,
	                placeholder="辨識完成後，所有結果將顯示於此。",
	            )
	            output_status = gr.Textbox(label="執行狀態", interactive=False)
	            # Starts hidden; the handler makes it visible once a zip exists.
	            download_button = gr.File(
	                label="下載所有結果 (.zip)",
	                file_count="single",
	                visible=False,
	            )

	    # --- Event wiring ---

	    # The run button starts the detection handler.
	    run_button.click(
	        fn=gradio_multi_model_detection,
	        inputs=[
	            image_input,
	            model_input,
	            conf_slider,
	            mllm_enabled_checkbox,
	            mllm_prompt_input,
	        ],
	        outputs=[
	            output_gallery,
	            output_status,
	            download_button,
	            output_text,
	        ],
	    )

	    # Ticking / unticking the MLLM checkbox toggles the prompt textbox.
	    mllm_enabled_checkbox.change(
	        fn=toggle_mllm_prompt,
	        inputs=mllm_enabled_checkbox,
	        outputs=mllm_prompt_input,
	    )

	# Launch the Gradio app when run as a script.
	if __name__ == "__main__":
	    demo.launch(debug=True)
	    # Disabled alternative:
	    # demo.launch(share=True)