Spaces:

Alizmoh98
/

irandoc_ocr

Sleeping

App Files Files Community

irandoc_ocr / src /processing /parse_img.py

Alizmoh98

deploy-app

e8e33af 24 days ago

raw

history blame contribute delete

1.9 kB

	from PIL import Image
	from huggingface_hub import hf_hub_download
	from doclayout_yolo import YOLOv10
	from ..storage.schemas import BaseBox
	import tempfile
	from pathlib import Path

	# Resolve the DocLayout-YOLO checkpoint through the Hugging Face Hub and
	# build the detector once at import time, so every parse_img() call reuses
	# the same loaded model instead of reloading the weights.
	filepath = hf_hub_download(
	    filename="doclayout_yolo_docstructbench_imgsz1024.pt",
	    repo_id="juliozhao/DocLayout-YOLO-DocStructBench",
	)
	model = YOLOv10(filepath)


	def parse_img(
	    img: Image.Image,
	    device: str = "cpu",
	    box_directory: str = "src/boxes",
	) -> "tuple[list[BaseBox], list[Image.Image]]":
	    """
	    Detect layout boxes in an image, crop each one and save the crops.

	    The image is written to a temporary PNG, passed to the module-level
	    ``model`` for prediction, and every detected box is cropped out of
	    ``img`` and saved as ``box_<i>.png`` inside ``box_directory``.

	    Args:
	        img: Source image to analyse and crop from.
	        device: Device string forwarded to ``model.predict``.
	        box_directory: Directory that receives the cropped box images.
	            It is created if missing. File names depend only on the box
	            index, so a later call with the same directory overwrites the
	            crops of an earlier call.

	    Returns:
	        A ``(boxes_result, crop_image_list)`` tuple: the ``BaseBox``
	        metadata records and the matching cropped images, in detection
	        order.
	    """
	    box_dir = Path(box_directory)
	    box_dir.mkdir(parents=True, exist_ok=True)

	    # delete=False keeps the file on disk after the handle is closed so the
	    # model can open it by path; removing it is therefore our job.
	    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
	        img_path = temp_file.name

	    try:
	        img.save(img_path, format="PNG")
	        det_res = model.predict(
	            img_path,
	            imgsz=1024,
	            conf=0.2,
	            device=device,
	        )
	        # Convert to plain Python lists while the temp file still exists, so
	        # nothing below depends on it.
	        detections = det_res[0].boxes.data.tolist()
	    finally:
	        # Runs on failure too, so a failed save or prediction no longer
	        # leaves a stray PNG in the temp directory.
	        Path(img_path).unlink(missing_ok=True)

	    boxes_result = []
	    crop_image_list = []
	    for i, detection in enumerate(detections):
	        # Row layout as consumed here: the first four values are the crop
	        # rectangle, the second-to-last is the confidence and the last is
	        # the class index.
	        x_min, y_min, x_max, y_max = detection[:4]
	        crop = img.crop((x_min, y_min, x_max, y_max))
	        box_path = str(box_dir / f"box_{i}.png")
	        crop.save(box_path)
	        crop_image_list.append(crop)

	        boxes_result.append(
	            BaseBox(
	                class_name=int(detection[-1]),
	                x_min=float(x_min),
	                y_min=float(y_min),
	                x_max=float(x_max),
	                y_max=float(y_max),
	                confidence=float(detection[-2]),
	                saved_img_path=box_path,
	            )
	        )

	    return boxes_result, crop_image_list