irandoc_ocr / src /processing /parse_img.py
Alizmoh98's picture
deploy-app
e8e33af
from PIL import Image
from huggingface_hub import hf_hub_download
from doclayout_yolo import YOLOv10
from ..storage.schemas import BaseBox
import tempfile
from pathlib import Path
# Resolve a local path to the DocLayout-YOLO checkpoint published on the
# Hugging Face Hub. NOTE: this runs at import time, so importing this module
# triggers the hub lookup and the model load below.
filepath = hf_hub_download(
    filename="doclayout_yolo_docstructbench_imgsz1024.pt",
    repo_id="juliozhao/DocLayout-YOLO-DocStructBench",
)

# Single shared detector instance, used by parse_img() for every call.
model = YOLOv10(filepath)
def parse_img(
    img: Image.Image,
    device: str = "cpu",
    box_directory: str = "src/boxes",
) -> "tuple[list[BaseBox], list[Image.Image]]":
    """
    Run layout detection on an image, crop every detected box, save the
    crops to disk, and return their metadata.

    The image is written to a temporary PNG file so the module-level
    ``model`` can be given a file path; that file is always removed again,
    even when saving or prediction fails.

    Args:
        img: Source image to analyse and crop from.
        device: Device string forwarded to ``model.predict``.
        box_directory: Directory that receives the crops. It is created if
            missing. Crops are named ``box_<index>.png``, so a later call
            using the same directory overwrites files with the same index.

    Returns:
        A ``(boxes_result, crop_image_list)`` tuple of equal-length lists,
        in detection order: the ``BaseBox`` records and the matching
        cropped images. Both lists are empty when nothing is detected.
    """
    # Create box directory if it doesn't exist
    box_dir = Path(box_directory)
    box_dir.mkdir(parents=True, exist_ok=True)

    # Reserve a unique path with delete=False so the file stays on disk
    # after the handle is closed; the image is then written by name.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
        img_path = temp_file.name

    try:
        img.save(img_path, format="PNG")
        det_res = model.predict(
            img_path,
            imgsz=1024,
            conf=0.2,
            device=device,
        )
    finally:
        # Clean up temp file on every path, not only on success.
        Path(img_path).unlink(missing_ok=True)

    boxes_result = []
    crop_image_list = []
    for i, box_data in enumerate(det_res[0].boxes.data):
        # Each row is read as: first four values = box corners,
        # second-to-last = confidence, last = class index.
        row = box_data.tolist()
        x_min, y_min, x_max, y_max = row[:4]

        crop = img.crop((x_min, y_min, x_max, y_max))
        box_path = str(box_dir / f"box_{i}.png")
        crop.save(box_path)
        crop_image_list.append(crop)

        boxes_result.append(
            BaseBox(
                # NOTE: despite the field name, this stores the integer
                # class index taken from the detection row.
                class_name=int(row[-1]),
                x_min=float(x_min),
                y_min=float(y_min),
                x_max=float(x_max),
                y_max=float(y_max),
                confidence=float(row[-2]),
                saved_img_path=box_path,
            )
        )

    return boxes_result, crop_image_list