File size: 4,757 Bytes
a65508e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
from abc import ABC, abstractmethod
from pathlib import Path

import cv2
from PIL import Image
from ultralytics import YOLO


class BaseModel(ABC):
    """Abstract interface for model wrappers.

    A subclass must override the constructor and both prediction methods
    before it can be instantiated.
    """

    @abstractmethod
    def __init__(self, *args, **kwargs):
        """Set up the model; the accepted arguments are implementation-specific."""

    @abstractmethod
    def predict_image(self, image):
        """Run inference on a single image."""

    @abstractmethod
    def predict_video(self, video):
        """Run inference on a video."""


class YOLOModel(BaseModel):
    """Ultralytics YOLO detector with helpers for images, videos and frames."""

    def __init__(self, model_path=None):
        """Load YOLO detection weights.

        Args:
            model_path: Path to a weights file. When ``None``, the file
                ``models/yolov8n.pt`` located next to this source file is used.
        """
        if model_path is None:
            weights_path = Path(__file__).resolve().parent / "models" / "yolov8n.pt"
        else:
            weights_path = Path(model_path)
        self.model = YOLO(str(weights_path), task="detect")

    def predict_image(self, image, min_confidence, classes=None):
        """Run detection on an image and return the annotated picture.

        The annotated image is also written to ``annotated_image.png`` in the
        current working directory.

        Args:
            image: Image source forwarded unchanged to ``self.model.predict``.
            min_confidence: Confidence threshold passed as ``conf``.
            classes: Class IDs to keep, or ``None`` for all classes.

        Returns:
            A PIL image built from the last result's plot.
        """
        results = self.model.predict(
            image, save=False, imgsz=800, conf=min_confidence, classes=classes
        )
        annotated_image_filename = "annotated_image.png"
        annotated = None
        for result in results:
            # plot() yields a BGR array (per the Ultralytics docs); reversing the
            # channel axis gives the RGB order PIL expects.
            annotated = Image.fromarray(result.plot()[..., ::-1])
        if annotated is not None:
            # Only the last plot survives, so a single write after the loop
            # leaves the same file as saving on every iteration.
            annotated.save(annotated_image_filename)
            return annotated
        # NOTE(review): reached only when predict() returns no result objects;
        # this opens whatever file an earlier call left behind (or raises
        # FileNotFoundError). Kept for backward compatibility.
        return Image.open(annotated_image_filename)

    def predict_video(
        self, video, min_confidence, target_dir_name="annotated_video", classes=None
    ):
        """Run detection on a video and let Ultralytics save the annotated output.

        Output is saved under ``./<target_dir_name>`` (``project="."``,
        ``exist_ok=True`` so repeated runs reuse the directory).

        Args:
            video: Video source forwarded unchanged to ``self.model.predict``.
            min_confidence: Confidence threshold passed as ``conf``.
            target_dir_name: Name of the run directory for the saved output.
            classes: Class IDs to keep, or ``None`` for all classes.
        """
        frame_results = self.model.predict(
            video,
            save=True,
            project=".",
            name=target_dir_name,
            exist_ok=True,
            imgsz=800,
            conf=min_confidence,
            classes=classes,
            # Stream so per-frame results are not accumulated in memory for the
            # whole video; nothing here uses them.
            stream=True,
        )
        # With stream=True predict() returns a generator; it must be consumed
        # for inference (and saving) to actually run.
        for _ in frame_results:
            pass

    @staticmethod
    def _to_numpy(values):
        """Convert a tensor-like object to a NumPy array on the host.

        Objects that lack ``detach``/``cpu``/``numpy`` (e.g. arrays that are
        already NumPy) are returned unchanged.
        """
        for method_name in ("detach", "cpu", "numpy"):
            method = getattr(values, method_name, None)
            if callable(method):
                values = method()
        return values

    @staticmethod
    def draw_yolo_dets(frame_bgr, result, show_score=True):
        """Draw YOLO detection results on a copy of a frame.

        Args:
            frame_bgr: Input frame in BGR format; it is not modified.
            result: YOLO detection result object exposing ``boxes`` and ``names``.
            show_score: Whether to append the confidence score to each label.

        Returns:
            Annotated copy of the frame with bounding boxes and labels.
        """
        out = frame_bgr.copy()
        boxes = getattr(result, "boxes", None)
        if boxes is None:
            return out

        names = result.names
        # Tensors may live on a GPU; calling .numpy() on them directly raises,
        # so move them to the host first.
        cls_ids = YOLOModel._to_numpy(boxes.cls).astype(int)
        confs = YOLOModel._to_numpy(boxes.conf)
        xyxy = YOLOModel._to_numpy(boxes.xyxy)

        box_color = (0, 255, 0)
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.6
        thickness = 2

        for corners, cls, score in zip(xyxy, cls_ids, confs):
            x1, y1, x2, y2 = map(int, corners)
            class_id = int(cls)
            label = names.get(class_id, str(class_id))
            if show_score:
                label = f"{label} {score:.2f}"

            cv2.rectangle(out, (x1, y1), (x2, y2), box_color, thickness)

            (tw, th), _ = cv2.getTextSize(label, font, font_scale, thickness)
            # Keep the label strip inside the image for boxes touching the top
            # edge; when there is room above the box this matches the usual
            # placement directly over its top-left corner.
            label_top = max(0, y1 - th - 8)
            label_bottom = label_top + th + 8
            cv2.rectangle(
                out, (x1, label_top), (x1 + tw + 6, label_bottom), box_color, -1
            )
            cv2.putText(
                out,
                label,
                (x1 + 3, label_bottom - 6),
                font,
                font_scale,
                (0, 0, 0),
                thickness,
                cv2.LINE_AA,
            )
        return out

    def predict_and_visualize(
        self, frame, min_confidence, classes=None, show_score=True, imgsz=640
    ):
        """Predict objects in a frame and return the annotated frame.

        Errors raised during prediction or drawing are printed and swallowed so
        that a caller processing a stream of frames keeps running.

        Args:
            frame: Input frame (BGR format).
            min_confidence: Minimum confidence threshold.
            classes: List of class IDs to detect (None for all).
            show_score: Whether to show confidence scores.
            imgsz: Image size for inference.

        Returns:
            tuple: (results, annotated_frame)
                - results: YOLO detection results, or ``[]`` when prediction
                  returned nothing or failed.
                - annotated_frame: Frame with bounding boxes drawn, or an
                  unmodified copy of ``frame`` in the fallback cases.
        """
        try:
            results = self.model.predict(
                frame,
                conf=min_confidence,
                iou=0.5,
                verbose=False,
                classes=classes,
                imgsz=imgsz,
            )
            if not results:
                # predict() produced no result objects at all.
                return [], frame.copy()

            annotated_frame = self.draw_yolo_dets(
                frame, results[0], show_score=show_score
            )
            return results, annotated_frame

        except Exception as e:
            print(f"Error in YOLO prediction: {e}")
            # Return original frame on error
            return [], frame.copy()