#!/usr/bin/env python3
"""
COCO Dataset Visualizer
Copyright (c) 2024 The DEIM Authors. All Rights Reserved.

A simple, dedicated visualizer for COCO format datasets using OpenCV.
Displays images with bounding boxes and allows interactive browsing.
"""

import os
import sys
import json
import argparse
import cv2
import numpy as np
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Any


class COCOVisualizer:
    """Interactive COCO dataset visualizer using OpenCV."""

    def __init__(self, images_dir: str, annotations_file: str,
                 confidence_threshold: float = 0.0):
        """
        Initialize the COCO visualizer.

        Args:
            images_dir: Path to directory containing images
            annotations_file: Path to COCO format JSON annotations file
            confidence_threshold: Minimum confidence threshold for
                predictions (default: 0.0; ground truth has no score and
                is treated as 1.0)

        Raises:
            FileNotFoundError: If the annotations file does not exist.
        """
        self.images_dir = Path(images_dir)
        self.annotations_file = Path(annotations_file)
        self.confidence_threshold = confidence_threshold
        self.current_index = 0
        self.window_name = "COCO Dataset Visualizer"
        self.show_help = True

        # Color palette for different classes (BGR format for OpenCV).
        # Category IDs are mapped onto this palette modulo its length.
        self.colors = [
            (255, 0, 0),      # Blue
            (0, 255, 0),      # Green
            (0, 0, 255),      # Red
            (255, 255, 0),    # Cyan
            (255, 0, 255),    # Magenta
            (0, 255, 255),    # Yellow
            (128, 0, 128),    # Purple
            (255, 165, 0),    # Orange
            (0, 128, 255),    # Light Blue
            (128, 255, 0),    # Light Green
            (255, 192, 203),  # Pink
            (220, 220, 220),  # Light Gray
            (128, 128, 0),    # Olive
            (0, 128, 128),    # Teal
            (128, 0, 0),      # Maroon
            (0, 0, 128),      # Navy
            (255, 128, 128),  # Light Red
            (128, 255, 128),  # Light Green
            (128, 128, 255),  # Light Blue
            (64, 64, 64),     # Dark Gray
        ]

        # Load COCO data and build the lookup structures used for display.
        self.coco_data = self._load_coco_data()
        self.categories = self._build_category_map()
        self.images = self._build_image_list()
        self.annotations_by_image = self._build_annotations_map()

        # Fix: first print had a pointless f-prefix (no placeholders, ruff F541);
        # output is byte-identical.
        print("Loaded COCO dataset:")
        print(f"  Images: {len(self.images)}")
        print(f"  Categories: {len(self.categories)}")
        print(f"  Total annotations: {len(self.coco_data.get('annotations', []))}")
def _load_coco_data(self) -> Dict[str, Any]: """Load COCO annotations from JSON file.""" if not self.annotations_file.exists(): raise FileNotFoundError(f"Annotations file not found: {self.annotations_file}") with open(self.annotations_file, 'r') as f: return json.load(f) def _build_category_map(self) -> Dict[int, Dict[str, Any]]: """Build mapping from category ID to category info.""" categories = {} for cat in self.coco_data.get('categories', []): categories[cat['id']] = { 'name': cat['name'], 'color': self.colors[cat['id'] % len(self.colors)] } return categories def _build_image_list(self) -> List[Dict[str, Any]]: """Build list of images with their metadata.""" images = [] for img in self.coco_data.get('images', []): img_path = self.images_dir / img['file_name'] if img_path.exists(): images.append(img) else: print(f"Warning: Image not found: {img_path}") return images def _build_annotations_map(self) -> Dict[int, List[Dict[str, Any]]]: """Build mapping from image ID to list of annotations.""" annotations_map = {} for ann in self.coco_data.get('annotations', []): image_id = ann['image_id'] if image_id not in annotations_map: annotations_map[image_id] = [] # Filter by confidence if it's a prediction file confidence = ann.get('score', 1.0) # Default to 1.0 for ground truth if confidence >= self.confidence_threshold: annotations_map[image_id].append(ann) return annotations_map def _load_image(self, image_info: Dict[str, Any]) -> Optional[np.ndarray]: """Load image from file.""" img_path = self.images_dir / image_info['file_name'] if not img_path.exists(): print(f"Image not found: {img_path}") return None image = cv2.imread(str(img_path)) if image is None: print(f"Failed to load image: {img_path}") return None return image def _draw_bounding_boxes(self, image: np.ndarray, annotations: List[Dict[str, Any]]) -> np.ndarray: """Draw bounding boxes and labels on image.""" if not annotations: return image image_copy = image.copy() for ann in annotations: # Get bounding box 
coordinates (COCO format: [x, y, width, height]) bbox = ann['bbox'] x, y, w, h = bbox x1, y1, x2, y2 = int(x), int(y), int(x + w), int(y + h) # Get category info category_id = ann['category_id'] category_info = self.categories.get(category_id, { 'name': f'class_{category_id}', 'color': self.colors[0] }) color = category_info['color'] label = category_info['name'] # Add confidence score if available (for predictions) confidence = ann.get('score') if confidence is not None: label = f"{label}: {confidence:.2f}" # Draw bounding box cv2.rectangle(image_copy, (x1, y1), (x2, y2), color, 2) # Draw label background and text font = cv2.FONT_HERSHEY_SIMPLEX font_scale = 0.6 thickness = 1 (text_width, text_height), baseline = cv2.getTextSize( label, font, font_scale, thickness ) # Draw text background cv2.rectangle( image_copy, (x1, y1 - text_height - baseline - 5), (x1 + text_width + 5, y1), color, -1 ) # Draw text cv2.putText( image_copy, label, (x1 + 2, y1 - baseline - 2), font, font_scale, (255, 255, 255), thickness ) return image_copy def _draw_info_panel(self, image: np.ndarray, image_info: Dict[str, Any], annotations: List[Dict[str, Any]]) -> np.ndarray: """Draw information panel on the image.""" if not self.show_help: return image # Prepare info text info_lines = [ f"Image: {self.current_index + 1}/{len(self.images)}", f"File: {image_info['file_name']}", f"Size: {image_info['width']}x{image_info['height']}", f"Objects: {len(annotations)}", f"Conf >= {self.confidence_threshold:.2f}", "", "Controls:", "'n'/Right - Next image", "'p'/Left - Previous image", "'s' - Save current image", "'h' - Toggle this help", "'g' - Go to image index", "'+'/'-' - Adjust confidence", "'q'/ESC - Quit" ] # Calculate panel dimensions font = cv2.FONT_HERSHEY_SIMPLEX font_scale = 0.5 thickness = 1 line_height = 20 max_width = 0 for line in info_lines: (text_width, _), _ = cv2.getTextSize(line, font, font_scale, thickness) max_width = max(max_width, text_width) panel_width = max_width + 20 
panel_height = len(info_lines) * line_height + 20 # Create info panel h, w = image.shape[:2] panel_x = w - panel_width - 10 panel_y = 10 # Draw panel background cv2.rectangle( image, (panel_x, panel_y), (panel_x + panel_width, panel_y + panel_height), (0, 0, 0), -1 ) cv2.rectangle( image, (panel_x, panel_y), (panel_x + panel_width, panel_y + panel_height), (255, 255, 255), 1 ) # Draw text lines for i, line in enumerate(info_lines): if line == "": # Skip empty lines continue y_pos = panel_y + 15 + i * line_height color = (255, 255, 255) if not line.startswith("'") else (0, 255, 255) cv2.putText( image, line, (panel_x + 10, y_pos), font, font_scale, color, thickness ) return image def visualize_image(self, index: int) -> Optional[np.ndarray]: """Visualize a single image with its annotations.""" if index < 0 or index >= len(self.images): return None image_info = self.images[index] image = self._load_image(image_info) if image is None: return None # Get annotations for this image annotations = self.annotations_by_image.get(image_info['id'], []) # Draw bounding boxes image = self._draw_bounding_boxes(image, annotations) # Draw info panel image = self._draw_info_panel(image, image_info, annotations) return image def save_current_image(self) -> None: """Save the current annotated image.""" output_dir = Path("coco_visualizer_output") output_dir.mkdir(exist_ok=True) image = self.visualize_image(self.current_index) if image is not None: image_info = self.images[self.current_index] filename = f"annotated_{self.current_index:06d}_{Path(image_info['file_name']).stem}.jpg" output_path = output_dir / filename cv2.imwrite(str(output_path), image) print(f"Saved annotated image: {output_path}") def run(self): """Run the interactive visualizer.""" if not self.images: print("No images found to visualize!") return cv2.namedWindow(self.window_name, cv2.WINDOW_NORMAL) cv2.resizeWindow(self.window_name, 1200, 800) print("\n=== COCO Dataset Visualizer ===") print(f"Dataset: 
{self.annotations_file}") print(f"Images directory: {self.images_dir}") print(f"Total images: {len(self.images)}") print(f"Confidence threshold: {self.confidence_threshold}") print("\nPress 'h' to toggle help overlay") print("Press any key to start...\n") while True: # Visualize current image image = self.visualize_image(self.current_index) if image is None: print(f"Failed to load image at index {self.current_index}") self.current_index = (self.current_index + 1) % len(self.images) continue cv2.imshow(self.window_name, image) # Handle keyboard input key = cv2.waitKey(0) & 0xFF if key == ord('q') or key == 27: # 'q' or ESC break elif key == ord('n') or key == 83: # 'n' or Right arrow self.current_index = (self.current_index + 1) % len(self.images) elif key == ord('p') or key == 81: # 'p' or Left arrow self.current_index = (self.current_index - 1) % len(self.images) elif key == ord('s'): # 's' - Save self.save_current_image() elif key == ord('h'): # 'h' - Toggle help self.show_help = not self.show_help elif key == ord('g'): # 'g' - Go to index try: print(f"\nEnter image index (0-{len(self.images)-1}): ", end='', flush=True) # Note: This is a limitation of OpenCV - we can't easily get input # In a real application, you might want to use a GUI framework print("(Feature not available in OpenCV mode)") except: pass elif key == ord('+') or key == ord('='): # Increase confidence threshold self.confidence_threshold = min(1.0, self.confidence_threshold + 0.05) self.annotations_by_image = self._build_annotations_map() print(f"Confidence threshold: {self.confidence_threshold:.2f}") elif key == ord('-') or key == ord('_'): # Decrease confidence threshold self.confidence_threshold = max(0.0, self.confidence_threshold - 0.05) self.annotations_by_image = self._build_annotations_map() print(f"Confidence threshold: {self.confidence_threshold:.2f}") cv2.destroyAllWindows() print("Visualization ended.") def main(): """Main function.""" parser = argparse.ArgumentParser( 
description="COCO Dataset Visualizer - Interactive browsing of COCO format datasets", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: # Visualize ground truth annotations python coco_visualizer.py --images /path/to/images --annotations annotations.json # Visualize predictions with confidence filtering python coco_visualizer.py --images /path/to/images --annotations predictions.json --confidence 0.5 # COCO validation set example python coco_visualizer.py --images /data/coco/val2017 --annotations /data/coco/annotations/instances_val2017.json Controls: 'n' or Right Arrow - Next image 'p' or Left Arrow - Previous image 's' - Save current annotated image 'h' - Toggle help overlay '+'/'-' - Increase/decrease confidence threshold 'q' or ESC - Quit """ ) parser.add_argument('--images', '-i', required=True, help='Path to directory containing images') parser.add_argument('--annotations', '-a', required=True, help='Path to COCO format JSON annotations file') parser.add_argument('--confidence', '-c', type=float, default=0.0, help='Minimum confidence threshold for predictions (default: 0.0)') parser.add_argument('--start-index', type=int, default=0, help='Starting image index (default: 0)') args = parser.parse_args() # Validate inputs if not os.path.exists(args.images): print(f"Error: Images directory not found: {args.images}") return if not os.path.exists(args.annotations): print(f"Error: Annotations file not found: {args.annotations}") return try: # Create and run visualizer visualizer = COCOVisualizer(args.images, args.annotations, args.confidence) # Set starting index if 0 <= args.start_index < len(visualizer.images): visualizer.current_index = args.start_index else: print(f"Warning: Start index {args.start_index} out of range, using 0") visualizer.run() except KeyboardInterrupt: print("\nVisualization interrupted by user.") except Exception as e: print(f"Error: {e}") import traceback traceback.print_exc() if __name__ == '__main__': main()