#!/usr/bin/env python3
"""
Script to combine multiple drone/person detection datasets into a single COCO-format dataset.
All person-related categories are merged into a single "person" category.
"""
import json
import os
import shutil
from pathlib import Path
from typing import Dict, List, Any, Tuple
from collections import defaultdict
import logging
from tqdm import tqdm
import argparse
import cv2
import numpy as np
import random

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class DatasetCombiner:
    def __init__(self, output_dir: str, dry_run: bool = False, images_per_folder: int = 10000):
        self.output_dir = Path(output_dir)
        self.images_dir = self.output_dir / "images"
        self.annotations_file = self.output_dir / "annotations.json"
        self.dry_run = dry_run
        self.images_per_folder = images_per_folder
        if self.dry_run:
            logger.info("DRY RUN MODE - No files will be copied")
        # Create output directories
        self.output_dir.mkdir(parents=True, exist_ok=True)
        if not self.dry_run:
            self.images_dir.mkdir(exist_ok=True)
        # Initialize the COCO format structure
        self.combined_data = {
            "info": {
                "description": "Combined Person Detection Dataset from Multiple Drone Datasets",
                "version": "1.0",
                "year": 2024
            },
            "licenses": [],
            "categories": [
                {"id": 0, "name": "person", "supercategory": "person"},
                {"id": 1, "name": "ignore", "supercategory": "ignore"}
            ],
            "images": [],
            "annotations": []
        }
        # Tracking
        self.image_id_counter = 0
        self.annotation_id_counter = 0
        self.image_filename_mapping = {}  # old_path -> new_filename
        self.stats = defaultdict(lambda: {
            "total_images": 0,
            "total_annotations": 0,
            "images_with_persons": 0,
            "crowd_annotations": 0,
            "ignore_annotations": 0,
            "modalities": set(),
            "splits": set(),
            "missing_images": 0
        })

    def is_person_category(self, category_name: str) -> bool:
        """Check if a category name refers to a person."""
        person_keywords = [
            'person', 'people', 'pedestrian', 'human', 'crowd',
            'rider', 'biker', 'skater', 'swimmer'
        ]
        name_lower = category_name.lower()
        return any(keyword in name_lower for keyword in person_keywords)

    def is_crowd_category(self, category_name: str) -> bool:
        """Check if a category should be marked as crowd."""
        crowd_keywords = ['crowd', 'people', 'group']
        return any(keyword in category_name.lower() for keyword in crowd_keywords)
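
    # How the two checks interact (substring matching, case-insensitive), with
    # illustrative category names:
    #   is_person_category("Pedestrian") -> True,  is_crowd_category("Pedestrian") -> False  => person (id 0)
    #   is_person_category("people")     -> True,  is_crowd_category("people")     -> True   => crowd -> ignore (id 1)
    #   is_person_category("bicycle")    -> False                                            => dropped entirely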

    def get_new_image_path(self, dataset_name: str, original_filename: str, image_id: int) -> Tuple[str, Path]:
        """Generate a new unique filename with a paginated folder structure.

        Returns:
            Tuple of (relative_path_for_json, full_destination_path)
        """
        ext = Path(original_filename).suffix
        # Calculate the 0-based folder number so each folder holds at most
        # `images_per_folder` images
        folder_num = image_id // self.images_per_folder
        folder_name = f"{folder_num:07d}"  # 0000000, 0000001, etc.
        # Create the filename
        filename = f"{dataset_name}_{image_id:08d}{ext}"
        # Relative path stored in the JSON (e.g. "0000001/filename.jpg", relative to images/)
        relative_path = f"{folder_name}/{filename}"
        # Full destination path
        folder_path = self.images_dir / folder_name
        if not self.dry_run:
            folder_path.mkdir(parents=True, exist_ok=True)
        full_path = folder_path / filename
        return relative_path, full_path
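
    # A minimal sketch of the resulting layout with the default
    # images_per_folder=10000 (dataset name and ids are illustrative):
    #   image_id 9999  -> images/0000000/visdrone2019_00009999.jpg
    #   image_id 10000 -> images/0000001/visdrone2019_00010000.jpg
    #   image_id 23456 -> images/0000002/visdrone2019_00023456.jpg
    # The JSON "file_name" stores only the part after "images/".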

    def copy_image(self, source_path: Path, dest_path: Path) -> bool:
        """Copy an image into the combined dataset directory."""
        if self.dry_run:
            # In dry run, just check that the source exists
            if source_path.exists():
                return True
            else:
                logger.warning(f"Source image not found: {source_path}")
                return False
        try:
            if source_path.exists():
                shutil.copy2(source_path, dest_path)
                return True
            else:
                logger.warning(f"Source image not found: {source_path}")
                return False
        except Exception as e:
            logger.error(f"Error copying image {source_path}: {e}")
            return False

    def process_rgbt_drone_person(self):
        """Process the RGBTDronePerson dataset."""
        dataset_name = "rgbt_drone_person"
        base_path = Path("/mnt/archive/person_drone/RGBTDronePerson-20250828T031729Z-1-001/RGBTDronePerson")
        annotation_files = [
            ("train_thermal.json", "train", "thermal"),
            ("val_thermal.json", "val", "thermal"),
            ("sub_train_thermal.json", "sub_train", "thermal"),
            ("sub_train_visible.json", "sub_train", "visible")
        ]
        for ann_file, split, modality in annotation_files:
            ann_path = base_path / ann_file
            if not ann_path.exists():
                logger.warning(f"Annotation file not found: {ann_path}")
                continue
            logger.info(f"Processing {dataset_name} - {split} - {modality}")
            with open(ann_path, 'r') as f:
                data = json.load(f)
            # Map category IDs
            category_mapping = {}
            for cat in data.get('categories', []):
                if self.is_person_category(cat['name']):
                    category_mapping[cat['id']] = 0
            # Process images and annotations
            image_id_mapping = {}
            images_with_persons = set()
            # First pass: identify images with person annotations
            for ann in data.get('annotations', []):
                if ann['category_id'] in category_mapping:
                    images_with_persons.add(ann['image_id'])
            # Process ALL images (drone dataset - keep all images)
            for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} {split} {modality} images"):
                old_id = img['id']
                new_id = self.image_id_counter
                self.image_id_counter += 1
                image_id_mapping[old_id] = new_id
                # Determine the image path based on split and modality
                img_filename = img['file_name']
                # RGBTDronePerson has the structure: RGBTDronePerson/{split}/{modality}/{filename}
                if split == "sub_train":
                    # sub_train doesn't have its own folder; it uses the train folder
                    source_path = base_path / "RGBTDronePerson" / "train" / modality / img_filename
                else:
                    source_path = base_path / "RGBTDronePerson" / split / modality / img_filename
                relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id)
                if not source_path.exists():
                    self.stats[dataset_name]["missing_images"] += 1
                    if not self.dry_run:
                        continue
                if self.copy_image(source_path, dest_path):
                    new_img = {
                        "id": new_id,
                        "file_name": relative_path,
                        "width": img.get('width', 0),
                        "height": img.get('height', 0),
                        "dataset": dataset_name,
                        "split": split,
                        "modality": modality,
                        "original_filename": img_filename
                    }
                    self.combined_data['images'].append(new_img)
                    self.stats[dataset_name]["total_images"] += 1
                    if old_id in images_with_persons:
                        self.stats[dataset_name]["images_with_persons"] += 1
                    self.stats[dataset_name]["modalities"].add(modality)
                    self.stats[dataset_name]["splits"].add(split)
            # Process annotations
            for ann in data.get('annotations', []):
                if ann['category_id'] not in category_mapping:
                    continue
                if ann['image_id'] not in image_id_mapping:
                    continue
                old_cat_name = next((c['name'] for c in data['categories'] if c['id'] == ann['category_id']), '')
                is_crowd = self.is_crowd_category(old_cat_name) or ann.get('iscrowd', 0) == 1
                # Crowd annotations go to the ignore category
                new_ann = {
                    "id": self.annotation_id_counter,
                    "image_id": image_id_mapping[ann['image_id']],
                    "category_id": 1 if is_crowd else 0,  # crowd -> ignore (1), person -> 0
                    "bbox": ann['bbox'],
                    "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
                    "segmentation": ann.get('segmentation', []),
                    "iscrowd": 1 if is_crowd else 0
                }
                self.combined_data['annotations'].append(new_ann)
                self.annotation_id_counter += 1
                self.stats[dataset_name]["total_annotations"] += 1
                if is_crowd:
                    self.stats[dataset_name]["crowd_annotations"] += 1
                    self.stats[dataset_name]["ignore_annotations"] += 1

    def process_search_and_rescue(self):
        """Process the Search and Rescue dataset."""
        dataset_name = "search_and_rescue"
        base_path = Path("/mnt/archive/person_drone/search-and-rescue")
        splits = ["train", "valid", "test"]
        for split in splits:
            ann_path = base_path / f"{split}.json"
            if not ann_path.exists():
                logger.warning(f"Annotation file not found: {ann_path}")
                continue
            logger.info(f"Processing {dataset_name} - {split}")
            with open(ann_path, 'r') as f:
                data = json.load(f)
            # Map category IDs (human -> person)
            category_mapping = {}
            for cat in data.get('categories', []):
                if self.is_person_category(cat['name']):
                    category_mapping[cat['id']] = 0
            # Process images and annotations
            image_id_mapping = {}
            images_with_persons = set()
            # First pass: identify images with person annotations
            for ann in data.get('annotations', []):
                if ann['category_id'] in category_mapping:
                    images_with_persons.add(ann['image_id'])
            # Process ALL images (drone dataset - keep all images)
            for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} {split} images"):
                old_id = img['id']
                new_id = self.image_id_counter
                self.image_id_counter += 1
                image_id_mapping[old_id] = new_id
                img_filename = img['file_name']
                # Search and Rescue keeps images in train/images, valid/images, test/images folders
                source_path = base_path / split / "images" / img_filename
                relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id)
                if self.copy_image(source_path, dest_path):
                    new_img = {
                        "id": new_id,
                        "file_name": relative_path,
                        "width": img.get('width', 0),
                        "height": img.get('height', 0),
                        "dataset": dataset_name,
                        "split": split,
                        "original_filename": img_filename
                    }
                    self.combined_data['images'].append(new_img)
                    self.stats[dataset_name]["total_images"] += 1
                    if old_id in images_with_persons:
                        self.stats[dataset_name]["images_with_persons"] += 1
                    self.stats[dataset_name]["splits"].add(split)
            # Process annotations
            for ann in data.get('annotations', []):
                if ann['category_id'] not in category_mapping:
                    continue
                if ann['image_id'] not in image_id_mapping:
                    continue
                is_crowd = ann.get('iscrowd', 0) == 1
                new_ann = {
                    "id": self.annotation_id_counter,
                    "image_id": image_id_mapping[ann['image_id']],
                    "category_id": 1 if is_crowd else 0,  # crowd -> ignore (1), person -> 0
                    "bbox": ann['bbox'],
                    "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
                    "segmentation": ann.get('segmentation', []),
                    "iscrowd": ann.get('iscrowd', 0)
                }
                self.combined_data['annotations'].append(new_ann)
                self.annotation_id_counter += 1
                self.stats[dataset_name]["total_annotations"] += 1
                if is_crowd:
                    self.stats[dataset_name]["crowd_annotations"] += 1
                    self.stats[dataset_name]["ignore_annotations"] += 1

    def process_stanford_drone(self):
        """Process the Stanford Drone dataset."""
        dataset_name = "stanford_drone"
        base_path = Path("/mnt/archive/person_drone/stanford_drone_coco")
        ann_path = base_path / "train.json"
        if not ann_path.exists():
            logger.warning(f"Annotation file not found: {ann_path}")
            return
        logger.info(f"Processing {dataset_name}")
        with open(ann_path, 'r') as f:
            data = json.load(f)
        # Map category IDs (Pedestrian, Biker, Skater -> person)
        category_mapping = {}
        for cat in data.get('categories', []):
            if self.is_person_category(cat['name']):
                category_mapping[cat['id']] = 0
        # Process images and annotations
        image_id_mapping = {}
        images_with_persons = set()
        # First pass: identify images with person annotations
        for ann in data.get('annotations', []):
            if ann['category_id'] in category_mapping:
                images_with_persons.add(ann['image_id'])
        # Process ALL images (drone dataset - keep all images)
        for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"):
            old_id = img['id']
            new_id = self.image_id_counter
            self.image_id_counter += 1
            image_id_mapping[old_id] = new_id
            img_filename = img['file_name']
            # Stanford Drone keeps images in the train_images folder
            possible_paths = [
                base_path / "train_images" / img_filename,
                base_path / img_filename,
                base_path / "images" / img_filename
            ]
            source_path = None
            for path in possible_paths:
                if path.exists():
                    source_path = path
                    break
            if source_path is None:
                logger.warning(f"Image not found in any expected location: {img_filename}")
                continue
            relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id)
            if self.copy_image(source_path, dest_path):
                new_img = {
                    "id": new_id,
                    "file_name": relative_path,
                    "width": img.get('width', 0),
                    "height": img.get('height', 0),
                    "dataset": dataset_name,
                    "split": "train",
                    "original_filename": img_filename
                }
                self.combined_data['images'].append(new_img)
                self.stats[dataset_name]["total_images"] += 1
                if old_id in images_with_persons:
                    self.stats[dataset_name]["images_with_persons"] += 1
                self.stats[dataset_name]["splits"].add("train")
        # Process annotations
        for ann in data.get('annotations', []):
            if ann['category_id'] not in category_mapping:
                continue
            if ann['image_id'] not in image_id_mapping:
                continue
            is_crowd = ann.get('iscrowd', 0) == 1
            new_ann = {
                "id": self.annotation_id_counter,
                "image_id": image_id_mapping[ann['image_id']],
                "category_id": 1 if is_crowd else 0,  # crowd -> ignore (1), person -> 0
                "bbox": ann['bbox'],
                "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
                "segmentation": ann.get('segmentation', []),
                "iscrowd": ann.get('iscrowd', 0)
            }
            self.combined_data['annotations'].append(new_ann)
            self.annotation_id_counter += 1
            self.stats[dataset_name]["total_annotations"] += 1
            if is_crowd:
                self.stats[dataset_name]["crowd_annotations"] += 1
                self.stats[dataset_name]["ignore_annotations"] += 1

    def process_coco_format_dataset(self, dataset_name: str, base_path: Path, ann_filename: str = "annotations.json"):
        """Generic processor for COCO format datasets."""
        ann_path = base_path / ann_filename
        if not ann_path.exists():
            logger.warning(f"Annotation file not found: {ann_path}")
            return
        logger.info(f"Processing {dataset_name}")
        with open(ann_path, 'r') as f:
            data = json.load(f)
        # Map category IDs
        category_mapping = {}
        for cat in data.get('categories', []):
            if self.is_person_category(cat['name']):
                category_mapping[cat['id']] = 0
        # Process images and annotations
        image_id_mapping = {}
        images_with_persons = set()
        # First pass: identify images with person annotations
        for ann in data.get('annotations', []):
            if ann['category_id'] in category_mapping:
                images_with_persons.add(ann['image_id'])
        # Process ALL images (drone dataset - keep all images)
        for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"):
            old_id = img['id']
            new_id = self.image_id_counter
            self.image_id_counter += 1
            image_id_mapping[old_id] = new_id
            img_filename = img['file_name']
            # Most COCO datasets include the path in file_name,
            # so try several possible locations
            possible_paths = [
                base_path / img_filename,             # Full path as specified in the JSON
                base_path / "images" / img_filename,
                base_path / Path(img_filename).name   # Just the filename without path
            ]
            source_path = None
            for path in possible_paths:
                if path.exists():
                    source_path = path
                    break
            if source_path is None:
                logger.warning(f"Image not found: {img_filename}")
                continue
            relative_path, dest_path = self.get_new_image_path(dataset_name, Path(img_filename).name, new_id)
            if self.copy_image(source_path, dest_path):
                new_img = {
                    "id": new_id,
                    "file_name": relative_path,
                    "width": img.get('width', 0),
                    "height": img.get('height', 0),
                    "dataset": dataset_name,
                    "original_filename": img_filename
                }
                # Add split info if available
                if 'split' in img:
                    new_img['split'] = img['split']
                    self.stats[dataset_name]["splits"].add(img['split'])
                self.combined_data['images'].append(new_img)
                self.stats[dataset_name]["total_images"] += 1
                if old_id in images_with_persons:
                    self.stats[dataset_name]["images_with_persons"] += 1
        # Process annotations
        for ann in data.get('annotations', []):
            if ann['category_id'] not in category_mapping:
                continue
            if ann['image_id'] not in image_id_mapping:
                continue
            # Check for crowd based on the category name
            old_cat = next((c for c in data['categories'] if c['id'] == ann['category_id']), None)
            is_crowd = ann.get('iscrowd', 0)
            if old_cat and self.is_crowd_category(old_cat['name']):
                is_crowd = 1
            new_ann = {
                "id": self.annotation_id_counter,
                "image_id": image_id_mapping[ann['image_id']],
                "category_id": 1 if is_crowd else 0,  # crowd -> ignore (1), person -> 0
                "bbox": ann['bbox'],
                "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
                "segmentation": ann.get('segmentation', []),
                "iscrowd": is_crowd
            }
            self.combined_data['annotations'].append(new_ann)
            self.annotation_id_counter += 1
            self.stats[dataset_name]["total_annotations"] += 1
            if is_crowd:
                self.stats[dataset_name]["crowd_annotations"] += 1
                self.stats[dataset_name]["ignore_annotations"] += 1

    def process_visdrone(self):
        """Process the VisDrone2019-DET dataset."""
        dataset_name = "visdrone2019"
        base_path = Path("/mnt/archive/person_drone/VisDrone2019-DET")
        ann_path = base_path / "annotations.json"
        if not ann_path.exists():
            logger.warning(f"Annotation file not found: {ann_path}")
            return
        logger.info(f"Processing {dataset_name}")
        with open(ann_path, 'r') as f:
            data = json.load(f)
        # Map category IDs (pedestrian, people -> person)
        category_mapping = {}
        for cat in data.get('categories', []):
            if self.is_person_category(cat['name']):
                category_mapping[cat['id']] = 0
        # Process images and annotations
        image_id_mapping = {}
        images_with_persons = set()
        # First pass: identify images with person annotations
        for ann in data.get('annotations', []):
            if ann['category_id'] in category_mapping:
                images_with_persons.add(ann['image_id'])
        # Process ALL images (drone dataset - keep all images)
        for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"):
            old_id = img['id']
            new_id = self.image_id_counter
            self.image_id_counter += 1
            image_id_mapping[old_id] = new_id
            img_filename = img['file_name']
            # VisDrone keeps images in train_images, val_images, test_images folders;
            # file_name already includes the folder (e.g., "train_images/xxx.jpg")
            possible_paths = [
                base_path / img_filename,  # This should work since file_name includes the folder
                base_path / "images" / img_filename
            ]
            source_path = None
            for path in possible_paths:
                if path.exists():
                    source_path = path
                    break
            if source_path is None:
                logger.warning(f"Image not found: {img_filename}")
                continue
            relative_path, dest_path = self.get_new_image_path(dataset_name, Path(img_filename).name, new_id)
            if self.copy_image(source_path, dest_path):
                new_img = {
                    "id": new_id,
                    "file_name": relative_path,
                    "width": img.get('width', 0),
                    "height": img.get('height', 0),
                    "dataset": dataset_name,
                    "original_filename": img_filename
                }
                self.combined_data['images'].append(new_img)
                self.stats[dataset_name]["total_images"] += 1
                if old_id in images_with_persons:
                    self.stats[dataset_name]["images_with_persons"] += 1
        # Process annotations
        for ann in data.get('annotations', []):
            if ann['category_id'] not in category_mapping:
                continue
            if ann['image_id'] not in image_id_mapping:
                continue
            # No crowd handling for VisDrone: all mapped boxes are kept as individual persons
            is_crowd = 0
            new_ann = {
                "id": self.annotation_id_counter,
                "image_id": image_id_mapping[ann['image_id']],
                "category_id": 1 if is_crowd else 0,  # crowd -> ignore (1), person -> 0
                "bbox": ann['bbox'],
                "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
                "segmentation": ann.get('segmentation', []),
                "iscrowd": is_crowd
            }
            self.combined_data['annotations'].append(new_ann)
            self.annotation_id_counter += 1
            self.stats[dataset_name]["total_annotations"] += 1
            if is_crowd:
                self.stats[dataset_name]["crowd_annotations"] += 1
                self.stats[dataset_name]["ignore_annotations"] += 1

    def process_seadronessee(self):
        """Process the SeaDronesSee dataset."""
        dataset_name = "seadronessee"
        base_path = Path("/mnt/archive/person_drone/seadronessee")
        ann_dir = base_path / "annotations"
        # Process the train and val splits
        for split, ann_file in [("train", "instances_train.json"), ("val", "instances_val.json")]:
            ann_path = ann_dir / ann_file
            if not ann_path.exists():
                logger.warning(f"SeaDronesSee annotations missing for {split}: {ann_path}")
                continue
            logger.info(f"Processing {dataset_name} - {split}")
            with open(ann_path, 'r') as f:
                data = json.load(f)
            # Create category mappings
            swimmer_cat_id = None
            ignore_cat_ids = set()  # For boats, jetskis, and "ignored"
            for cat in data.get('categories', []):
                cat_name = cat.get('name', '').lower()
                if cat_name == 'swimmer':
                    swimmer_cat_id = cat['id']
                elif cat_name in ['boat', 'jetski', 'ignored']:
                    ignore_cat_ids.add(cat['id'])
            if swimmer_cat_id is None:
                logger.warning(f"No 'swimmer' category found in {ann_path}")
            # Identify images with swimmers or ignore regions
            images_with_persons = set()
            images_with_ignore = set()
            for ann in data.get('annotations', []):
                cat_id = ann.get('category_id')
                img_id = ann['image_id']
                if cat_id == swimmer_cat_id:
                    images_with_persons.add(img_id)
                elif cat_id in ignore_cat_ids:
                    images_with_ignore.add(img_id)
            # Process images
            image_id_mapping = {}
            for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} {split} images"):
                old_id = img['id']
                new_id = self.image_id_counter
                self.image_id_counter += 1
                image_id_mapping[old_id] = new_id
                img_filename = img.get('file_name', '')
                source_path = base_path / "images" / split / img_filename
                relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id)
                if not source_path.exists():
                    self.stats[dataset_name]["missing_images"] += 1
                    if not self.dry_run:
                        continue
                if self.copy_image(source_path, dest_path):
                    new_img = {
                        "id": new_id,
                        "file_name": relative_path,
                        "width": img.get('width', 0),
                        "height": img.get('height', 0),
                        "dataset": dataset_name,
                        "split": split,
                        "original_filename": img_filename
                    }
                    self.combined_data['images'].append(new_img)
                    self.stats[dataset_name]["total_images"] += 1
                    if old_id in images_with_persons:
                        self.stats[dataset_name]["images_with_persons"] += 1
                    self.stats[dataset_name]["splits"].add(split)
            # Process annotations
            for ann in data.get('annotations', []):
                cat_id = ann.get('category_id')
                img_id = ann['image_id']
                if img_id not in image_id_mapping:
                    continue
                # Determine the target category
                if cat_id == swimmer_cat_id:
                    # Swimmer -> person (0), or ignore (1) if crowd
                    is_crowd = ann.get('iscrowd', 0) == 1
                    target_cat = 1 if is_crowd else 0
                elif cat_id in ignore_cat_ids:
                    # Boats, jetskis, "ignored" -> ignore (1); treat all ignore regions as crowd
                    target_cat = 1
                    is_crowd = 1
                else:
                    # Skip other categories (life_saving_appliances, buoy)
                    continue
                bbox = ann.get('bbox', [0, 0, 0, 0])
                new_ann = {
                    "id": self.annotation_id_counter,
                    "image_id": image_id_mapping[img_id],
                    "category_id": target_cat,
                    "bbox": ann.get('bbox', []),
                    "area": ann.get('area', bbox[2] * bbox[3]),
                    "segmentation": ann.get('segmentation', []),
                    "iscrowd": 1 if is_crowd else 0  # normalize bool/int to 0/1
                }
                self.combined_data['annotations'].append(new_ann)
                self.annotation_id_counter += 1
                self.stats[dataset_name]["total_annotations"] += 1
                if target_cat == 1:
                    self.stats[dataset_name]["ignore_annotations"] += 1
                if new_ann['iscrowd']:
                    self.stats[dataset_name]["crowd_annotations"] += 1

    def process_lisa_alert(self):
        """Process the LISA Alert dataset - combines all splits (train/val/test) into one."""
        dataset_name = "lisa_alert"
        base_path = Path("/mnt/archive/person_drone/lisa_alert")
        ann_dir = base_path / "annotations"
        logger.info(f"Processing {dataset_name} - combining all splits")
        # Process all splits but combine them into one dataset
        splits_to_process = ["train", "val", "test"]
        # Track which images we've already processed (to avoid duplicates)
        processed_images = set()
        for split in splits_to_process:
            ann_path = ann_dir / f"{split}.json"
            if not ann_path.exists():
                logger.warning(f"LISA Alert annotation file not found: {ann_path}")
                continue
            logger.info(f"Processing {dataset_name} - {split} split")
            with open(ann_path, 'r') as f:
                data = json.load(f)
            # Map category IDs (Pedestrian -> person)
            category_mapping = {}
            for cat in data.get('categories', []):
                if self.is_person_category(cat['name']):
                    category_mapping[cat['id']] = 0
            # Process images and annotations
            image_id_mapping = {}
            images_with_persons = set()
            # First pass: identify images with person annotations
            for ann in data.get('annotations', []):
                if ann['category_id'] in category_mapping:
                    images_with_persons.add(ann['image_id'])
            # Process images
            for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} {split} images"):
                old_id = img['id']
                img_filename = str(img['file_name']).strip()
                # LISA Alert filenames in the JSON lack the .jpg extension
                if not img_filename.endswith('.jpg'):
                    img_filename = img_filename + '.jpg'
                # Skip if we've already processed this image from another split
                if img_filename in processed_images:
                    continue
                processed_images.add(img_filename)
                new_id = self.image_id_counter
                self.image_id_counter += 1
                image_id_mapping[old_id] = new_id
                # Images live in the images/ folder
                source_path = base_path / "images" / img_filename
                relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id)
                if not source_path.exists():
                    self.stats[dataset_name]["missing_images"] += 1
                    if not self.dry_run:
                        continue
                if self.copy_image(source_path, dest_path):
                    new_img = {
                        "id": new_id,
                        "file_name": relative_path,
                        "width": img.get('width', 0),
                        "height": img.get('height', 0),
                        "dataset": dataset_name,
                        "original_filename": img_filename
                    }
                    self.combined_data['images'].append(new_img)
                    self.stats[dataset_name]["total_images"] += 1
                    if old_id in images_with_persons:
                        self.stats[dataset_name]["images_with_persons"] += 1
            # Process annotations
            for ann in data.get('annotations', []):
                if ann['category_id'] not in category_mapping:
                    continue
                if ann['image_id'] not in image_id_mapping:
                    continue
                # Check for crowd
                old_cat_name = next((c['name'] for c in data['categories'] if c['id'] == ann['category_id']), '')
                is_crowd = self.is_crowd_category(old_cat_name) or ann.get('iscrowd', 0) == 1
                new_ann = {
                    "id": self.annotation_id_counter,
                    "image_id": image_id_mapping[ann['image_id']],
                    "category_id": 1 if is_crowd else 0,  # crowd -> ignore (1), person -> 0
                    "bbox": ann['bbox'],
                    "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
                    "segmentation": ann.get('segmentation', []),
                    "iscrowd": 1 if is_crowd else 0
                }
                self.combined_data['annotations'].append(new_ann)
                self.annotation_id_counter += 1
                self.stats[dataset_name]["total_annotations"] += 1
                if is_crowd:
                    self.stats[dataset_name]["crowd_annotations"] += 1
                    self.stats[dataset_name]["ignore_annotations"] += 1

    def process_wisard(self):
        """Process the WISARD dataset with special handling for pseudolabeled annotations."""
        dataset_name = "wisard"
        base_path = Path("/mnt/archive/person_drone/wisard_coco")
        ann_path = base_path / "annotations.json"
        if not ann_path.exists():
            logger.warning(f"Annotation file not found: {ann_path}")
            return
        logger.info(f"Processing {dataset_name} - includes pseudolabeled annotations")
        with open(ann_path, 'r') as f:
            data = json.load(f)
        # WISARD has a special category mapping:
        #   Category 0: pseudolabeled persons
        #   Category 1: verified/original persons
        # Both map to our unified person category (0).
        # Track statistics for pseudolabeled vs verified
        pseudolabel_stats = {
            "pseudolabeled": 0,
            "verified": 0,
            "low_confidence": 0,   # confidence < 0.3
            "high_confidence": 0   # confidence >= 0.7
        }
        # Process images and annotations
        image_id_mapping = {}
        images_with_persons = set()
        # First pass: identify images with person annotations (both categories)
        for ann in data.get('annotations', []):
            if ann['category_id'] in [0, 1]:  # Both are person categories
                images_with_persons.add(ann['image_id'])
        # Process ALL images
        for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"):
            old_id = img['id']
            new_id = self.image_id_counter
            self.image_id_counter += 1
            image_id_mapping[old_id] = new_id
            img_filename = img['file_name']
            # WISARD images are organized in sequence folders;
            # file_name already includes the folder structure
            possible_paths = [
                base_path / img_filename,             # Full path as specified in the JSON
                base_path / Path(img_filename).name   # Just the filename without path
            ]
            source_path = None
            for path in possible_paths:
                if path.exists():
                    source_path = path
                    break
            if source_path is None:
                # Many images were removed, so we just skip them
                self.stats[dataset_name]["missing_images"] += 1
                continue
            relative_path, dest_path = self.get_new_image_path(dataset_name, Path(img_filename).name, new_id)
            if self.copy_image(source_path, dest_path):
                new_img = {
                    "id": new_id,
                    "file_name": relative_path,
                    "width": img.get('width', 0),
                    "height": img.get('height', 0),
                    "dataset": dataset_name,
                    "original_filename": img_filename
                }
                # Add WISARD-specific metadata
                if 'sequence' in img:
                    new_img['sequence'] = img['sequence']
                if 'modality' in img:
                    new_img['modality'] = img['modality']
                    self.stats[dataset_name]["modalities"].add(img['modality'])
                if 'location' in img:
                    new_img['location'] = img['location']
                if 'sensor' in img:
                    new_img['sensor'] = img['sensor']
                self.combined_data['images'].append(new_img)
                self.stats[dataset_name]["total_images"] += 1
                if old_id in images_with_persons:
                    self.stats[dataset_name]["images_with_persons"] += 1
        # Process annotations
        for ann in data.get('annotations', []):
            if ann['category_id'] not in [0, 1]:  # Only process person categories
                continue
            if ann['image_id'] not in image_id_mapping:
                continue
            # All WISARD annotations map to person (0) in our unified dataset;
            # we preserve the pseudolabel metadata
            is_crowd = ann.get('iscrowd', 0) == 1
            new_ann = {
                "id": self.annotation_id_counter,
                "image_id": image_id_mapping[ann['image_id']],
                "category_id": 1 if is_crowd else 0,  # crowd -> ignore (1), person -> 0
                "bbox": ann['bbox'],
                "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
                "segmentation": ann.get('segmentation', []),
                "iscrowd": ann.get('iscrowd', 0)
            }
            # Preserve WISARD-specific metadata
            if 'is_pseudolabel' in ann:
                new_ann['is_pseudolabel'] = ann['is_pseudolabel']
                if ann['is_pseudolabel']:
                    pseudolabel_stats['pseudolabeled'] += 1
            if 'verified' in ann:
                new_ann['verified'] = ann['verified']
                if ann['verified']:
                    pseudolabel_stats['verified'] += 1
            if 'confidence' in ann:
                new_ann['confidence'] = ann['confidence']
                if ann['confidence'] < 0.3:
                    pseudolabel_stats['low_confidence'] += 1
                elif ann['confidence'] >= 0.7:
                    pseudolabel_stats['high_confidence'] += 1
            if 'sequence' in ann:
                new_ann['sequence'] = ann['sequence']
            if 'modality' in ann:
                new_ann['modality'] = ann['modality']
            self.combined_data['annotations'].append(new_ann)
            self.annotation_id_counter += 1
            self.stats[dataset_name]["total_annotations"] += 1
            if is_crowd:
                self.stats[dataset_name]["crowd_annotations"] += 1
                self.stats[dataset_name]["ignore_annotations"] += 1
        # Add pseudolabel statistics to the dataset stats
        self.stats[dataset_name]["pseudolabeled_annotations"] = pseudolabel_stats['pseudolabeled']
        self.stats[dataset_name]["verified_annotations"] = pseudolabel_stats['verified']
        self.stats[dataset_name]["low_confidence_annotations"] = pseudolabel_stats['low_confidence']
        self.stats[dataset_name]["high_confidence_annotations"] = pseudolabel_stats['high_confidence']
        logger.info(f"WISARD dataset processed: {pseudolabel_stats['pseudolabeled']} pseudolabeled, "
                    f"{pseudolabel_stats['verified']} verified annotations")

    def process_crowd_human(self):
        """Process the CrowdHuman dataset from pre-converted COCO format."""
        dataset_name = "crowd_human"
        # Use the pre-converted COCO format annotations
        ann_path = Path("/home/svakhreev/projects/DEIM/data/crowd_human_coco/annotations_combined.json")
        if not ann_path.exists():
            logger.warning(f"CrowdHuman annotations not found: {ann_path}")
            return
        logger.info(f"Processing {dataset_name}")
        with open(ann_path, 'r') as f:
            data = json.load(f)
        # CrowdHuman has a person category with id=1 in the converted format;
        # map it to our unified person category (id=0)
        category_mapping = {}
        for cat in data.get('categories', []):
            if cat['name'].lower() == 'person':
                category_mapping[cat['id']] = 0  # Map to our person category
        # Process images and annotations
        image_id_mapping = {}
        images_with_persons = set()
        # First pass: identify images with person annotations
        for ann in data.get('annotations', []):
            if ann['category_id'] in category_mapping:
                images_with_persons.add(ann['image_id'])
        # Process ALL images (keep all images from CrowdHuman)
        for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"):
            old_id = img['id']
            new_id = self.image_id_counter
            self.image_id_counter += 1
            image_id_mapping[old_id] = new_id
            img_filename = img['file_name']
            # CrowdHuman file_name format: "CrowdHuman_train/Images/273271,1a0d6000b9e1f5b7.jpg"
            # or "CrowdHuman_val/Images/273278,c9db000d5146c15.jpg"
            # Construct the source path
            source_path = Path("/mnt/archive/person_drone/crowd_human") / img_filename
            # Extract just the filename for the new path
            actual_filename = Path(img_filename).name
            relative_path, dest_path = self.get_new_image_path(dataset_name, actual_filename, new_id)
            if not source_path.exists():
                self.stats[dataset_name]["missing_images"] += 1
                if not self.dry_run:
                    continue
            if self.copy_image(source_path, dest_path):
                # Determine the split from the path
                split = "unknown"
                if "train" in img_filename.lower():
                    split = "train"
                elif "val" in img_filename.lower():
                    split = "val"
                elif "test" in img_filename.lower():
                    split = "test"
                new_img = {
                    "id": new_id,
                    "file_name": relative_path,
                    "width": img.get('width', 0),
                    "height": img.get('height', 0),
                    "dataset": dataset_name,
                    "split": split,
                    "original_filename": img_filename
                }
                self.combined_data['images'].append(new_img)
                self.stats[dataset_name]["total_images"] += 1
                if old_id in images_with_persons:
                    self.stats[dataset_name]["images_with_persons"] += 1
                self.stats[dataset_name]["splits"].add(split)
        # Process annotations
        for ann in data.get('annotations', []):
            if ann['category_id'] not in category_mapping:
                continue
            if ann['image_id'] not in image_id_mapping:
                continue
            # CrowdHuman uses the iscrowd flag for heavily occluded persons
            is_crowd = ann.get('iscrowd', 0) == 1
            new_ann = {
                "id": self.annotation_id_counter,
                "image_id": image_id_mapping[ann['image_id']],
                "category_id": 1 if is_crowd else 0,  # crowd -> ignore (1), person -> 0
                "bbox": ann['bbox'],
                "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
                "segmentation": ann.get('segmentation', []),
                "iscrowd": ann.get('iscrowd', 0)
            }
            self.combined_data['annotations'].append(new_ann)
            self.annotation_id_counter += 1
            self.stats[dataset_name]["total_annotations"] += 1
            if is_crowd:
                self.stats[dataset_name]["crowd_annotations"] += 1
                self.stats[dataset_name]["ignore_annotations"] += 1

    def process_objects365(self):
        """Process the Objects365 dataset (person category only)."""
        dataset_name = "objects365"
        base_path = Path("/mnt/archive/datasets/OpenDataLab___Objects365")
        # Process the train split
        ann_path = base_path / "raw/Objects365/data/train/zhiyuan_objv2_train.json"
        if ann_path.exists():
            logger.info(f"Processing {dataset_name} - train split (this may take a while...)")
            with open(ann_path, 'r') as f:
                data = json.load(f)
            # Find the person category ID
            person_cat_id = None
            for cat in data.get('categories', []):
                if cat['name'].lower() == 'person':
                    person_cat_id = cat['id']
                    break
            if person_cat_id is None:
                logger.warning("Person category not found in Objects365")
                return
            # Process images and annotations
            image_id_mapping = {}
            images_with_persons = set()
            # First pass: identify images with person annotations
            logger.info("Identifying images with person annotations...")
            for ann in tqdm(data.get('annotations', []), desc="Scanning annotations"):
                if ann['category_id'] == person_cat_id:
                    images_with_persons.add(ann['image_id'])
            logger.info(f"Found {len(images_with_persons)} images with persons")
            # Create an image ID -> image dict for faster lookup
            id_to_image = {img['id']: img for img in data.get('images', [])}
            # Process only images with persons
            processed = 0
            for img_id in tqdm(images_with_persons, desc=f"Processing {dataset_name} images"):
                if img_id not in id_to_image:
                    continue
                img = id_to_image[img_id]
                old_id = img['id']
                new_id = self.image_id_counter
                self.image_id_counter += 1
                image_id_mapping[old_id] = new_id
                img_filename = img['file_name']
                # Objects365 image paths need adjustment:
                #   JSON has:    "images/v1/patch8/objects365_v1_00420917.jpg"
                #   Actual path: "train/patch8/objects365_v1_00420917.jpg"
                # Extract the patch directory and filename from the path
                path_parts = Path(img_filename).parts
                if len(path_parts) >= 3:
                    patch_dir = path_parts[-2]   # e.g., "patch8"
                    filename = path_parts[-1]    # e.g., "objects365_v1_00420917.jpg"
                    source_path = base_path / "raw/Objects365/data/train" / patch_dir / filename
                else:
                    source_path = base_path / "raw/Objects365/data" / img_filename
                if not source_path.exists():
                    # In dry run, we still want to count the image even if the file doesn't exist
                    if not self.dry_run:
                        continue
                    else:
                        self.stats[dataset_name]["missing_images"] = self.stats[dataset_name].get("missing_images", 0) + 1
                relative_path, dest_path = self.get_new_image_path(dataset_name, Path(img_filename).name, new_id)
                # In dry run, or if the file exists, add it to the dataset
                if self.dry_run or self.copy_image(source_path, dest_path):
                    new_img = {
                        "id": new_id,
                        "file_name": relative_path,
                        "width": img.get('width', 0),
                        "height": img.get('height', 0),
                        "dataset": dataset_name,
                        "split": "train",
                        "original_filename": img_filename
                    }
                    self.combined_data['images'].append(new_img)
                    self.stats[dataset_name]["total_images"] += 1
                    self.stats[dataset_name]["images_with_persons"] += 1
                    self.stats[dataset_name]["splits"].add("train")
                    processed += 1
            # Process annotations
            for ann in tqdm(data.get('annotations', []), desc="Processing annotations"):
                if ann['category_id'] != person_cat_id:
                    continue
                if ann['image_id'] not in image_id_mapping:
                    continue
                is_crowd = ann.get('iscrowd', 0) == 1
                new_ann = {
                    "id": self.annotation_id_counter,
                    "image_id": image_id_mapping[ann['image_id']],
                    "category_id": 1 if is_crowd else 0,  # crowd -> ignore (1), person -> 0
                    "bbox": ann['bbox'],
                    "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
                    "segmentation": ann.get('segmentation', []),
                    "iscrowd": ann.get('iscrowd', 0)
                }
                self.combined_data['annotations'].append(new_ann)
                self.annotation_id_counter += 1
                self.stats[dataset_name]["total_annotations"] += 1
                if is_crowd:
                    self.stats[dataset_name]["crowd_annotations"] += 1
                    self.stats[dataset_name]["ignore_annotations"] += 1

    def visualize_samples(self, num_samples: int = 10, dataset_filter: str = None):
        """Visualize random samples with bounding boxes using cv2.imshow."""
        if len(self.combined_data['images']) == 0:
            logger.warning("No images to visualize")
            return
        # Create an image_id -> annotations mapping
        img_to_anns = defaultdict(list)
        for ann in self.combined_data['annotations']:
            img_to_anns[ann['image_id']].append(ann)
        # Sample random images that have annotations
        images_with_anns = [img for img in self.combined_data['images']
                            if img['id'] in img_to_anns]
        # Filter by dataset if specified
        if dataset_filter:
            images_with_anns = [img for img in images_with_anns
                                if img.get('dataset', '').lower() == dataset_filter.lower()]
            if not images_with_anns:
                logger.warning(f"No images with annotations from dataset '{dataset_filter}'")
                return
        if not images_with_anns:
            logger.warning("No images with annotations to visualize")
            return
        num_samples = min(num_samples, len(images_with_anns))
        sampled_images = random.sample(images_with_anns, num_samples)
        logger.info(f"Visualizing {num_samples} sample images with bounding boxes...")
        logger.info("Press any key to see the next image, 'q' to quit")
        for idx, img_info in enumerate(sampled_images):
            # Determine the actual image path based on the dataset
            dataset_name = img_info.get('dataset', '')
            original_filename = img_info.get('original_filename', img_info['file_name'])
            # Find the source image path
            if self.dry_run or not (self.images_dir / img_info['file_name']).exists():
                # In dry-run mode, or if the copied image doesn't exist, load from the original location
                source_path = None
                if dataset_name == "rgbt_drone_person":
                    base = Path("/mnt/archive/person_drone/RGBTDronePerson-20250828T031729Z-1-001/RGBTDronePerson/RGBTDronePerson")
                    split = img_info.get('split', 'train')
                    modality = img_info.get('modality', 'thermal')
                    if split == "sub_train":
                        split = "train"
                    source_path = base / split / modality / original_filename
                elif dataset_name == "search_and_rescue":
                    base = Path("/mnt/archive/person_drone/search-and-rescue")
                    split = img_info.get('split', 'train')
                    source_path = base / split / "images" / Path(original_filename).name
                elif dataset_name == "stanford_drone":
                    base = Path("/mnt/archive/person_drone/stanford_drone_coco")
                    source_path = base / "train_images" / original_filename
                elif dataset_name == "vtsar":
                    base = Path("/mnt/archive/person_drone/vtsar_coco")
                    source_path = base / original_filename
                elif dataset_name == "vtuav":
                    base = Path("/mnt/archive/person_drone/vtuav_coco")
                    source_path = base / original_filename
                elif dataset_name == "wisard":
                    base = Path("/mnt/archive/person_drone/wisard_coco")
                    source_path = base / original_filename
                elif dataset_name == "visdrone2019":
                    base = Path("/mnt/archive/person_drone/VisDrone2019-DET")
                    source_path = base / original_filename
                elif dataset_name == "seadronessee":
                    base = Path("/mnt/archive/person_drone/seadronessee")
                    split = img_info.get('split', 'train')
                    source_path = base / "images" / split / Path(original_filename).name
                elif dataset_name == "lisa_alert":
                    base = Path("/mnt/archive/person_drone/lisa_alert")
                    source_path = base / "images" / Path(original_filename).name
                elif dataset_name == "crowd_human":
                    base = Path("/mnt/archive/person_drone/crowd_human")
                    # original_filename contains the full path, e.g. "CrowdHuman_train/Images/xxx.jpg"
                    source_path = base / original_filename
                elif dataset_name == "objects365":
                    base = Path("/mnt/archive/datasets/OpenDataLab___Objects365")
                    path_parts = Path(original_filename).parts
                    if len(path_parts) >= 3:
                        patch_dir = path_parts[-2]
                        filename = path_parts[-1]
                        source_path = base / "raw/Objects365/data/train" / patch_dir / filename
                    else:
                        source_path = base / "raw/Objects365/data" / original_filename
                if source_path and source_path.exists():
                    img = cv2.imread(str(source_path))
                else:
                    # Create a placeholder if the image was not found
                    img = np.zeros((img_info.get('height', 480),
                                    img_info.get('width', 640), 3), dtype=np.uint8)
                    img[:] = (50, 50, 50)
                    cv2.putText(img, "Image not found", (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            else:
                # Load from the copied location
                img_path = self.images_dir / img_info['file_name']
                img = cv2.imread(str(img_path))
            if img is None:
                # Create a placeholder if loading failed
                img = np.zeros((img_info.get('height', 480),
                                img_info.get('width', 640), 3), dtype=np.uint8)
                img[:] = (50, 50, 50)
                cv2.putText(img, "Failed to load image", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            # Draw annotations
            annotations = img_to_anns[img_info['id']]
            for ann in annotations:
                # Get the bbox
                x, y, w, h = ann['bbox']
                x, y, w, h = int(x), int(y), int(w), int(h)
                # Choose a color based on the category
                if ann.get('category_id', 0) == 1:
                    color = (0, 165, 255)  # Orange for ignore regions
                    label = "ignore"
                elif ann.get('iscrowd', 0):
                    color = (0, 165, 255)  # Orange for crowd (should be in ignore now)
                    label = "crowd"
                else:
                    color = (0, 255, 0)    # Green for an individual person
                    label = "person"
                # Draw the rectangle
                cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
                # Add the label
                label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                cv2.rectangle(img, (x, y - label_size[1] - 4),
                              (x + label_size[0], y), color, -1)
                cv2.putText(img, label, (x, y - 2),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            # Add image info
            info_text = [
                f"Dataset: {img_info.get('dataset', 'unknown')}",
                f"Image ID: {img_info['id']}",
                f"Annotations: {len(annotations)}",
                f"Size: {img_info.get('width', 0)}x{img_info.get('height', 0)}"
            ]
            if 'split' in img_info:
                info_text.append(f"Split: {img_info['split']}")
            if 'modality' in img_info:
                info_text.append(f"Modality: {img_info['modality']}")
            # Draw the info background
            y_offset = 10
            for text in info_text:
                text_size, _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
                cv2.rectangle(img, (5, y_offset),
                              (10 + text_size[0], y_offset + text_size[1] + 5),
                              (0, 0, 0), -1)
                cv2.putText(img, text, (10, y_offset + text_size[1]),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
                y_offset += text_size[1] + 10
            # Display the image
            window_name = f"Sample {idx+1}/{num_samples} - {img_info.get('dataset', 'unknown')} - ID: {img_info['id']}"
            cv2.namedWindow(window_name, cv2.WINDOW_KEEPRATIO)
            cv2.imshow(window_name, img)
            # Wait for a key press
            key = cv2.waitKey(0) & 0xFF
            cv2.destroyWindow(window_name)
            if key == ord('q'):
                logger.info("Visualization stopped by user")
                break
        cv2.destroyAllWindows()
        logger.info("Visualization complete")

    def save_combined_dataset(self):
        """Save the combined dataset to disk."""
        if self.dry_run:
            logger.info(f"DRY RUN: Would save combined dataset to {self.annotations_file}")
            logger.info(f"DRY RUN: Dataset would contain {len(self.combined_data['images'])} images and {len(self.combined_data['annotations'])} annotations")
        else:
            logger.info(f"Saving combined dataset to {self.annotations_file}")
            with open(self.annotations_file, 'w') as f:
                json.dump(self.combined_data, f)
            logger.info("Dataset saved successfully")

    def print_statistics(self):
        """Print detailed statistics about the combined dataset."""
        print("\n" + "=" * 80)
        if self.dry_run:
            print("COMBINED DATASET STATISTICS (DRY RUN)")
        else:
            print("COMBINED DATASET STATISTICS")
        print("=" * 80)
        total_images = len(self.combined_data['images'])
        total_annotations = len(self.combined_data['annotations'])
        print("\nOVERALL:")
        print(f"  Total Images: {total_images:,}")
        print(f"  Total Annotations: {total_annotations:,}")
        print(f"  Average Annotations per Image: {total_annotations / max(total_images, 1):.2f}")
        print("\n" + "-" * 80)
        print("PER-DATASET BREAKDOWN:")
        print("-" * 80)
        # Calculate percentages
        for dataset_name, stats in sorted(self.stats.items()):
            img_pct = 100 * stats['images_with_persons'] / max(total_images, 1)
            ann_pct = 100 * stats['total_annotations'] / max(total_annotations, 1)
            print(f"\n{dataset_name.upper()}:")
            print(f"  Total images: {stats['total_images']:,}")
            print(f"  Images with persons: {stats['images_with_persons']:,} ({img_pct:.1f}% of total)")
            print(f"  Total annotations: {stats['total_annotations']:,} ({ann_pct:.1f}% of total)")
            if stats.get('missing_images', 0) > 0:
                print(f"  ⚠️ Missing images: {stats['missing_images']:,}")
            if stats['total_annotations'] > 0:
                print(f"  Crowd annotations: {stats['crowd_annotations']:,} ({100 * stats['crowd_annotations'] / stats['total_annotations']:.1f}%)")
                print(f"  Ignore annotations: {stats['ignore_annotations']:,} ({100 * stats['ignore_annotations'] / stats['total_annotations']:.1f}%)")
                # Show pseudolabel statistics if available (for WISARD)
                if 'pseudolabeled_annotations' in stats:
                    print(f"  Pseudolabeled: {stats['pseudolabeled_annotations']:,} ({100 * stats['pseudolabeled_annotations'] / stats['total_annotations']:.1f}%)")
                    print(f"  Verified: {stats['verified_annotations']:,} ({100 * stats['verified_annotations'] / stats['total_annotations']:.1f}%)")
                    if stats.get('low_confidence_annotations', 0) > 0 or stats.get('high_confidence_annotations', 0) > 0:
                        print(f"  Low confidence (<0.3): {stats['low_confidence_annotations']:,}")
                        print(f"  High confidence (≥0.7): {stats['high_confidence_annotations']:,}")
            if stats['modalities']:
                print(f"  Modalities: {', '.join(sorted(stats['modalities']))}")
            if stats['splits']:
                print(f"  Splits: {', '.join(sorted(stats['splits']))}")
            if stats['images_with_persons'] > 0:
                print(f"  Avg annotations/image: {stats['total_annotations'] / stats['images_with_persons']:.2f}")
        # Image statistics
        if self.combined_data['images']:
            widths = [img['width'] for img in self.combined_data['images'] if img['width'] > 0]
            heights = [img['height'] for img in self.combined_data['images'] if img['height'] > 0]
            if widths and heights:
                print("\n" + "-" * 80)
                print("IMAGE DIMENSIONS:")
                print("-" * 80)
                print(f"  Width range: {min(widths)} - {max(widths)} pixels")
                print(f"  Height range: {min(heights)} - {max(heights)} pixels")
                print(f"  Average width: {sum(widths) / len(widths):.0f} pixels")
                print(f"  Average height: {sum(heights) / len(heights):.0f} pixels")
        # Annotation statistics
        if self.combined_data['annotations']:
            areas = [ann['area'] for ann in self.combined_data['annotations'] if ann['area'] > 0]
            crowd_count = sum(1 for ann in self.combined_data['annotations'] if ann['iscrowd'] == 1)
            print("\n" + "-" * 80)
            print("ANNOTATION STATISTICS:")
            print("-" * 80)
            print(f"  Total bounding boxes: {len(self.combined_data['annotations']):,}")
            print(f"  Crowd annotations: {crowd_count:,} ({100 * crowd_count / len(self.combined_data['annotations']):.1f}%)")
            if areas:
                print(f"  Area range: {min(areas):.0f} - {max(areas):.0f} pixels²")
                print(f"  Average area: {sum(areas) / len(areas):.0f} pixels²")
        # Dataset contribution summary table
        print("\n" + "-" * 80)
        print("DATASET CONTRIBUTION SUMMARY:")
        print("-" * 80)
        print(f"{'Dataset':<25} {'Total Images':>12} {'With Person':>12} {'%':>7} {'Annotations':>12} {'%':>7}")
        print("-" * 100)
        # Only show datasets that actually have images
        datasets_with_images = [(name, stats) for name, stats in self.stats.items() if stats['total_images'] > 0]
        for dataset_name, stats in sorted(datasets_with_images, key=lambda x: x[1]['total_images'], reverse=True):
            img_pct = 100 * stats['total_images'] / max(total_images, 1)
            ann_pct = 100 * stats['total_annotations'] / max(total_annotations, 1)
            print(f"{dataset_name:<25} {stats['total_images']:>12,} {stats['images_with_persons']:>12,} {img_pct:>6.1f}% {stats['total_annotations']:>12,} {ann_pct:>6.1f}%")
        print("-" * 100)
        # Calculate totals for images with persons
        total_with_persons = sum(stats['images_with_persons'] for stats in self.stats.values())
        print(f"{'TOTAL':<25} {total_images:>12,} {total_with_persons:>12,} {'100.0%':>7} {total_annotations:>12,} {'100.0%':>7}")
        print("\n" + "=" * 80)

    def check_datasets(self):
        """Check which datasets are available."""
        datasets = {
            "RGBTDronePerson": Path("/mnt/archive/person_drone/RGBTDronePerson-20250828T031729Z-1-001/RGBTDronePerson"),
            "search-and-rescue": Path("/mnt/archive/person_drone/search-and-rescue"),
            "stanford_drone_coco": Path("/mnt/archive/person_drone/stanford_drone_coco"),
            "vtsar_coco": Path("/mnt/archive/person_drone/vtsar_coco"),
            "vtuav_coco": Path("/mnt/archive/person_drone/vtuav_coco"),
            "wisard_coco": Path("/mnt/archive/person_drone/wisard_coco"),
            "VisDrone2019-DET": Path("/mnt/archive/person_drone/VisDrone2019-DET"),
            "SeaDronesSee": Path("/mnt/archive/person_drone/seadronessee"),
            "LISA Alert": Path("/mnt/archive/person_drone/lisa_alert"),
            "CrowdHuman": Path("/mnt/archive/person_drone/crowd_human"),
            "Objects365": Path("/mnt/archive/datasets/OpenDataLab___Objects365")
        }
        print("\n" + "=" * 80)
        print("CHECKING DATASET AVAILABILITY")
        print("=" * 80)
        available = []
        missing = []
        for name, path in datasets.items():
            if path.exists():
                available.append(name)
                print(f"✓ {name}: Found at {path}")
            else:
                missing.append(name)
                print(f"✗ {name}: Not found at {path}")
        print(f"\nSummary: {len(available)} available, {len(missing)} missing")
        print("=" * 80 + "\n")
        return available, missing

    def run(self, visualize: bool = False, num_vis_samples: int = 10, vis_dataset: str = None):
        """Run the complete dataset combination pipeline."""
        # Check dataset availability
        available, missing = self.check_datasets()
        if missing and not self.dry_run:
            response = input(f"\n⚠️ {len(missing)} dataset(s) missing. Continue anyway? (y/n): ")
            if response.lower() != 'y':
                logger.info("Aborted by user")
                return
        logger.info("Starting dataset combination process...")
        # Process each dataset
        self.process_rgbt_drone_person()
        self.process_search_and_rescue()
        # bad annotations
        # self.process_stanford_drone()
        # Process COCO format datasets
        self.process_coco_format_dataset("vtsar", Path("/mnt/archive/person_drone/vtsar_coco"))
        # bad annotations
        # self.process_coco_format_dataset("vtuav", Path("/mnt/archive/person_drone/vtuav_coco"))
        # Process WISARD with special pseudolabel handling
        self.process_wisard()
        # Process VisDrone
        self.process_visdrone()
        # Process SeaDronesSee
        self.process_seadronessee()
        # Process LISA Alert
        self.process_lisa_alert()
        # Process CrowdHuman
        self.process_crowd_human()
        # Process Objects365 (disabled by default due to its size)
        # self.process_objects365()
        # Save the combined dataset
        self.save_combined_dataset()
        # Create visualizations if requested
        if visualize:
            self.visualize_samples(num_vis_samples, dataset_filter=vis_dataset)
        # Print statistics
        self.print_statistics()
        logger.info("Dataset combination complete!")


def main():
    parser = argparse.ArgumentParser(description="Combine multiple person detection datasets into a single COCO format dataset")
    parser.add_argument(
        "--output-dir",
        type=str,
        default="/mnt/archive/person_drone/combined_dataset",
        help="Output directory for the combined dataset"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Run without copying images, to check correctness"
    )
    parser.add_argument(
        "--skip-objects365",
        action="store_true",
        help="Skip the Objects365 dataset (it is very large)"
    )
    parser.add_argument(
        "--visualize",
        action="store_true",
        help="Show visualization samples with bounding boxes"
    )
    parser.add_argument(
        "--num-vis-samples",
        type=int,
        default=200,
        help="Number of samples to visualize with cv2.imshow (default: 200)"
    )
    parser.add_argument(
        "--vis-dataset",
        type=str,
        default=None,
        help="Visualize samples only from a specific dataset (e.g., stanford_drone, visdrone2019)"
    )
    parser.add_argument(
        "--images-per-folder",
        type=int,
        default=10000,
        help="Number of images per folder for pagination (default: 10000)"
    )
    args = parser.parse_args()
    combiner = DatasetCombiner(args.output_dir, dry_run=args.dry_run, images_per_folder=args.images_per_folder)
    if args.skip_objects365:
        # Override the process_objects365 method to skip it
        combiner.process_objects365 = lambda: logger.info("Skipping Objects365 dataset")
    combiner.run(visualize=args.visualize, num_vis_samples=args.num_vis_samples, vis_dataset=args.vis_dataset)


if __name__ == "__main__":
    main()
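
# Example invocations (the script name is illustrative; the dataset paths above
# are hard-coded for a specific machine and may need adjusting):
#   python combine_datasets.py --dry-run                       # validate annotations without copying images
#   python combine_datasets.py --output-dir /data/combined --images-per-folder 5000
#   python combine_datasets.py --dry-run --visualize --vis-dataset visdrone2019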