#!/usr/bin/env python3
"""
Script to combine multiple drone/person detection datasets into a single COCO format dataset.
All person-related categories are merged into a single "person" category.
"""

import json
import os
import shutil
from pathlib import Path
from typing import Dict, List, Any, Tuple
from collections import defaultdict
import logging
from tqdm import tqdm
import argparse
import cv2
import numpy as np
import random

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class DatasetCombiner:
    def __init__(self, output_dir: str, dry_run: bool = False, images_per_folder: int = 10000):
        self.output_dir = Path(output_dir)
        self.images_dir = self.output_dir / "images"
        self.annotations_file = self.output_dir / "annotations.json"
        self.dry_run = dry_run
        self.images_per_folder = images_per_folder

        if self.dry_run:
            logger.info("🔍 DRY RUN MODE - No files will be copied")

        # Create output directories
        self.output_dir.mkdir(parents=True, exist_ok=True)
        if not self.dry_run:
            self.images_dir.mkdir(exist_ok=True)

        # Initialize COCO format structure
        self.combined_data = {
            "info": {
                "description": "Combined Person Detection Dataset from Multiple Drone Datasets",
                "version": "1.0",
                "year": 2024
            },
            "licenses": [],
            "categories": [
                {"id": 0, "name": "person", "supercategory": "person"},
                {"id": 1, "name": "ignore", "supercategory": "ignore"}
            ],
            "images": [],
            "annotations": []
        }

        # Tracking
        self.image_id_counter = 0
        self.annotation_id_counter = 0
        self.image_filename_mapping = {}  # old_path -> new_filename
        self.stats = defaultdict(lambda: {
            "total_images": 0,
            "total_annotations": 0,
            "images_with_persons": 0,
            "crowd_annotations": 0,
            "ignore_annotations": 0,
            "modalities": set(),
            "splits": set(),
            "missing_images": 0
        })

    def is_person_category(self, category_name: str) -> bool:
        """Check if a category name refers to a person."""
        person_keywords = [
            'person', 'people', 'pedestrian', 'human', 'crowd',
            'rider', 'biker', 'skater', 'swimmer'
        ]
        name_lower = category_name.lower()
        return any(keyword in name_lower for keyword in person_keywords)

    def is_crowd_category(self, category_name: str) -> bool:
        """Check if a category should be marked as crowd."""
        crowd_keywords = ['crowd', 'people', 'group']
        return any(keyword in category_name.lower() for keyword in crowd_keywords)

    def get_new_image_path(self, dataset_name: str, original_filename: str, image_id: int) -> Tuple[str, Path]:
        """Generate a new unique filename with a paginated folder structure.

        Returns:
            Tuple of (relative_path_for_json, full_destination_path)
        """
        ext = Path(original_filename).suffix
        # Calculate the 0-based folder number used for pagination
        folder_num = image_id // self.images_per_folder
        folder_name = f"{folder_num:07d}"  # 0000000, 0000001, etc.
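        # Worked example of the pagination scheme: with the default
        # images_per_folder of 10000, image_id 9999 lands in folder "0000000"
        # and image_id 123456 lands in folder "0000012" (123456 // 10000 == 12),
        # so no single directory ever holds more than 10000 image files.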
# Create filename filename = f"{dataset_name}_{image_id:08d}{ext}" # Relative path for JSON (images/0000001/filename.jpg) relative_path = f"{folder_name}/{filename}" # Full destination path folder_path = self.images_dir / folder_name if not self.dry_run: folder_path.mkdir(parents=True, exist_ok=True) full_path = folder_path / filename return relative_path, full_path def copy_image(self, source_path: Path, dest_path: Path) -> bool: """Copy image to the combined dataset directory.""" if self.dry_run: # In dry run, just check if source exists if source_path.exists(): return True else: logger.warning(f"Source image not found: {source_path}") return False try: if source_path.exists(): shutil.copy2(source_path, dest_path) return True else: logger.warning(f"Source image not found: {source_path}") return False except Exception as e: logger.error(f"Error copying image {source_path}: {e}") return False def process_rgbt_drone_person(self): """Process RGBTDronePerson dataset.""" dataset_name = "rgbt_drone_person" base_path = Path("/mnt/archive/person_drone/RGBTDronePerson-20250828T031729Z-1-001/RGBTDronePerson") annotation_files = [ ("train_thermal.json", "train", "thermal"), ("val_thermal.json", "val", "thermal"), ("sub_train_thermal.json", "sub_train", "thermal"), ("sub_train_visible.json", "sub_train", "visible") ] for ann_file, split, modality in annotation_files: ann_path = base_path / ann_file if not ann_path.exists(): logger.warning(f"Annotation file not found: {ann_path}") continue logger.info(f"Processing {dataset_name} - {split} - {modality}") with open(ann_path, 'r') as f: data = json.load(f) # Map category IDs category_mapping = {} for cat in data.get('categories', []): if self.is_person_category(cat['name']): category_mapping[cat['id']] = 0 # Process images and annotations image_id_mapping = {} images_with_persons = set() # First pass: identify images with person annotations for ann in data.get('annotations', []): if ann['category_id'] in category_mapping: images_with_persons.add(ann['image_id']) # Process ALL images (drone dataset - keep all images) for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} {split} {modality} images"): old_id = img['id'] new_id = self.image_id_counter self.image_id_counter += 1 image_id_mapping[old_id] = new_id # Determine image path based on split and modality img_filename = img['file_name'] # RGBTDronePerson has structure: RGBTDronePerson/{split}/{modality}/{filename} if split == "sub_train": # sub_train doesn't have its own folder, uses train folder source_path = base_path / "RGBTDronePerson" / "train" / modality / img_filename else: source_path = base_path / "RGBTDronePerson" / split / modality / img_filename relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id) if not source_path.exists(): self.stats[dataset_name]["missing_images"] += 1 if not self.dry_run: continue if self.copy_image(source_path, dest_path): new_img = { "id": new_id, "file_name": relative_path, "width": img.get('width', 0), "height": img.get('height', 0), "dataset": dataset_name, "split": split, "modality": modality, "original_filename": img_filename } self.combined_data['images'].append(new_img) self.stats[dataset_name]["total_images"] += 1 if old_id in images_with_persons: self.stats[dataset_name]["images_with_persons"] += 1 self.stats[dataset_name]["modalities"].add(modality) self.stats[dataset_name]["splits"].add(split) # Process annotations for ann in data.get('annotations', []): if ann['category_id'] not in category_mapping: 
continue if ann['image_id'] not in image_id_mapping: continue old_cat_name = next((c['name'] for c in data['categories'] if c['id'] == ann['category_id']), '') is_crowd = self.is_crowd_category(old_cat_name) or ann.get('iscrowd', 0) == 1 # Crowd annotations go to ignore category new_ann = { "id": self.annotation_id_counter, "image_id": image_id_mapping[ann['image_id']], "category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0 "bbox": ann['bbox'], "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]), "segmentation": ann.get('segmentation', []), "iscrowd": 1 if is_crowd else 0 } self.combined_data['annotations'].append(new_ann) self.annotation_id_counter += 1 self.stats[dataset_name]["total_annotations"] += 1 if is_crowd: self.stats[dataset_name]["crowd_annotations"] += 1 self.stats[dataset_name]["ignore_annotations"] += 1 def process_search_and_rescue(self): """Process Search and Rescue dataset.""" dataset_name = "search_and_rescue" base_path = Path("/mnt/archive/person_drone/search-and-rescue") splits = ["train", "valid", "test"] for split in splits: ann_path = base_path / f"{split}.json" if not ann_path.exists(): logger.warning(f"Annotation file not found: {ann_path}") continue logger.info(f"Processing {dataset_name} - {split}") with open(ann_path, 'r') as f: data = json.load(f) # Map category IDs (human -> person) category_mapping = {} for cat in data.get('categories', []): if self.is_person_category(cat['name']): category_mapping[cat['id']] = 0 # Process images and annotations image_id_mapping = {} images_with_persons = set() # First pass: identify images with person annotations for ann in data.get('annotations', []): if ann['category_id'] in category_mapping: images_with_persons.add(ann['image_id']) # Process ALL images (drone dataset - keep all images) for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} {split} images"): old_id = img['id'] new_id = self.image_id_counter self.image_id_counter += 1 image_id_mapping[old_id] = new_id img_filename = img['file_name'] # Search and rescue has images in train/images, valid/images, test/images folders source_path = base_path / split / "images" / img_filename relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id) if self.copy_image(source_path, dest_path): new_img = { "id": new_id, "file_name": relative_path, "width": img.get('width', 0), "height": img.get('height', 0), "dataset": dataset_name, "split": split, "original_filename": img_filename } self.combined_data['images'].append(new_img) self.stats[dataset_name]["total_images"] += 1 if old_id in images_with_persons: self.stats[dataset_name]["images_with_persons"] += 1 self.stats[dataset_name]["splits"].add(split) # Process annotations for ann in data.get('annotations', []): if ann['category_id'] not in category_mapping: continue if ann['image_id'] not in image_id_mapping: continue is_crowd = ann.get('iscrowd', 0) == 1 new_ann = { "id": self.annotation_id_counter, "image_id": image_id_mapping[ann['image_id']], "category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0 "bbox": ann['bbox'], "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]), "segmentation": ann.get('segmentation', []), "iscrowd": ann.get('iscrowd', 0) } self.combined_data['annotations'].append(new_ann) self.annotation_id_counter += 1 self.stats[dataset_name]["total_annotations"] += 1 if is_crowd: self.stats[dataset_name]["crowd_annotations"] += 1 self.stats[dataset_name]["ignore_annotations"] += 1 def process_stanford_drone(self): 
"""Process Stanford Drone dataset.""" dataset_name = "stanford_drone" base_path = Path("/mnt/archive/person_drone/stanford_drone_coco") ann_path = base_path / "train.json" if not ann_path.exists(): logger.warning(f"Annotation file not found: {ann_path}") return logger.info(f"Processing {dataset_name}") with open(ann_path, 'r') as f: data = json.load(f) # Map category IDs (Pedestrian, Biker, Skater -> person) category_mapping = {} for cat in data.get('categories', []): if self.is_person_category(cat['name']): category_mapping[cat['id']] = 0 # Process images and annotations image_id_mapping = {} images_with_persons = set() # First pass: identify images with person annotations for ann in data.get('annotations', []): if ann['category_id'] in category_mapping: images_with_persons.add(ann['image_id']) # Process ALL images (drone dataset - keep all images) for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"): old_id = img['id'] new_id = self.image_id_counter self.image_id_counter += 1 image_id_mapping[old_id] = new_id img_filename = img['file_name'] # Stanford drone has images in train_images folder possible_paths = [ base_path / "train_images" / img_filename, base_path / img_filename, base_path / "images" / img_filename ] source_path = None for path in possible_paths: if path.exists(): source_path = path break if source_path is None: logger.warning(f"Image not found in any expected location: {img_filename}") continue relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id) if self.copy_image(source_path, dest_path): new_img = { "id": new_id, "file_name": relative_path, "width": img.get('width', 0), "height": img.get('height', 0), "dataset": dataset_name, "split": "train", "original_filename": img_filename } self.combined_data['images'].append(new_img) self.stats[dataset_name]["total_images"] += 1 if old_id in images_with_persons: self.stats[dataset_name]["images_with_persons"] += 1 self.stats[dataset_name]["splits"].add("train") # Process annotations for ann in data.get('annotations', []): if ann['category_id'] not in category_mapping: continue if ann['image_id'] not in image_id_mapping: continue is_crowd = ann.get('iscrowd', 0) == 1 new_ann = { "id": self.annotation_id_counter, "image_id": image_id_mapping[ann['image_id']], "category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0 "bbox": ann['bbox'], "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]), "segmentation": ann.get('segmentation', []), "iscrowd": ann.get('iscrowd', 0) } self.combined_data['annotations'].append(new_ann) self.annotation_id_counter += 1 self.stats[dataset_name]["total_annotations"] += 1 if is_crowd: self.stats[dataset_name]["crowd_annotations"] += 1 self.stats[dataset_name]["ignore_annotations"] += 1 def process_coco_format_dataset(self, dataset_name: str, base_path: Path, ann_filename: str = "annotations.json"): """Generic processor for COCO format datasets.""" ann_path = base_path / ann_filename if not ann_path.exists(): logger.warning(f"Annotation file not found: {ann_path}") return logger.info(f"Processing {dataset_name}") with open(ann_path, 'r') as f: data = json.load(f) # Map category IDs category_mapping = {} for cat in data.get('categories', []): if self.is_person_category(cat['name']): category_mapping[cat['id']] = 0 # Process images and annotations image_id_mapping = {} images_with_persons = set() # First pass: identify images with person annotations for ann in data.get('annotations', []): if ann['category_id'] in category_mapping: 
images_with_persons.add(ann['image_id']) # Process ALL images (drone dataset - keep all images) for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"): old_id = img['id'] new_id = self.image_id_counter self.image_id_counter += 1 image_id_mapping[old_id] = new_id img_filename = img['file_name'] # Most COCO datasets have the path included in file_name # Try different possible paths possible_paths = [ base_path / img_filename, # Full path as specified in JSON base_path / "images" / img_filename, base_path / Path(img_filename).name # Just filename without path ] source_path = None for path in possible_paths: if path.exists(): source_path = path break if source_path is None: logger.warning(f"Image not found: {img_filename}") continue relative_path, dest_path = self.get_new_image_path(dataset_name, Path(img_filename).name, new_id) if self.copy_image(source_path, dest_path): new_img = { "id": new_id, "file_name": relative_path, "width": img.get('width', 0), "height": img.get('height', 0), "dataset": dataset_name, "original_filename": img_filename } # Add split info if available if 'split' in img: new_img['split'] = img['split'] self.stats[dataset_name]["splits"].add(img['split']) self.combined_data['images'].append(new_img) self.stats[dataset_name]["total_images"] += 1 if old_id in images_with_persons: self.stats[dataset_name]["images_with_persons"] += 1 # Process annotations for ann in data.get('annotations', []): if ann['category_id'] not in category_mapping: continue if ann['image_id'] not in image_id_mapping: continue # Check for crowd based on category name old_cat = next((c for c in data['categories'] if c['id'] == ann['category_id']), None) is_crowd = ann.get('iscrowd', 0) if old_cat and self.is_crowd_category(old_cat['name']): is_crowd = 1 new_ann = { "id": self.annotation_id_counter, "image_id": image_id_mapping[ann['image_id']], "category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0 "bbox": ann['bbox'], "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]), "segmentation": ann.get('segmentation', []), "iscrowd": is_crowd } self.combined_data['annotations'].append(new_ann) self.annotation_id_counter += 1 self.stats[dataset_name]["total_annotations"] += 1 if is_crowd: self.stats[dataset_name]["crowd_annotations"] += 1 self.stats[dataset_name]["ignore_annotations"] += 1 def process_visdrone(self): """Process VisDrone2019-DET dataset.""" dataset_name = "visdrone2019" base_path = Path("/mnt/archive/person_drone/VisDrone2019-DET") ann_path = base_path / "annotations.json" if not ann_path.exists(): logger.warning(f"Annotation file not found: {ann_path}") return logger.info(f"Processing {dataset_name}") with open(ann_path, 'r') as f: data = json.load(f) # Map category IDs (pedestrian, people -> person) category_mapping = {} for cat in data.get('categories', []): if self.is_person_category(cat['name']): category_mapping[cat['id']] = 0 # Process images and annotations image_id_mapping = {} images_with_persons = set() # First pass: identify images with person annotations for ann in data.get('annotations', []): if ann['category_id'] in category_mapping: images_with_persons.add(ann['image_id']) # Process ALL images (drone dataset - keep all images) for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"): old_id = img['id'] new_id = self.image_id_counter self.image_id_counter += 1 image_id_mapping[old_id] = new_id img_filename = img['file_name'] # VisDrone has images in train_images, val_images, test_images folders # The 
file_name already includes the folder (e.g., "train_images/xxx.jpg") possible_paths = [ base_path / img_filename, # This should work as file_name includes the folder base_path / "images" / img_filename ] source_path = None for path in possible_paths: if path.exists(): source_path = path break if source_path is None: logger.warning(f"Image not found: {img_filename}") continue relative_path, dest_path = self.get_new_image_path(dataset_name, Path(img_filename).name, new_id) if self.copy_image(source_path, dest_path): new_img = { "id": new_id, "file_name": relative_path, "width": img.get('width', 0), "height": img.get('height', 0), "dataset": dataset_name, "original_filename": img_filename } self.combined_data['images'].append(new_img) self.stats[dataset_name]["total_images"] += 1 if old_id in images_with_persons: self.stats[dataset_name]["images_with_persons"] += 1 # Process annotations for ann in data.get('annotations', []): if ann['category_id'] not in category_mapping: continue if ann['image_id'] not in image_id_mapping: continue is_crowd = 0 new_ann = { "id": self.annotation_id_counter, "image_id": image_id_mapping[ann['image_id']], "category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0 "bbox": ann['bbox'], "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]), "segmentation": ann.get('segmentation', []), "iscrowd": is_crowd } self.combined_data['annotations'].append(new_ann) self.annotation_id_counter += 1 self.stats[dataset_name]["total_annotations"] += 1 if is_crowd: self.stats[dataset_name]["crowd_annotations"] += 1 self.stats[dataset_name]["ignore_annotations"] += 1 def process_seadronessee(self): """Process SeaDronesSee dataset.""" dataset_name = "seadronessee" base_path = Path("/mnt/archive/person_drone/seadronessee") ann_dir = base_path / "annotations" # Process train and val splits for split, ann_file in [("train", "instances_train.json"), ("val", "instances_val.json")]: ann_path = ann_dir / ann_file if not ann_path.exists(): logger.warning(f"SeaDronesSee annotations missing for {split}: {ann_path}") continue logger.info(f"Processing {dataset_name} - {split}") with open(ann_path, 'r') as f: data = json.load(f) # Create category mappings swimmer_cat_id = None ignore_cat_ids = set() # For boats, jetskis, and ignored for cat in data.get('categories', []): cat_name = cat.get('name', '').lower() if cat_name == 'swimmer': swimmer_cat_id = cat['id'] elif cat_name in ['boat', 'jetski', 'ignored']: ignore_cat_ids.add(cat['id']) if swimmer_cat_id is None: logger.warning(f"No 'swimmer' category found in {ann_path}") # Identify images with swimmers or ignore regions images_with_persons = set() images_with_ignore = set() for ann in data.get('annotations', []): cat_id = ann.get('category_id') img_id = ann['image_id'] if cat_id == swimmer_cat_id: images_with_persons.add(img_id) elif cat_id in ignore_cat_ids: images_with_ignore.add(img_id) # Process images image_id_mapping = {} for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} {split} images"): old_id = img['id'] new_id = self.image_id_counter self.image_id_counter += 1 image_id_mapping[old_id] = new_id img_filename = img.get('file_name', '') source_path = base_path / "images" / split / img_filename relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id) if not source_path.exists(): self.stats[dataset_name]["missing_images"] += 1 if not self.dry_run: continue if self.copy_image(source_path, dest_path): new_img = { "id": new_id, "file_name": relative_path, "width": 
img.get('width', 0), "height": img.get('height', 0), "dataset": dataset_name, "split": split, "original_filename": img_filename } self.combined_data['images'].append(new_img) self.stats[dataset_name]["total_images"] += 1 if old_id in images_with_persons: self.stats[dataset_name]["images_with_persons"] += 1 self.stats[dataset_name]["splits"].add(split) # Process annotations for ann in data.get('annotations', []): cat_id = ann.get('category_id') img_id = ann['image_id'] if img_id not in image_id_mapping: continue # Determine target category if cat_id == swimmer_cat_id: # Swimmer -> person (0) or ignore (1) if crowd is_crowd = ann.get('iscrowd', 0) == 1 target_cat = 1 if is_crowd else 0 elif cat_id in ignore_cat_ids: # Boats, jetskis, ignored -> ignore (1) target_cat = 1 is_crowd = 1 # Treat all ignore regions as crowd else: # Skip other categories (life_saving_appliances, buoy) continue new_ann = { "id": self.annotation_id_counter, "image_id": image_id_mapping[img_id], "category_id": target_cat, "bbox": ann.get('bbox', []), "area": ann.get('area', ann.get('bbox', [0, 0, 0, 0])[2] * ann.get('bbox', [0, 0, 0, 0])[3]), "segmentation": ann.get('segmentation', []), "iscrowd": is_crowd if cat_id == swimmer_cat_id else 1 } self.combined_data['annotations'].append(new_ann) self.annotation_id_counter += 1 self.stats[dataset_name]["total_annotations"] += 1 if target_cat == 1: self.stats[dataset_name]["ignore_annotations"] += 1 if new_ann['iscrowd']: self.stats[dataset_name]["crowd_annotations"] += 1 def process_lisa_alert(self): """Process LISA Alert dataset - combines all splits (train/val/test) into one.""" dataset_name = "lisa_alert" base_path = Path("/mnt/archive/person_drone/lisa_alert") ann_dir = base_path / "annotations" logger.info(f"Processing {dataset_name} - combining all splits") # We'll process all splits but combine them into one dataset splits_to_process = ["train", "val", "test"] # Track which images we've already processed (to avoid duplicates) processed_images = set() for split in splits_to_process: ann_path = ann_dir / f"{split}.json" if not ann_path.exists(): logger.warning(f"LISA Alert annotation file not found: {ann_path}") continue logger.info(f"Processing {dataset_name} - {split} split") with open(ann_path, 'r') as f: data = json.load(f) # Map category IDs (Pedestrian -> person) category_mapping = {} for cat in data.get('categories', []): if self.is_person_category(cat['name']): category_mapping[cat['id']] = 0 # Process images and annotations image_id_mapping = {} images_with_persons = set() # First pass: identify images with person annotations for ann in data.get('annotations', []): if ann['category_id'] in category_mapping: images_with_persons.add(ann['image_id']) # Process images for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} {split} images"): old_id = img['id'] img_filename = str(img['file_name']).strip() # LISA Alert filenames in JSON don't have .jpg extension if not img_filename.endswith('.jpg'): img_filename = img_filename + '.jpg' # Skip if we've already processed this image from another split if img_filename in processed_images: continue processed_images.add(img_filename) new_id = self.image_id_counter self.image_id_counter += 1 image_id_mapping[old_id] = new_id # Images are in the images/ folder source_path = base_path / "images" / img_filename relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id) if not source_path.exists(): self.stats[dataset_name]["missing_images"] += 1 if not self.dry_run: continue if 
self.copy_image(source_path, dest_path): new_img = { "id": new_id, "file_name": relative_path, "width": img.get('width', 0), "height": img.get('height', 0), "dataset": dataset_name, "original_filename": img_filename } self.combined_data['images'].append(new_img) self.stats[dataset_name]["total_images"] += 1 if old_id in images_with_persons: self.stats[dataset_name]["images_with_persons"] += 1 # Process annotations for ann in data.get('annotations', []): if ann['category_id'] not in category_mapping: continue if ann['image_id'] not in image_id_mapping: continue # Check for crowd old_cat_name = next((c['name'] for c in data['categories'] if c['id'] == ann['category_id']), '') is_crowd = self.is_crowd_category(old_cat_name) or ann.get('iscrowd', 0) == 1 new_ann = { "id": self.annotation_id_counter, "image_id": image_id_mapping[ann['image_id']], "category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0 "bbox": ann['bbox'], "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]), "segmentation": ann.get('segmentation', []), "iscrowd": 1 if is_crowd else 0 } self.combined_data['annotations'].append(new_ann) self.annotation_id_counter += 1 self.stats[dataset_name]["total_annotations"] += 1 if is_crowd: self.stats[dataset_name]["crowd_annotations"] += 1 self.stats[dataset_name]["ignore_annotations"] += 1 def process_wisard(self): """Process WISARD dataset with special handling for pseudolabeled annotations.""" dataset_name = "wisard" base_path = Path("/mnt/archive/person_drone/wisard_coco") ann_path = base_path / "annotations.json" if not ann_path.exists(): logger.warning(f"Annotation file not found: {ann_path}") return logger.info(f"Processing {dataset_name} - includes pseudolabeled annotations") with open(ann_path, 'r') as f: data = json.load(f) # WISARD has special category mapping: # Category 0: pseudolabeled persons # Category 1: verified/original persons # Both should map to our unified person category (0) # Track statistics for pseudolabeled vs verified pseudolabel_stats = { "pseudolabeled": 0, "verified": 0, "low_confidence": 0, # confidence < 0.3 "high_confidence": 0 # confidence >= 0.7 } # Process images and annotations image_id_mapping = {} images_with_persons = set() # First pass: identify images with person annotations (both categories) for ann in data.get('annotations', []): if ann['category_id'] in [0, 1]: # Both are person categories images_with_persons.add(ann['image_id']) # Process ALL images for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"): old_id = img['id'] new_id = self.image_id_counter self.image_id_counter += 1 image_id_mapping[old_id] = new_id img_filename = img['file_name'] # WISARD images are organized in sequence folders # The file_name already includes the folder structure possible_paths = [ base_path / img_filename, # Full path as specified in JSON base_path / Path(img_filename).name # Just filename without path ] source_path = None for path in possible_paths: if path.exists(): source_path = path break if source_path is None: # Many images were removed, so we just skip them self.stats[dataset_name]["missing_images"] += 1 continue relative_path, dest_path = self.get_new_image_path(dataset_name, Path(img_filename).name, new_id) if self.copy_image(source_path, dest_path): new_img = { "id": new_id, "file_name": relative_path, "width": img.get('width', 0), "height": img.get('height', 0), "dataset": dataset_name, "original_filename": img_filename } # Add WISARD-specific metadata if 'sequence' in img: new_img['sequence'] = 
img['sequence'] if 'modality' in img: new_img['modality'] = img['modality'] self.stats[dataset_name]["modalities"].add(img['modality']) if 'location' in img: new_img['location'] = img['location'] if 'sensor' in img: new_img['sensor'] = img['sensor'] self.combined_data['images'].append(new_img) self.stats[dataset_name]["total_images"] += 1 if old_id in images_with_persons: self.stats[dataset_name]["images_with_persons"] += 1 # Process annotations for ann in data.get('annotations', []): if ann['category_id'] not in [0, 1]: # Only process person categories continue if ann['image_id'] not in image_id_mapping: continue # All WISARD annotations map to person (0) in our unified dataset # We preserve the pseudolabel metadata is_crowd = ann.get('iscrowd', 0) == 1 new_ann = { "id": self.annotation_id_counter, "image_id": image_id_mapping[ann['image_id']], "category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0 "bbox": ann['bbox'], "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]), "segmentation": ann.get('segmentation', []), "iscrowd": ann.get('iscrowd', 0) } # Preserve WISARD-specific metadata if 'is_pseudolabel' in ann: new_ann['is_pseudolabel'] = ann['is_pseudolabel'] if ann['is_pseudolabel']: pseudolabel_stats['pseudolabeled'] += 1 if 'verified' in ann: new_ann['verified'] = ann['verified'] if ann['verified']: pseudolabel_stats['verified'] += 1 if 'confidence' in ann: new_ann['confidence'] = ann['confidence'] if ann['confidence'] < 0.3: pseudolabel_stats['low_confidence'] += 1 elif ann['confidence'] >= 0.7: pseudolabel_stats['high_confidence'] += 1 if 'sequence' in ann: new_ann['sequence'] = ann['sequence'] if 'modality' in ann: new_ann['modality'] = ann['modality'] self.combined_data['annotations'].append(new_ann) self.annotation_id_counter += 1 self.stats[dataset_name]["total_annotations"] += 1 if is_crowd: self.stats[dataset_name]["crowd_annotations"] += 1 self.stats[dataset_name]["ignore_annotations"] += 1 # Add pseudolabel statistics to dataset stats self.stats[dataset_name]["pseudolabeled_annotations"] = pseudolabel_stats['pseudolabeled'] self.stats[dataset_name]["verified_annotations"] = pseudolabel_stats['verified'] self.stats[dataset_name]["low_confidence_annotations"] = pseudolabel_stats['low_confidence'] self.stats[dataset_name]["high_confidence_annotations"] = pseudolabel_stats['high_confidence'] logger.info(f"WISARD dataset processed: {pseudolabel_stats['pseudolabeled']} pseudolabeled, " f"{pseudolabel_stats['verified']} verified annotations") def process_crowd_human(self): """Process CrowdHuman dataset from pre-converted COCO format.""" dataset_name = "crowd_human" # Use the pre-converted COCO format annotations ann_path = Path("/home/svakhreev/projects/DEIM/data/crowd_human_coco/annotations_combined.json") if not ann_path.exists(): logger.warning(f"CrowdHuman annotations not found: {ann_path}") return logger.info(f"Processing {dataset_name}") with open(ann_path, 'r') as f: data = json.load(f) # CrowdHuman has person category with id=1 in the converted format # Map it to our unified person category (id=0) category_mapping = {} for cat in data.get('categories', []): if cat['name'].lower() == 'person': category_mapping[cat['id']] = 0 # Map to our person category # Process images and annotations image_id_mapping = {} images_with_persons = set() # First pass: identify images with person annotations for ann in data.get('annotations', []): if ann['category_id'] in category_mapping: images_with_persons.add(ann['image_id']) # Process ALL images (keep all images from 
CrowdHuman) for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"): old_id = img['id'] new_id = self.image_id_counter self.image_id_counter += 1 image_id_mapping[old_id] = new_id img_filename = img['file_name'] # CrowdHuman file_name format: "CrowdHuman_train/Images/273271,1a0d6000b9e1f5b7.jpg" # or "CrowdHuman_val/Images/273278,c9db000d5146c15.jpg" # Construct the source path source_path = Path("/mnt/archive/person_drone/crowd_human") / img_filename # Extract just the filename for the new path actual_filename = Path(img_filename).name relative_path, dest_path = self.get_new_image_path(dataset_name, actual_filename, new_id) if not source_path.exists(): self.stats[dataset_name]["missing_images"] += 1 if not self.dry_run: continue if self.copy_image(source_path, dest_path): # Determine split from the path split = "unknown" if "train" in img_filename.lower(): split = "train" elif "val" in img_filename.lower(): split = "val" elif "test" in img_filename.lower(): split = "test" new_img = { "id": new_id, "file_name": relative_path, "width": img.get('width', 0), "height": img.get('height', 0), "dataset": dataset_name, "split": split, "original_filename": img_filename } self.combined_data['images'].append(new_img) self.stats[dataset_name]["total_images"] += 1 if old_id in images_with_persons: self.stats[dataset_name]["images_with_persons"] += 1 self.stats[dataset_name]["splits"].add(split) # Process annotations for ann in data.get('annotations', []): if ann['category_id'] not in category_mapping: continue if ann['image_id'] not in image_id_mapping: continue # CrowdHuman uses iscrowd flag for heavily occluded persons is_crowd = ann.get('iscrowd', 0) == 1 new_ann = { "id": self.annotation_id_counter, "image_id": image_id_mapping[ann['image_id']], "category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0 "bbox": ann['bbox'], "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]), "segmentation": ann.get('segmentation', []), "iscrowd": ann.get('iscrowd', 0) } self.combined_data['annotations'].append(new_ann) self.annotation_id_counter += 1 self.stats[dataset_name]["total_annotations"] += 1 if is_crowd: self.stats[dataset_name]["crowd_annotations"] += 1 self.stats[dataset_name]["ignore_annotations"] += 1 def process_objects365(self): """Process Objects365 dataset (only person category).""" dataset_name = "objects365" base_path = Path("/mnt/archive/datasets/OpenDataLab___Objects365") # Process train split ann_path = base_path / "raw/Objects365/data/train/zhiyuan_objv2_train.json" if ann_path.exists(): logger.info(f"Processing {dataset_name} - train split (this may take a while...)") with open(ann_path, 'r') as f: data = json.load(f) # Find person category ID person_cat_id = None for cat in data.get('categories', []): if cat['name'].lower() == 'person': person_cat_id = cat['id'] break if person_cat_id is None: logger.warning("Person category not found in Objects365") return # Process images and annotations image_id_mapping = {} images_with_persons = set() # First pass: identify images with person annotations logger.info("Identifying images with person annotations...") for ann in tqdm(data.get('annotations', []), desc="Scanning annotations"): if ann['category_id'] == person_cat_id: images_with_persons.add(ann['image_id']) logger.info(f"Found {len(images_with_persons)} images with persons") # Create image ID to image dict for faster lookup id_to_image = {img['id']: img for img in data.get('images', [])} # Process only images with persons processed = 0 for img_id in 
tqdm(images_with_persons, desc=f"Processing {dataset_name} images"): if img_id not in id_to_image: continue img = id_to_image[img_id] old_id = img['id'] new_id = self.image_id_counter self.image_id_counter += 1 image_id_mapping[old_id] = new_id img_filename = img['file_name'] # Objects365 image paths need adjustment # JSON has: "images/v1/patch8/objects365_v1_00420917.jpg" # Actual path: "train/patch8/objects365_v1_00420917.jpg" # Extract patch and filename from the path path_parts = Path(img_filename).parts if len(path_parts) >= 3: # Get patch directory and filename patch_dir = path_parts[-2] # e.g., "patch8" filename = path_parts[-1] # e.g., "objects365_v1_00420917.jpg" source_path = base_path / "raw/Objects365/data/train" / patch_dir / filename else: source_path = base_path / "raw/Objects365/data" / img_filename if not source_path.exists(): # In dry run, we still want to count the image even if file doesn't exist if not self.dry_run: continue else: self.stats[dataset_name]["missing_images"] = self.stats[dataset_name].get("missing_images", 0) + 1 relative_path, dest_path = self.get_new_image_path(dataset_name, Path(img_filename).name, new_id) # In dry run or if file exists, add to dataset if self.dry_run or self.copy_image(source_path, dest_path): new_img = { "id": new_id, "file_name": relative_path, "width": img.get('width', 0), "height": img.get('height', 0), "dataset": dataset_name, "split": "train", "original_filename": img_filename } self.combined_data['images'].append(new_img) self.stats[dataset_name]["total_images"] += 1 self.stats[dataset_name]["images_with_persons"] += 1 self.stats[dataset_name]["splits"].add("train") processed += 1 # Process annotations for ann in tqdm(data.get('annotations', []), desc="Processing annotations"): if ann['category_id'] != person_cat_id: continue if ann['image_id'] not in image_id_mapping: continue is_crowd = ann.get('iscrowd', 0) == 1 new_ann = { "id": self.annotation_id_counter, "image_id": image_id_mapping[ann['image_id']], "category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0 "bbox": ann['bbox'], "area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]), "segmentation": ann.get('segmentation', []), "iscrowd": ann.get('iscrowd', 0) } self.combined_data['annotations'].append(new_ann) self.annotation_id_counter += 1 self.stats[dataset_name]["total_annotations"] += 1 if is_crowd: self.stats[dataset_name]["crowd_annotations"] += 1 self.stats[dataset_name]["ignore_annotations"] += 1 def visualize_samples(self, num_samples: int = 10, dataset_filter: str = None): """Visualize random samples with bounding boxes using cv2.imshow.""" if len(self.combined_data['images']) == 0: logger.warning("No images to visualize") return # Create image_id to annotations mapping img_to_anns = defaultdict(list) for ann in self.combined_data['annotations']: img_to_anns[ann['image_id']].append(ann) # Sample random images that have annotations images_with_anns = [img for img in self.combined_data['images'] if img['id'] in img_to_anns] # Filter by dataset if specified if dataset_filter: images_with_anns = [img for img in images_with_anns if img.get('dataset', '').lower() == dataset_filter.lower()] if not images_with_anns: logger.warning(f"No images with annotations from dataset '{dataset_filter}'") return if not images_with_anns: logger.warning("No images with annotations to visualize") return num_samples = min(num_samples, len(images_with_anns)) sampled_images = random.sample(images_with_anns, num_samples) logger.info(f"Visualizing {num_samples} sample 
images with bounding boxes...") logger.info("Press any key to see next image, 'q' to quit") for idx, img_info in enumerate(sampled_images): # Determine the actual image path based on dataset dataset_name = img_info.get('dataset', '') original_filename = img_info.get('original_filename', img_info['file_name']) # Find the source image path if self.dry_run or not (self.images_dir / img_info['file_name']).exists(): # In dry-run mode or if copied image doesn't exist, load from original location source_path = None if dataset_name == "rgbt_drone_person": base = Path("/mnt/archive/person_drone/RGBTDronePerson-20250828T031729Z-1-001/RGBTDronePerson/RGBTDronePerson") split = img_info.get('split', 'train') modality = img_info.get('modality', 'thermal') if split == "sub_train": split = "train" source_path = base / split / modality / original_filename elif dataset_name == "search_and_rescue": base = Path("/mnt/archive/person_drone/search-and-rescue") split = img_info.get('split', 'train') source_path = base / split / "images" / Path(original_filename).name elif dataset_name == "stanford_drone": base = Path("/mnt/archive/person_drone/stanford_drone_coco") source_path = base / "train_images" / original_filename elif dataset_name == "vtsar": base = Path("/mnt/archive/person_drone/vtsar_coco") source_path = base / original_filename elif dataset_name == "vtuav": base = Path("/mnt/archive/person_drone/vtuav_coco") source_path = base / original_filename elif dataset_name == "wisard": base = Path("/mnt/archive/person_drone/wisard_coco") source_path = base / original_filename elif dataset_name == "visdrone2019": base = Path("/mnt/archive/person_drone/VisDrone2019-DET") source_path = base / original_filename elif dataset_name == "seadronessee": base = Path("/mnt/archive/person_drone/seadronessee") split = img_info.get('split', 'train') source_path = base / "images" / split / Path(original_filename).name elif dataset_name == "lisa_alert": base = Path("/mnt/archive/person_drone/lisa_alert") source_path = base / "images" / Path(original_filename).name elif dataset_name == "crowd_human": base = Path("/mnt/archive/person_drone/crowd_human") # original_filename contains the full path like "CrowdHuman_train/Images/xxx.jpg" source_path = base / original_filename elif dataset_name == "objects365": base = Path("/mnt/archive/datasets/OpenDataLab___Objects365") path_parts = Path(original_filename).parts if len(path_parts) >= 3: patch_dir = path_parts[-2] filename = path_parts[-1] source_path = base / "raw/Objects365/data/train" / patch_dir / filename else: source_path = base / "raw/Objects365/data" / original_filename if source_path and source_path.exists(): img = cv2.imread(str(source_path)) else: # Create placeholder if image not found img = np.zeros((img_info.get('height', 480), img_info.get('width', 640), 3), dtype=np.uint8) img[:] = (50, 50, 50) cv2.putText(img, "Image not found", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) else: # Load from copied location img_path = self.images_dir / img_info['file_name'] img = cv2.imread(str(img_path)) if img is None: # Create placeholder if loading failed img = np.zeros((img_info.get('height', 480), img_info.get('width', 640), 3), dtype=np.uint8) img[:] = (50, 50, 50) cv2.putText(img, "Failed to load image", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) # Draw annotations annotations = img_to_anns[img_info['id']] for ann in annotations: # Get bbox x, y, w, h = ann['bbox'] x, y, w, h = int(x), int(y), int(w), int(h) # Choose color based on category if 
ann.get('category_id', 0) == 1: color = (0, 165, 255) # Orange for ignore regions label = "ignore" elif ann.get('iscrowd', 0): color = (0, 165, 255) # Orange for crowd (should be in ignore now) label = "crowd" else: color = (0, 255, 0) # Green for individual person label = "person" # Draw rectangle cv2.rectangle(img, (x, y), (x + w, y + h), color, 2) # Add label label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) cv2.rectangle(img, (x, y - label_size[1] - 4), (x + label_size[0], y), color, -1) cv2.putText(img, label, (x, y - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) # Add image info info_text = [ f"Dataset: {img_info.get('dataset', 'unknown')}", f"Image ID: {img_info['id']}", f"Annotations: {len(annotations)}", f"Size: {img_info.get('width', 0)}x{img_info.get('height', 0)}" ] if 'split' in img_info: info_text.append(f"Split: {img_info['split']}") if 'modality' in img_info: info_text.append(f"Modality: {img_info['modality']}") # Draw info background y_offset = 10 for text in info_text: text_size, _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1) cv2.rectangle(img, (5, y_offset), (10 + text_size[0], y_offset + text_size[1] + 5), (0, 0, 0), -1) cv2.putText(img, text, (10, y_offset + text_size[1]), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1) y_offset += text_size[1] + 10 # Display image window_name = f"Sample {idx+1}/{num_samples} - {img_info.get('dataset', 'unknown')} - ID: {img_info['id']}" cv2.namedWindow(window_name, cv2.WINDOW_KEEPRATIO) cv2.imshow(window_name, img) # Wait for key press key = cv2.waitKey(0) & 0xFF cv2.destroyWindow(window_name) if key == ord('q'): logger.info("Visualization stopped by user") break cv2.destroyAllWindows() logger.info("Visualization complete") def save_combined_dataset(self): """Save the combined dataset to disk.""" if self.dry_run: logger.info(f"DRY RUN: Would save combined dataset to {self.annotations_file}") logger.info(f"DRY RUN: Dataset would contain {len(self.combined_data['images'])} images and {len(self.combined_data['annotations'])} annotations") else: logger.info(f"Saving combined dataset to {self.annotations_file}") with open(self.annotations_file, 'w') as f: json.dump(self.combined_data, f) logger.info("Dataset saved successfully") def print_statistics(self): """Print detailed statistics about the combined dataset.""" print("\n" + "="*80) if self.dry_run: print("COMBINED DATASET STATISTICS (DRY RUN)") else: print("COMBINED DATASET STATISTICS") print("="*80) total_images = len(self.combined_data['images']) total_annotations = len(self.combined_data['annotations']) print(f"\nOVERALL:") print(f" Total Images: {total_images:,}") print(f" Total Annotations: {total_annotations:,}") print(f" Average Annotations per Image: {total_annotations/max(total_images, 1):.2f}") print("\n" + "-"*80) print("PER-DATASET BREAKDOWN:") print("-"*80) # Calculate percentages for dataset_name, stats in sorted(self.stats.items()): img_pct = 100 * stats['images_with_persons'] / max(total_images, 1) ann_pct = 100 * stats['total_annotations'] / max(total_annotations, 1) print(f"\n{dataset_name.upper()}:") print(f" Total images: {stats['total_images']:,}") print(f" Images with persons: {stats['images_with_persons']:,} ({img_pct:.1f}% of total)") print(f" Total annotations: {stats['total_annotations']:,} ({ann_pct:.1f}% of total)") if stats.get('missing_images', 0) > 0: print(f" ⚠️ Missing images: {stats['missing_images']:,}") if stats['total_annotations'] > 0: print(f" Crowd annotations: {stats['crowd_annotations']:,} 
({100*stats['crowd_annotations']/stats['total_annotations']:.1f}%)") print(f" Ignore annotations: {stats['ignore_annotations']:,} ({100*stats['ignore_annotations']/stats['total_annotations']:.1f}%)") # Show pseudolabel statistics if available (for WISARD) if 'pseudolabeled_annotations' in stats: print(f" Pseudolabeled: {stats['pseudolabeled_annotations']:,} ({100*stats['pseudolabeled_annotations']/stats['total_annotations']:.1f}%)") print(f" Verified: {stats['verified_annotations']:,} ({100*stats['verified_annotations']/stats['total_annotations']:.1f}%)") if stats.get('low_confidence_annotations', 0) > 0 or stats.get('high_confidence_annotations', 0) > 0: print(f" Low confidence (<0.3): {stats['low_confidence_annotations']:,}") print(f" High confidence (≥0.7): {stats['high_confidence_annotations']:,}") if stats['modalities']: print(f" Modalities: {', '.join(sorted(stats['modalities']))}") if stats['splits']: print(f" Splits: {', '.join(sorted(stats['splits']))}") if stats['images_with_persons'] > 0: print(f" Avg annotations/image: {stats['total_annotations']/stats['images_with_persons']:.2f}") # Image statistics if self.combined_data['images']: widths = [img['width'] for img in self.combined_data['images'] if img['width'] > 0] heights = [img['height'] for img in self.combined_data['images'] if img['height'] > 0] if widths and heights: print("\n" + "-"*80) print("IMAGE DIMENSIONS:") print("-"*80) print(f" Width range: {min(widths)} - {max(widths)} pixels") print(f" Height range: {min(heights)} - {max(heights)} pixels") print(f" Average width: {sum(widths)/len(widths):.0f} pixels") print(f" Average height: {sum(heights)/len(heights):.0f} pixels") # Annotation statistics if self.combined_data['annotations']: areas = [ann['area'] for ann in self.combined_data['annotations'] if ann['area'] > 0] crowd_count = sum(1 for ann in self.combined_data['annotations'] if ann['iscrowd'] == 1) print("\n" + "-"*80) print("ANNOTATION STATISTICS:") print("-"*80) print(f" Total bounding boxes: {len(self.combined_data['annotations']):,}") print(f" Crowd annotations: {crowd_count:,} ({100*crowd_count/len(self.combined_data['annotations']):.1f}%)") if areas: print(f" Area range: {min(areas):.0f} - {max(areas):.0f} pixels²") print(f" Average area: {sum(areas)/len(areas):.0f} pixels²") # Dataset contribution summary table print("\n" + "-"*80) print("DATASET CONTRIBUTION SUMMARY:") print("-"*80) print(f"{'Dataset':<25} {'Total Images':>12} {'With Person':>12} {'%':>7} {'Annotations':>12} {'%':>7}") print("-"*100) # Only show datasets that actually have images datasets_with_images = [(name, stats) for name, stats in self.stats.items() if stats['total_images'] > 0] for dataset_name, stats in sorted(datasets_with_images, key=lambda x: x[1]['total_images'], reverse=True): img_pct = 100 * stats['total_images'] / max(total_images, 1) ann_pct = 100 * stats['total_annotations'] / max(total_annotations, 1) print(f"{dataset_name:<25} {stats['total_images']:>12,} {stats['images_with_persons']:>12,} {img_pct:>6.1f}% {stats['total_annotations']:>12,} {ann_pct:>6.1f}%") print("-"*100) # Calculate totals for images with persons total_with_persons = sum(stats['images_with_persons'] for stats in self.stats.values()) print(f"{'TOTAL':<25} {total_images:>12,} {total_with_persons:>12,} {'100.0%':>7} {total_annotations:>12,} {'100.0%':>7}") print("\n" + "="*80) def check_datasets(self): """Check which datasets are available.""" datasets = { "RGBTDronePerson": 
            Path("/mnt/archive/person_drone/RGBTDronePerson-20250828T031729Z-1-001/RGBTDronePerson"),
            "search-and-rescue": Path("/mnt/archive/person_drone/search-and-rescue"),
            "stanford_drone_coco": Path("/mnt/archive/person_drone/stanford_drone_coco"),
            "vtsar_coco": Path("/mnt/archive/person_drone/vtsar_coco"),
            "vtuav_coco": Path("/mnt/archive/person_drone/vtuav_coco"),
            "wisard_coco": Path("/mnt/archive/person_drone/wisard_coco"),
            "VisDrone2019-DET": Path("/mnt/archive/person_drone/VisDrone2019-DET"),
            "SeaDronesSee": Path("/mnt/archive/person_drone/seadronessee"),
            "LISA Alert": Path("/mnt/archive/person_drone/lisa_alert"),
            "CrowdHuman": Path("/mnt/archive/person_drone/crowd_human"),
            "Objects365": Path("/mnt/archive/datasets/OpenDataLab___Objects365")
        }

        print("\n" + "="*80)
        print("CHECKING DATASET AVAILABILITY")
        print("="*80)

        available = []
        missing = []
        for name, path in datasets.items():
            if path.exists():
                available.append(name)
                print(f"✅ {name}: Found at {path}")
            else:
                missing.append(name)
                print(f"❌ {name}: Not found at {path}")

        print(f"\nSummary: {len(available)} available, {len(missing)} missing")
        print("="*80 + "\n")
        return available, missing

    def run(self, visualize: bool = False, num_vis_samples: int = 10, vis_dataset: str = None):
        """Run the complete dataset combination pipeline."""
        # Check dataset availability
        available, missing = self.check_datasets()

        if missing and not self.dry_run:
            response = input(f"\n⚠️ {len(missing)} dataset(s) missing. Continue anyway? (y/n): ")
            if response.lower() != 'y':
                logger.info("Aborted by user")
                return

        logger.info("Starting dataset combination process...")

        # Process each dataset
        self.process_rgbt_drone_person()
        self.process_search_and_rescue()
        # bad annotations
        # self.process_stanford_drone()

        # Process COCO format datasets
        self.process_coco_format_dataset("vtsar", Path("/mnt/archive/person_drone/vtsar_coco"))
        # bad annotations
        # self.process_coco_format_dataset("vtuav", Path("/mnt/archive/person_drone/vtuav_coco"))

        # Process WISARD with special pseudolabel handling
        self.process_wisard()

        # Process VisDrone
        self.process_visdrone()

        # Process SeaDronesSee
        self.process_seadronessee()

        # Process LISA Alert
        self.process_lisa_alert()

        # Process CrowdHuman
        self.process_crowd_human()

        # # Process Objects365 (limited due to size)
        # self.process_objects365()

        # Save combined dataset
        self.save_combined_dataset()

        # Create visualizations if requested
        if visualize:
            self.visualize_samples(num_vis_samples, dataset_filter=vis_dataset)

        # Print statistics
        self.print_statistics()

        logger.info("Dataset combination complete!")


def main():
    parser = argparse.ArgumentParser(description="Combine multiple person detection datasets into a single COCO format dataset")
    parser.add_argument(
        "--output-dir",
        type=str,
        default="/mnt/archive/person_drone/combined_dataset",
        help="Output directory for the combined dataset"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Run without copying images to check correctness"
    )
    parser.add_argument(
        "--skip-objects365",
        action="store_true",
        help="Skip Objects365 dataset (it's very large)"
    )
    parser.add_argument(
        "--visualize",
        action="store_true",
        help="Create visualization samples with bounding boxes"
    )
    parser.add_argument(
        "--num-vis-samples",
        type=int,
        default=200,
        help="Number of samples to visualize with cv2.imshow (default: 200)"
    )
    parser.add_argument(
        "--vis-dataset",
        type=str,
        default=None,
        help="Visualize samples only from a specific dataset (e.g., stanford_drone, visdrone2019)"
    )
    parser.add_argument(
        "--images-per-folder",
        type=int,
        default=10000,
        help="Number of images per folder for pagination (default: 10000)"
    )

    args = parser.parse_args()

    combiner = DatasetCombiner(args.output_dir, dry_run=args.dry_run, images_per_folder=args.images_per_folder)

    if args.skip_objects365:
        # Override the process_objects365 method to skip it
        combiner.process_objects365 = lambda: logger.info("Skipping Objects365 dataset")

    combiner.run(visualize=args.visualize, num_vis_samples=args.num_vis_samples, vis_dataset=args.vis_dataset)


if __name__ == "__main__":
    main()
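# Example invocations (illustrative; "combine_datasets.py" is a placeholder for
# whatever name this file is saved under):
#
#   Validate annotations and source paths without copying any images:
#       python combine_datasets.py --dry-run
#
#   Build the combined dataset and spot-check 20 samples from VisDrone only:
#       python combine_datasets.py --output-dir /mnt/archive/person_drone/combined_dataset \
#           --visualize --num-vis-samples 20 --vis-dataset visdrone2019 --images-per-folder 10000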