#!/usr/bin/env python3
"""
Script to combine multiple drone/person detection datasets into a single COCO format dataset.
All person-related categories are merged into a single "person" category.
"""
import argparse
import json
import logging
import random
import shutil
from collections import defaultdict
from pathlib import Path
from typing import Optional, Tuple
import cv2
import numpy as np
from tqdm import tqdm
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
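# Typical invocations (a sketch; the script filename is assumed, the paths are
# the defaults wired into main() below):
#   python combine_datasets.py --dry-run                 # validate paths without copying
#   python combine_datasets.py --output-dir /mnt/archive/person_drone/combined_dataset
#   python combine_datasets.py --visualize --vis-dataset visdrone2019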
class DatasetCombiner:
def __init__(self, output_dir: str, dry_run: bool = False, images_per_folder: int = 10000):
self.output_dir = Path(output_dir)
self.images_dir = self.output_dir / "images"
self.annotations_file = self.output_dir / "annotations.json"
self.dry_run = dry_run
self.images_per_folder = images_per_folder
if self.dry_run:
logger.info("πŸ” DRY RUN MODE - No files will be copied")
# Create output directories
self.output_dir.mkdir(parents=True, exist_ok=True)
if not self.dry_run:
self.images_dir.mkdir(exist_ok=True)
# Initialize COCO format structure
self.combined_data = {
"info": {
"description": "Combined Person Detection Dataset from Multiple Drone Datasets",
"version": "1.0",
"year": 2024
},
"licenses": [],
"categories": [
{"id": 0, "name": "person", "supercategory": "person"},
{"id": 1, "name": "ignore", "supercategory": "ignore"}
],
"images": [],
"annotations": []
}
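        # Category convention: every person-like box maps to "person" (0);
        # crowd/group boxes and explicit ignore regions map to "ignore" (1)
        # so they can be masked out at training time.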
# Tracking
self.image_id_counter = 0
self.annotation_id_counter = 0
self.image_filename_mapping = {} # old_path -> new_filename
self.stats = defaultdict(lambda: {
"total_images": 0,
"total_annotations": 0,
"images_with_persons": 0,
"crowd_annotations": 0,
"ignore_annotations": 0,
"modalities": set(),
"splits": set(),
"missing_images": 0
})
def is_person_category(self, category_name: str) -> bool:
"""Check if a category name refers to a person."""
person_keywords = [
'person', 'people', 'pedestrian', 'human', 'crowd',
'rider', 'biker', 'skater', 'swimmer'
]
name_lower = category_name.lower()
return any(keyword in name_lower for keyword in person_keywords)
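    # Note: matching above is by lowercase substring, so source category names
    # such as "Pedestrian" or "swimmer" all count as person.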
def is_crowd_category(self, category_name: str) -> bool:
"""Check if a category should be marked as crowd."""
crowd_keywords = ['crowd', 'people', 'group']
return any(keyword in category_name.lower() for keyword in crowd_keywords)
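    # Worked example for the pagination below: with images_per_folder=10000,
    # image_id 12345 for dataset "visdrone2019" lands in folder "0000001" as
    # "visdrone2019_00012345.jpg", i.e. file_name "0000001/visdrone2019_00012345.jpg".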
def get_new_image_path(self, dataset_name: str, original_filename: str, image_id: int) -> Tuple[str, Path]:
"""Generate a new unique filename with pagination folder structure.
Returns:
Tuple of (relative_path_for_json, full_destination_path)
"""
ext = Path(original_filename).suffix
        # Calculate the 0-based folder number from the global image id
folder_num = image_id // self.images_per_folder
folder_name = f"{folder_num:07d}" # 0000000, 0000001, etc.
# Create filename
filename = f"{dataset_name}_{image_id:08d}{ext}"
        # Relative path stored in the JSON (e.g. "0000001/filename.jpg", relative to images/)
relative_path = f"{folder_name}/{filename}"
# Full destination path
folder_path = self.images_dir / folder_name
if not self.dry_run:
folder_path.mkdir(parents=True, exist_ok=True)
full_path = folder_path / filename
return relative_path, full_path
def copy_image(self, source_path: Path, dest_path: Path) -> bool:
"""Copy image to the combined dataset directory."""
if self.dry_run:
# In dry run, just check if source exists
if source_path.exists():
return True
else:
logger.warning(f"Source image not found: {source_path}")
return False
try:
if source_path.exists():
shutil.copy2(source_path, dest_path)
return True
else:
logger.warning(f"Source image not found: {source_path}")
return False
except Exception as e:
logger.error(f"Error copying image {source_path}: {e}")
return False
def process_rgbt_drone_person(self):
"""Process RGBTDronePerson dataset."""
dataset_name = "rgbt_drone_person"
base_path = Path("/mnt/archive/person_drone/RGBTDronePerson-20250828T031729Z-1-001/RGBTDronePerson")
annotation_files = [
("train_thermal.json", "train", "thermal"),
("val_thermal.json", "val", "thermal"),
("sub_train_thermal.json", "sub_train", "thermal"),
("sub_train_visible.json", "sub_train", "visible")
]
for ann_file, split, modality in annotation_files:
ann_path = base_path / ann_file
if not ann_path.exists():
logger.warning(f"Annotation file not found: {ann_path}")
continue
logger.info(f"Processing {dataset_name} - {split} - {modality}")
with open(ann_path, 'r') as f:
data = json.load(f)
# Map category IDs
category_mapping = {}
for cat in data.get('categories', []):
if self.is_person_category(cat['name']):
category_mapping[cat['id']] = 0
# Process images and annotations
image_id_mapping = {}
images_with_persons = set()
# First pass: identify images with person annotations
for ann in data.get('annotations', []):
if ann['category_id'] in category_mapping:
images_with_persons.add(ann['image_id'])
# Process ALL images (drone dataset - keep all images)
for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} {split} {modality} images"):
old_id = img['id']
new_id = self.image_id_counter
self.image_id_counter += 1
image_id_mapping[old_id] = new_id
# Determine image path based on split and modality
img_filename = img['file_name']
# RGBTDronePerson has structure: RGBTDronePerson/{split}/{modality}/{filename}
if split == "sub_train":
# sub_train doesn't have its own folder, uses train folder
source_path = base_path / "RGBTDronePerson" / "train" / modality / img_filename
else:
source_path = base_path / "RGBTDronePerson" / split / modality / img_filename
relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id)
if not source_path.exists():
self.stats[dataset_name]["missing_images"] += 1
if not self.dry_run:
continue
if self.copy_image(source_path, dest_path):
new_img = {
"id": new_id,
"file_name": relative_path,
"width": img.get('width', 0),
"height": img.get('height', 0),
"dataset": dataset_name,
"split": split,
"modality": modality,
"original_filename": img_filename
}
self.combined_data['images'].append(new_img)
self.stats[dataset_name]["total_images"] += 1
if old_id in images_with_persons:
self.stats[dataset_name]["images_with_persons"] += 1
self.stats[dataset_name]["modalities"].add(modality)
self.stats[dataset_name]["splits"].add(split)
# Process annotations
for ann in data.get('annotations', []):
if ann['category_id'] not in category_mapping:
continue
if ann['image_id'] not in image_id_mapping:
continue
old_cat_name = next((c['name'] for c in data['categories'] if c['id'] == ann['category_id']), '')
is_crowd = self.is_crowd_category(old_cat_name) or ann.get('iscrowd', 0) == 1
# Crowd annotations go to ignore category
new_ann = {
"id": self.annotation_id_counter,
"image_id": image_id_mapping[ann['image_id']],
"category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0
"bbox": ann['bbox'],
"area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
"segmentation": ann.get('segmentation', []),
"iscrowd": 1 if is_crowd else 0
}
self.combined_data['annotations'].append(new_ann)
self.annotation_id_counter += 1
self.stats[dataset_name]["total_annotations"] += 1
if is_crowd:
self.stats[dataset_name]["crowd_annotations"] += 1
self.stats[dataset_name]["ignore_annotations"] += 1
def process_search_and_rescue(self):
"""Process Search and Rescue dataset."""
dataset_name = "search_and_rescue"
base_path = Path("/mnt/archive/person_drone/search-and-rescue")
splits = ["train", "valid", "test"]
for split in splits:
ann_path = base_path / f"{split}.json"
if not ann_path.exists():
logger.warning(f"Annotation file not found: {ann_path}")
continue
logger.info(f"Processing {dataset_name} - {split}")
with open(ann_path, 'r') as f:
data = json.load(f)
# Map category IDs (human -> person)
category_mapping = {}
for cat in data.get('categories', []):
if self.is_person_category(cat['name']):
category_mapping[cat['id']] = 0
# Process images and annotations
image_id_mapping = {}
images_with_persons = set()
# First pass: identify images with person annotations
for ann in data.get('annotations', []):
if ann['category_id'] in category_mapping:
images_with_persons.add(ann['image_id'])
# Process ALL images (drone dataset - keep all images)
for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} {split} images"):
old_id = img['id']
new_id = self.image_id_counter
self.image_id_counter += 1
image_id_mapping[old_id] = new_id
img_filename = img['file_name']
# Search and rescue has images in train/images, valid/images, test/images folders
source_path = base_path / split / "images" / img_filename
relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id)
if self.copy_image(source_path, dest_path):
new_img = {
"id": new_id,
"file_name": relative_path,
"width": img.get('width', 0),
"height": img.get('height', 0),
"dataset": dataset_name,
"split": split,
"original_filename": img_filename
}
self.combined_data['images'].append(new_img)
self.stats[dataset_name]["total_images"] += 1
if old_id in images_with_persons:
self.stats[dataset_name]["images_with_persons"] += 1
self.stats[dataset_name]["splits"].add(split)
# Process annotations
for ann in data.get('annotations', []):
if ann['category_id'] not in category_mapping:
continue
if ann['image_id'] not in image_id_mapping:
continue
is_crowd = ann.get('iscrowd', 0) == 1
new_ann = {
"id": self.annotation_id_counter,
"image_id": image_id_mapping[ann['image_id']],
"category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0
"bbox": ann['bbox'],
"area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
"segmentation": ann.get('segmentation', []),
"iscrowd": ann.get('iscrowd', 0)
}
self.combined_data['annotations'].append(new_ann)
self.annotation_id_counter += 1
self.stats[dataset_name]["total_annotations"] += 1
if is_crowd:
self.stats[dataset_name]["crowd_annotations"] += 1
self.stats[dataset_name]["ignore_annotations"] += 1
def process_stanford_drone(self):
"""Process Stanford Drone dataset."""
dataset_name = "stanford_drone"
base_path = Path("/mnt/archive/person_drone/stanford_drone_coco")
ann_path = base_path / "train.json"
if not ann_path.exists():
logger.warning(f"Annotation file not found: {ann_path}")
return
logger.info(f"Processing {dataset_name}")
with open(ann_path, 'r') as f:
data = json.load(f)
# Map category IDs (Pedestrian, Biker, Skater -> person)
category_mapping = {}
for cat in data.get('categories', []):
if self.is_person_category(cat['name']):
category_mapping[cat['id']] = 0
# Process images and annotations
image_id_mapping = {}
images_with_persons = set()
# First pass: identify images with person annotations
for ann in data.get('annotations', []):
if ann['category_id'] in category_mapping:
images_with_persons.add(ann['image_id'])
# Process ALL images (drone dataset - keep all images)
for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"):
old_id = img['id']
new_id = self.image_id_counter
self.image_id_counter += 1
image_id_mapping[old_id] = new_id
img_filename = img['file_name']
# Stanford drone has images in train_images folder
possible_paths = [
base_path / "train_images" / img_filename,
base_path / img_filename,
base_path / "images" / img_filename
]
source_path = None
for path in possible_paths:
if path.exists():
source_path = path
break
if source_path is None:
logger.warning(f"Image not found in any expected location: {img_filename}")
continue
relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id)
if self.copy_image(source_path, dest_path):
new_img = {
"id": new_id,
"file_name": relative_path,
"width": img.get('width', 0),
"height": img.get('height', 0),
"dataset": dataset_name,
"split": "train",
"original_filename": img_filename
}
self.combined_data['images'].append(new_img)
self.stats[dataset_name]["total_images"] += 1
if old_id in images_with_persons:
self.stats[dataset_name]["images_with_persons"] += 1
self.stats[dataset_name]["splits"].add("train")
# Process annotations
for ann in data.get('annotations', []):
if ann['category_id'] not in category_mapping:
continue
if ann['image_id'] not in image_id_mapping:
continue
is_crowd = ann.get('iscrowd', 0) == 1
new_ann = {
"id": self.annotation_id_counter,
"image_id": image_id_mapping[ann['image_id']],
"category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0
"bbox": ann['bbox'],
"area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
"segmentation": ann.get('segmentation', []),
"iscrowd": ann.get('iscrowd', 0)
}
self.combined_data['annotations'].append(new_ann)
self.annotation_id_counter += 1
self.stats[dataset_name]["total_annotations"] += 1
if is_crowd:
self.stats[dataset_name]["crowd_annotations"] += 1
self.stats[dataset_name]["ignore_annotations"] += 1
def process_coco_format_dataset(self, dataset_name: str, base_path: Path, ann_filename: str = "annotations.json"):
"""Generic processor for COCO format datasets."""
ann_path = base_path / ann_filename
if not ann_path.exists():
logger.warning(f"Annotation file not found: {ann_path}")
return
logger.info(f"Processing {dataset_name}")
with open(ann_path, 'r') as f:
data = json.load(f)
# Map category IDs
category_mapping = {}
for cat in data.get('categories', []):
if self.is_person_category(cat['name']):
category_mapping[cat['id']] = 0
# Process images and annotations
image_id_mapping = {}
images_with_persons = set()
# First pass: identify images with person annotations
for ann in data.get('annotations', []):
if ann['category_id'] in category_mapping:
images_with_persons.add(ann['image_id'])
# Process ALL images (drone dataset - keep all images)
for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"):
old_id = img['id']
new_id = self.image_id_counter
self.image_id_counter += 1
image_id_mapping[old_id] = new_id
img_filename = img['file_name']
# Most COCO datasets have the path included in file_name
# Try different possible paths
possible_paths = [
base_path / img_filename, # Full path as specified in JSON
base_path / "images" / img_filename,
base_path / Path(img_filename).name # Just filename without path
]
source_path = None
for path in possible_paths:
if path.exists():
source_path = path
break
if source_path is None:
logger.warning(f"Image not found: {img_filename}")
continue
relative_path, dest_path = self.get_new_image_path(dataset_name, Path(img_filename).name, new_id)
if self.copy_image(source_path, dest_path):
new_img = {
"id": new_id,
"file_name": relative_path,
"width": img.get('width', 0),
"height": img.get('height', 0),
"dataset": dataset_name,
"original_filename": img_filename
}
# Add split info if available
if 'split' in img:
new_img['split'] = img['split']
self.stats[dataset_name]["splits"].add(img['split'])
self.combined_data['images'].append(new_img)
self.stats[dataset_name]["total_images"] += 1
if old_id in images_with_persons:
self.stats[dataset_name]["images_with_persons"] += 1
# Process annotations
for ann in data.get('annotations', []):
if ann['category_id'] not in category_mapping:
continue
if ann['image_id'] not in image_id_mapping:
continue
# Check for crowd based on category name
old_cat = next((c for c in data['categories'] if c['id'] == ann['category_id']), None)
is_crowd = ann.get('iscrowd', 0)
if old_cat and self.is_crowd_category(old_cat['name']):
is_crowd = 1
new_ann = {
"id": self.annotation_id_counter,
"image_id": image_id_mapping[ann['image_id']],
"category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0
"bbox": ann['bbox'],
"area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
"segmentation": ann.get('segmentation', []),
"iscrowd": is_crowd
}
self.combined_data['annotations'].append(new_ann)
self.annotation_id_counter += 1
self.stats[dataset_name]["total_annotations"] += 1
if is_crowd:
self.stats[dataset_name]["crowd_annotations"] += 1
self.stats[dataset_name]["ignore_annotations"] += 1
def process_visdrone(self):
"""Process VisDrone2019-DET dataset."""
dataset_name = "visdrone2019"
base_path = Path("/mnt/archive/person_drone/VisDrone2019-DET")
ann_path = base_path / "annotations.json"
if not ann_path.exists():
logger.warning(f"Annotation file not found: {ann_path}")
return
logger.info(f"Processing {dataset_name}")
with open(ann_path, 'r') as f:
data = json.load(f)
# Map category IDs (pedestrian, people -> person)
category_mapping = {}
for cat in data.get('categories', []):
if self.is_person_category(cat['name']):
category_mapping[cat['id']] = 0
# Process images and annotations
image_id_mapping = {}
images_with_persons = set()
# First pass: identify images with person annotations
for ann in data.get('annotations', []):
if ann['category_id'] in category_mapping:
images_with_persons.add(ann['image_id'])
# Process ALL images (drone dataset - keep all images)
for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"):
old_id = img['id']
new_id = self.image_id_counter
self.image_id_counter += 1
image_id_mapping[old_id] = new_id
img_filename = img['file_name']
# VisDrone has images in train_images, val_images, test_images folders
# The file_name already includes the folder (e.g., "train_images/xxx.jpg")
possible_paths = [
base_path / img_filename, # This should work as file_name includes the folder
base_path / "images" / img_filename
]
source_path = None
for path in possible_paths:
if path.exists():
source_path = path
break
if source_path is None:
logger.warning(f"Image not found: {img_filename}")
continue
relative_path, dest_path = self.get_new_image_path(dataset_name, Path(img_filename).name, new_id)
if self.copy_image(source_path, dest_path):
new_img = {
"id": new_id,
"file_name": relative_path,
"width": img.get('width', 0),
"height": img.get('height', 0),
"dataset": dataset_name,
"original_filename": img_filename
}
self.combined_data['images'].append(new_img)
self.stats[dataset_name]["total_images"] += 1
if old_id in images_with_persons:
self.stats[dataset_name]["images_with_persons"] += 1
# Process annotations
for ann in data.get('annotations', []):
if ann['category_id'] not in category_mapping:
continue
if ann['image_id'] not in image_id_mapping:
continue
            # VisDrone boxes are never flagged as crowd here: even its "people"
            # category is mapped to individual persons rather than to ignore.
            is_crowd = 0
            new_ann = {
                "id": self.annotation_id_counter,
                "image_id": image_id_mapping[ann['image_id']],
                "category_id": 0,  # always person (is_crowd is forced to 0 above)
"bbox": ann['bbox'],
"area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
"segmentation": ann.get('segmentation', []),
"iscrowd": is_crowd
}
self.combined_data['annotations'].append(new_ann)
self.annotation_id_counter += 1
self.stats[dataset_name]["total_annotations"] += 1
if is_crowd:
self.stats[dataset_name]["crowd_annotations"] += 1
self.stats[dataset_name]["ignore_annotations"] += 1
def process_seadronessee(self):
"""Process SeaDronesSee dataset."""
dataset_name = "seadronessee"
base_path = Path("/mnt/archive/person_drone/seadronessee")
ann_dir = base_path / "annotations"
# Process train and val splits
for split, ann_file in [("train", "instances_train.json"), ("val", "instances_val.json")]:
ann_path = ann_dir / ann_file
if not ann_path.exists():
logger.warning(f"SeaDronesSee annotations missing for {split}: {ann_path}")
continue
logger.info(f"Processing {dataset_name} - {split}")
with open(ann_path, 'r') as f:
data = json.load(f)
# Create category mappings
swimmer_cat_id = None
ignore_cat_ids = set() # For boats, jetskis, and ignored
for cat in data.get('categories', []):
cat_name = cat.get('name', '').lower()
if cat_name == 'swimmer':
swimmer_cat_id = cat['id']
elif cat_name in ['boat', 'jetski', 'ignored']:
ignore_cat_ids.add(cat['id'])
if swimmer_cat_id is None:
logger.warning(f"No 'swimmer' category found in {ann_path}")
# Identify images with swimmers or ignore regions
images_with_persons = set()
images_with_ignore = set()
for ann in data.get('annotations', []):
cat_id = ann.get('category_id')
img_id = ann['image_id']
if cat_id == swimmer_cat_id:
images_with_persons.add(img_id)
elif cat_id in ignore_cat_ids:
images_with_ignore.add(img_id)
# Process images
image_id_mapping = {}
for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} {split} images"):
old_id = img['id']
new_id = self.image_id_counter
self.image_id_counter += 1
image_id_mapping[old_id] = new_id
img_filename = img.get('file_name', '')
source_path = base_path / "images" / split / img_filename
relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id)
if not source_path.exists():
self.stats[dataset_name]["missing_images"] += 1
if not self.dry_run:
continue
if self.copy_image(source_path, dest_path):
new_img = {
"id": new_id,
"file_name": relative_path,
"width": img.get('width', 0),
"height": img.get('height', 0),
"dataset": dataset_name,
"split": split,
"original_filename": img_filename
}
self.combined_data['images'].append(new_img)
self.stats[dataset_name]["total_images"] += 1
if old_id in images_with_persons:
self.stats[dataset_name]["images_with_persons"] += 1
self.stats[dataset_name]["splits"].add(split)
# Process annotations
for ann in data.get('annotations', []):
cat_id = ann.get('category_id')
img_id = ann['image_id']
if img_id not in image_id_mapping:
continue
# Determine target category
if cat_id == swimmer_cat_id:
# Swimmer -> person (0) or ignore (1) if crowd
is_crowd = ann.get('iscrowd', 0) == 1
target_cat = 1 if is_crowd else 0
elif cat_id in ignore_cat_ids:
# Boats, jetskis, ignored -> ignore (1)
target_cat = 1
is_crowd = 1 # Treat all ignore regions as crowd
else:
# Skip other categories (life_saving_appliances, buoy)
continue
new_ann = {
"id": self.annotation_id_counter,
"image_id": image_id_mapping[img_id],
"category_id": target_cat,
"bbox": ann.get('bbox', []),
"area": ann.get('area', ann.get('bbox', [0, 0, 0, 0])[2] * ann.get('bbox', [0, 0, 0, 0])[3]),
"segmentation": ann.get('segmentation', []),
"iscrowd": is_crowd if cat_id == swimmer_cat_id else 1
}
self.combined_data['annotations'].append(new_ann)
self.annotation_id_counter += 1
self.stats[dataset_name]["total_annotations"] += 1
if target_cat == 1:
self.stats[dataset_name]["ignore_annotations"] += 1
if new_ann['iscrowd']:
self.stats[dataset_name]["crowd_annotations"] += 1
def process_lisa_alert(self):
"""Process LISA Alert dataset - combines all splits (train/val/test) into one."""
dataset_name = "lisa_alert"
base_path = Path("/mnt/archive/person_drone/lisa_alert")
ann_dir = base_path / "annotations"
logger.info(f"Processing {dataset_name} - combining all splits")
# We'll process all splits but combine them into one dataset
splits_to_process = ["train", "val", "test"]
# Track which images we've already processed (to avoid duplicates)
processed_images = set()
for split in splits_to_process:
ann_path = ann_dir / f"{split}.json"
if not ann_path.exists():
logger.warning(f"LISA Alert annotation file not found: {ann_path}")
continue
logger.info(f"Processing {dataset_name} - {split} split")
with open(ann_path, 'r') as f:
data = json.load(f)
# Map category IDs (Pedestrian -> person)
category_mapping = {}
for cat in data.get('categories', []):
if self.is_person_category(cat['name']):
category_mapping[cat['id']] = 0
# Process images and annotations
image_id_mapping = {}
images_with_persons = set()
# First pass: identify images with person annotations
for ann in data.get('annotations', []):
if ann['category_id'] in category_mapping:
images_with_persons.add(ann['image_id'])
# Process images
for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} {split} images"):
old_id = img['id']
img_filename = str(img['file_name']).strip()
# LISA Alert filenames in JSON don't have .jpg extension
if not img_filename.endswith('.jpg'):
img_filename = img_filename + '.jpg'
# Skip if we've already processed this image from another split
if img_filename in processed_images:
continue
processed_images.add(img_filename)
new_id = self.image_id_counter
self.image_id_counter += 1
image_id_mapping[old_id] = new_id
# Images are in the images/ folder
source_path = base_path / "images" / img_filename
relative_path, dest_path = self.get_new_image_path(dataset_name, img_filename, new_id)
if not source_path.exists():
self.stats[dataset_name]["missing_images"] += 1
if not self.dry_run:
continue
if self.copy_image(source_path, dest_path):
new_img = {
"id": new_id,
"file_name": relative_path,
"width": img.get('width', 0),
"height": img.get('height', 0),
"dataset": dataset_name,
"original_filename": img_filename
}
self.combined_data['images'].append(new_img)
self.stats[dataset_name]["total_images"] += 1
if old_id in images_with_persons:
self.stats[dataset_name]["images_with_persons"] += 1
# Process annotations
for ann in data.get('annotations', []):
if ann['category_id'] not in category_mapping:
continue
if ann['image_id'] not in image_id_mapping:
continue
# Check for crowd
old_cat_name = next((c['name'] for c in data['categories'] if c['id'] == ann['category_id']), '')
is_crowd = self.is_crowd_category(old_cat_name) or ann.get('iscrowd', 0) == 1
new_ann = {
"id": self.annotation_id_counter,
"image_id": image_id_mapping[ann['image_id']],
"category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0
"bbox": ann['bbox'],
"area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
"segmentation": ann.get('segmentation', []),
"iscrowd": 1 if is_crowd else 0
}
self.combined_data['annotations'].append(new_ann)
self.annotation_id_counter += 1
self.stats[dataset_name]["total_annotations"] += 1
if is_crowd:
self.stats[dataset_name]["crowd_annotations"] += 1
self.stats[dataset_name]["ignore_annotations"] += 1
def process_wisard(self):
"""Process WISARD dataset with special handling for pseudolabeled annotations."""
dataset_name = "wisard"
base_path = Path("/mnt/archive/person_drone/wisard_coco")
ann_path = base_path / "annotations.json"
if not ann_path.exists():
logger.warning(f"Annotation file not found: {ann_path}")
return
logger.info(f"Processing {dataset_name} - includes pseudolabeled annotations")
with open(ann_path, 'r') as f:
data = json.load(f)
# WISARD has special category mapping:
# Category 0: pseudolabeled persons
# Category 1: verified/original persons
# Both should map to our unified person category (0)
# Track statistics for pseudolabeled vs verified
pseudolabel_stats = {
"pseudolabeled": 0,
"verified": 0,
"low_confidence": 0, # confidence < 0.3
"high_confidence": 0 # confidence >= 0.7
}
# Process images and annotations
image_id_mapping = {}
images_with_persons = set()
# First pass: identify images with person annotations (both categories)
for ann in data.get('annotations', []):
if ann['category_id'] in [0, 1]: # Both are person categories
images_with_persons.add(ann['image_id'])
# Process ALL images
for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"):
old_id = img['id']
new_id = self.image_id_counter
self.image_id_counter += 1
image_id_mapping[old_id] = new_id
img_filename = img['file_name']
# WISARD images are organized in sequence folders
# The file_name already includes the folder structure
possible_paths = [
base_path / img_filename, # Full path as specified in JSON
base_path / Path(img_filename).name # Just filename without path
]
source_path = None
for path in possible_paths:
if path.exists():
source_path = path
break
if source_path is None:
# Many images were removed, so we just skip them
self.stats[dataset_name]["missing_images"] += 1
continue
relative_path, dest_path = self.get_new_image_path(dataset_name, Path(img_filename).name, new_id)
if self.copy_image(source_path, dest_path):
new_img = {
"id": new_id,
"file_name": relative_path,
"width": img.get('width', 0),
"height": img.get('height', 0),
"dataset": dataset_name,
"original_filename": img_filename
}
# Add WISARD-specific metadata
if 'sequence' in img:
new_img['sequence'] = img['sequence']
if 'modality' in img:
new_img['modality'] = img['modality']
self.stats[dataset_name]["modalities"].add(img['modality'])
if 'location' in img:
new_img['location'] = img['location']
if 'sensor' in img:
new_img['sensor'] = img['sensor']
self.combined_data['images'].append(new_img)
self.stats[dataset_name]["total_images"] += 1
if old_id in images_with_persons:
self.stats[dataset_name]["images_with_persons"] += 1
# Process annotations
for ann in data.get('annotations', []):
if ann['category_id'] not in [0, 1]: # Only process person categories
continue
if ann['image_id'] not in image_id_mapping:
continue
# All WISARD annotations map to person (0) in our unified dataset
# We preserve the pseudolabel metadata
is_crowd = ann.get('iscrowd', 0) == 1
new_ann = {
"id": self.annotation_id_counter,
"image_id": image_id_mapping[ann['image_id']],
"category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0
"bbox": ann['bbox'],
"area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
"segmentation": ann.get('segmentation', []),
"iscrowd": ann.get('iscrowd', 0)
}
# Preserve WISARD-specific metadata
if 'is_pseudolabel' in ann:
new_ann['is_pseudolabel'] = ann['is_pseudolabel']
if ann['is_pseudolabel']:
pseudolabel_stats['pseudolabeled'] += 1
if 'verified' in ann:
new_ann['verified'] = ann['verified']
if ann['verified']:
pseudolabel_stats['verified'] += 1
if 'confidence' in ann:
new_ann['confidence'] = ann['confidence']
if ann['confidence'] < 0.3:
pseudolabel_stats['low_confidence'] += 1
elif ann['confidence'] >= 0.7:
pseudolabel_stats['high_confidence'] += 1
if 'sequence' in ann:
new_ann['sequence'] = ann['sequence']
if 'modality' in ann:
new_ann['modality'] = ann['modality']
self.combined_data['annotations'].append(new_ann)
self.annotation_id_counter += 1
self.stats[dataset_name]["total_annotations"] += 1
if is_crowd:
self.stats[dataset_name]["crowd_annotations"] += 1
self.stats[dataset_name]["ignore_annotations"] += 1
# Add pseudolabel statistics to dataset stats
self.stats[dataset_name]["pseudolabeled_annotations"] = pseudolabel_stats['pseudolabeled']
self.stats[dataset_name]["verified_annotations"] = pseudolabel_stats['verified']
self.stats[dataset_name]["low_confidence_annotations"] = pseudolabel_stats['low_confidence']
self.stats[dataset_name]["high_confidence_annotations"] = pseudolabel_stats['high_confidence']
logger.info(f"WISARD dataset processed: {pseudolabel_stats['pseudolabeled']} pseudolabeled, "
f"{pseudolabel_stats['verified']} verified annotations")
def process_crowd_human(self):
"""Process CrowdHuman dataset from pre-converted COCO format."""
dataset_name = "crowd_human"
# Use the pre-converted COCO format annotations
ann_path = Path("/home/svakhreev/projects/DEIM/data/crowd_human_coco/annotations_combined.json")
if not ann_path.exists():
logger.warning(f"CrowdHuman annotations not found: {ann_path}")
return
logger.info(f"Processing {dataset_name}")
with open(ann_path, 'r') as f:
data = json.load(f)
# CrowdHuman has person category with id=1 in the converted format
# Map it to our unified person category (id=0)
category_mapping = {}
for cat in data.get('categories', []):
if cat['name'].lower() == 'person':
category_mapping[cat['id']] = 0 # Map to our person category
# Process images and annotations
image_id_mapping = {}
images_with_persons = set()
# First pass: identify images with person annotations
for ann in data.get('annotations', []):
if ann['category_id'] in category_mapping:
images_with_persons.add(ann['image_id'])
# Process ALL images (keep all images from CrowdHuman)
for img in tqdm(data.get('images', []), desc=f"Processing {dataset_name} images"):
old_id = img['id']
new_id = self.image_id_counter
self.image_id_counter += 1
image_id_mapping[old_id] = new_id
img_filename = img['file_name']
# CrowdHuman file_name format: "CrowdHuman_train/Images/273271,1a0d6000b9e1f5b7.jpg"
# or "CrowdHuman_val/Images/273278,c9db000d5146c15.jpg"
# Construct the source path
source_path = Path("/mnt/archive/person_drone/crowd_human") / img_filename
# Extract just the filename for the new path
actual_filename = Path(img_filename).name
relative_path, dest_path = self.get_new_image_path(dataset_name, actual_filename, new_id)
if not source_path.exists():
self.stats[dataset_name]["missing_images"] += 1
if not self.dry_run:
continue
if self.copy_image(source_path, dest_path):
# Determine split from the path
split = "unknown"
if "train" in img_filename.lower():
split = "train"
elif "val" in img_filename.lower():
split = "val"
elif "test" in img_filename.lower():
split = "test"
new_img = {
"id": new_id,
"file_name": relative_path,
"width": img.get('width', 0),
"height": img.get('height', 0),
"dataset": dataset_name,
"split": split,
"original_filename": img_filename
}
self.combined_data['images'].append(new_img)
self.stats[dataset_name]["total_images"] += 1
if old_id in images_with_persons:
self.stats[dataset_name]["images_with_persons"] += 1
self.stats[dataset_name]["splits"].add(split)
# Process annotations
for ann in data.get('annotations', []):
if ann['category_id'] not in category_mapping:
continue
if ann['image_id'] not in image_id_mapping:
continue
# CrowdHuman uses iscrowd flag for heavily occluded persons
is_crowd = ann.get('iscrowd', 0) == 1
new_ann = {
"id": self.annotation_id_counter,
"image_id": image_id_mapping[ann['image_id']],
"category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0
"bbox": ann['bbox'],
"area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
"segmentation": ann.get('segmentation', []),
"iscrowd": ann.get('iscrowd', 0)
}
self.combined_data['annotations'].append(new_ann)
self.annotation_id_counter += 1
self.stats[dataset_name]["total_annotations"] += 1
if is_crowd:
self.stats[dataset_name]["crowd_annotations"] += 1
self.stats[dataset_name]["ignore_annotations"] += 1
def process_objects365(self):
"""Process Objects365 dataset (only person category)."""
dataset_name = "objects365"
base_path = Path("/mnt/archive/datasets/OpenDataLab___Objects365")
# Process train split
ann_path = base_path / "raw/Objects365/data/train/zhiyuan_objv2_train.json"
if ann_path.exists():
logger.info(f"Processing {dataset_name} - train split (this may take a while...)")
with open(ann_path, 'r') as f:
data = json.load(f)
# Find person category ID
person_cat_id = None
for cat in data.get('categories', []):
if cat['name'].lower() == 'person':
person_cat_id = cat['id']
break
if person_cat_id is None:
logger.warning("Person category not found in Objects365")
return
# Process images and annotations
image_id_mapping = {}
images_with_persons = set()
# First pass: identify images with person annotations
logger.info("Identifying images with person annotations...")
for ann in tqdm(data.get('annotations', []), desc="Scanning annotations"):
if ann['category_id'] == person_cat_id:
images_with_persons.add(ann['image_id'])
logger.info(f"Found {len(images_with_persons)} images with persons")
# Create image ID to image dict for faster lookup
id_to_image = {img['id']: img for img in data.get('images', [])}
# Process only images with persons
processed = 0
for img_id in tqdm(images_with_persons, desc=f"Processing {dataset_name} images"):
if img_id not in id_to_image:
continue
img = id_to_image[img_id]
old_id = img['id']
new_id = self.image_id_counter
self.image_id_counter += 1
image_id_mapping[old_id] = new_id
img_filename = img['file_name']
# Objects365 image paths need adjustment
# JSON has: "images/v1/patch8/objects365_v1_00420917.jpg"
# Actual path: "train/patch8/objects365_v1_00420917.jpg"
# Extract patch and filename from the path
path_parts = Path(img_filename).parts
if len(path_parts) >= 3:
# Get patch directory and filename
patch_dir = path_parts[-2] # e.g., "patch8"
filename = path_parts[-1] # e.g., "objects365_v1_00420917.jpg"
source_path = base_path / "raw/Objects365/data/train" / patch_dir / filename
else:
source_path = base_path / "raw/Objects365/data" / img_filename
                if not source_path.exists():
                    # Count the missing image in every mode; skip it unless this is a dry run
                    self.stats[dataset_name]["missing_images"] += 1
                    if not self.dry_run:
                        continue
relative_path, dest_path = self.get_new_image_path(dataset_name, Path(img_filename).name, new_id)
# In dry run or if file exists, add to dataset
if self.dry_run or self.copy_image(source_path, dest_path):
new_img = {
"id": new_id,
"file_name": relative_path,
"width": img.get('width', 0),
"height": img.get('height', 0),
"dataset": dataset_name,
"split": "train",
"original_filename": img_filename
}
self.combined_data['images'].append(new_img)
self.stats[dataset_name]["total_images"] += 1
self.stats[dataset_name]["images_with_persons"] += 1
self.stats[dataset_name]["splits"].add("train")
processed += 1
# Process annotations
for ann in tqdm(data.get('annotations', []), desc="Processing annotations"):
if ann['category_id'] != person_cat_id:
continue
if ann['image_id'] not in image_id_mapping:
continue
is_crowd = ann.get('iscrowd', 0) == 1
new_ann = {
"id": self.annotation_id_counter,
"image_id": image_id_mapping[ann['image_id']],
"category_id": 1 if is_crowd else 0, # crowd -> ignore (1), person -> 0
"bbox": ann['bbox'],
"area": ann.get('area', ann['bbox'][2] * ann['bbox'][3]),
"segmentation": ann.get('segmentation', []),
"iscrowd": ann.get('iscrowd', 0)
}
self.combined_data['annotations'].append(new_ann)
self.annotation_id_counter += 1
self.stats[dataset_name]["total_annotations"] += 1
if is_crowd:
self.stats[dataset_name]["crowd_annotations"] += 1
self.stats[dataset_name]["ignore_annotations"] += 1
    def visualize_samples(self, num_samples: int = 10, dataset_filter: Optional[str] = None):
"""Visualize random samples with bounding boxes using cv2.imshow."""
if len(self.combined_data['images']) == 0:
logger.warning("No images to visualize")
return
# Create image_id to annotations mapping
img_to_anns = defaultdict(list)
for ann in self.combined_data['annotations']:
img_to_anns[ann['image_id']].append(ann)
# Sample random images that have annotations
images_with_anns = [img for img in self.combined_data['images']
if img['id'] in img_to_anns]
# Filter by dataset if specified
if dataset_filter:
images_with_anns = [img for img in images_with_anns
if img.get('dataset', '').lower() == dataset_filter.lower()]
if not images_with_anns:
logger.warning(f"No images with annotations from dataset '{dataset_filter}'")
return
if not images_with_anns:
logger.warning("No images with annotations to visualize")
return
num_samples = min(num_samples, len(images_with_anns))
sampled_images = random.sample(images_with_anns, num_samples)
logger.info(f"Visualizing {num_samples} sample images with bounding boxes...")
logger.info("Press any key to see next image, 'q' to quit")
for idx, img_info in enumerate(sampled_images):
# Determine the actual image path based on dataset
dataset_name = img_info.get('dataset', '')
original_filename = img_info.get('original_filename', img_info['file_name'])
# Find the source image path
if self.dry_run or not (self.images_dir / img_info['file_name']).exists():
# In dry-run mode or if copied image doesn't exist, load from original location
source_path = None
if dataset_name == "rgbt_drone_person":
base = Path("/mnt/archive/person_drone/RGBTDronePerson-20250828T031729Z-1-001/RGBTDronePerson/RGBTDronePerson")
split = img_info.get('split', 'train')
modality = img_info.get('modality', 'thermal')
if split == "sub_train":
split = "train"
source_path = base / split / modality / original_filename
elif dataset_name == "search_and_rescue":
base = Path("/mnt/archive/person_drone/search-and-rescue")
split = img_info.get('split', 'train')
source_path = base / split / "images" / Path(original_filename).name
elif dataset_name == "stanford_drone":
base = Path("/mnt/archive/person_drone/stanford_drone_coco")
source_path = base / "train_images" / original_filename
elif dataset_name == "vtsar":
base = Path("/mnt/archive/person_drone/vtsar_coco")
source_path = base / original_filename
elif dataset_name == "vtuav":
base = Path("/mnt/archive/person_drone/vtuav_coco")
source_path = base / original_filename
elif dataset_name == "wisard":
base = Path("/mnt/archive/person_drone/wisard_coco")
source_path = base / original_filename
elif dataset_name == "visdrone2019":
base = Path("/mnt/archive/person_drone/VisDrone2019-DET")
source_path = base / original_filename
elif dataset_name == "seadronessee":
base = Path("/mnt/archive/person_drone/seadronessee")
split = img_info.get('split', 'train')
source_path = base / "images" / split / Path(original_filename).name
elif dataset_name == "lisa_alert":
base = Path("/mnt/archive/person_drone/lisa_alert")
source_path = base / "images" / Path(original_filename).name
elif dataset_name == "crowd_human":
base = Path("/mnt/archive/person_drone/crowd_human")
# original_filename contains the full path like "CrowdHuman_train/Images/xxx.jpg"
source_path = base / original_filename
elif dataset_name == "objects365":
base = Path("/mnt/archive/datasets/OpenDataLab___Objects365")
path_parts = Path(original_filename).parts
if len(path_parts) >= 3:
patch_dir = path_parts[-2]
filename = path_parts[-1]
source_path = base / "raw/Objects365/data/train" / patch_dir / filename
else:
source_path = base / "raw/Objects365/data" / original_filename
if source_path and source_path.exists():
img = cv2.imread(str(source_path))
else:
# Create placeholder if image not found
img = np.zeros((img_info.get('height', 480),
img_info.get('width', 640), 3), dtype=np.uint8)
img[:] = (50, 50, 50)
cv2.putText(img, "Image not found", (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
else:
# Load from copied location
img_path = self.images_dir / img_info['file_name']
img = cv2.imread(str(img_path))
if img is None:
# Create placeholder if loading failed
img = np.zeros((img_info.get('height', 480),
img_info.get('width', 640), 3), dtype=np.uint8)
img[:] = (50, 50, 50)
cv2.putText(img, "Failed to load image", (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
# Draw annotations
annotations = img_to_anns[img_info['id']]
for ann in annotations:
# Get bbox
x, y, w, h = ann['bbox']
x, y, w, h = int(x), int(y), int(w), int(h)
# Choose color based on category
if ann.get('category_id', 0) == 1:
color = (0, 165, 255) # Orange for ignore regions
label = "ignore"
elif ann.get('iscrowd', 0):
color = (0, 165, 255) # Orange for crowd (should be in ignore now)
label = "crowd"
else:
color = (0, 255, 0) # Green for individual person
label = "person"
# Draw rectangle
cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
# Add label
label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
cv2.rectangle(img, (x, y - label_size[1] - 4),
(x + label_size[0], y), color, -1)
cv2.putText(img, label, (x, y - 2),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
# Add image info
info_text = [
f"Dataset: {img_info.get('dataset', 'unknown')}",
f"Image ID: {img_info['id']}",
f"Annotations: {len(annotations)}",
f"Size: {img_info.get('width', 0)}x{img_info.get('height', 0)}"
]
if 'split' in img_info:
info_text.append(f"Split: {img_info['split']}")
if 'modality' in img_info:
info_text.append(f"Modality: {img_info['modality']}")
# Draw info background
y_offset = 10
for text in info_text:
text_size, _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
cv2.rectangle(img, (5, y_offset),
(10 + text_size[0], y_offset + text_size[1] + 5),
(0, 0, 0), -1)
cv2.putText(img, text, (10, y_offset + text_size[1]),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
y_offset += text_size[1] + 10
# Display image
window_name = f"Sample {idx+1}/{num_samples} - {img_info.get('dataset', 'unknown')} - ID: {img_info['id']}"
cv2.namedWindow(window_name, cv2.WINDOW_KEEPRATIO)
cv2.imshow(window_name, img)
# Wait for key press
key = cv2.waitKey(0) & 0xFF
cv2.destroyWindow(window_name)
if key == ord('q'):
logger.info("Visualization stopped by user")
break
cv2.destroyAllWindows()
logger.info("Visualization complete")
def save_combined_dataset(self):
"""Save the combined dataset to disk."""
if self.dry_run:
logger.info(f"DRY RUN: Would save combined dataset to {self.annotations_file}")
logger.info(f"DRY RUN: Dataset would contain {len(self.combined_data['images'])} images and {len(self.combined_data['annotations'])} annotations")
else:
logger.info(f"Saving combined dataset to {self.annotations_file}")
with open(self.annotations_file, 'w') as f:
json.dump(self.combined_data, f)
logger.info("Dataset saved successfully")
def print_statistics(self):
"""Print detailed statistics about the combined dataset."""
print("\n" + "="*80)
if self.dry_run:
print("COMBINED DATASET STATISTICS (DRY RUN)")
else:
print("COMBINED DATASET STATISTICS")
print("="*80)
total_images = len(self.combined_data['images'])
total_annotations = len(self.combined_data['annotations'])
print(f"\nOVERALL:")
print(f" Total Images: {total_images:,}")
print(f" Total Annotations: {total_annotations:,}")
print(f" Average Annotations per Image: {total_annotations/max(total_images, 1):.2f}")
print("\n" + "-"*80)
print("PER-DATASET BREAKDOWN:")
print("-"*80)
# Calculate percentages
for dataset_name, stats in sorted(self.stats.items()):
img_pct = 100 * stats['images_with_persons'] / max(total_images, 1)
ann_pct = 100 * stats['total_annotations'] / max(total_annotations, 1)
print(f"\n{dataset_name.upper()}:")
print(f" Total images: {stats['total_images']:,}")
print(f" Images with persons: {stats['images_with_persons']:,} ({img_pct:.1f}% of total)")
print(f" Total annotations: {stats['total_annotations']:,} ({ann_pct:.1f}% of total)")
if stats.get('missing_images', 0) > 0:
print(f" ⚠️ Missing images: {stats['missing_images']:,}")
if stats['total_annotations'] > 0:
print(f" Crowd annotations: {stats['crowd_annotations']:,} ({100*stats['crowd_annotations']/stats['total_annotations']:.1f}%)")
print(f" Ignore annotations: {stats['ignore_annotations']:,} ({100*stats['ignore_annotations']/stats['total_annotations']:.1f}%)")
# Show pseudolabel statistics if available (for WISARD)
if 'pseudolabeled_annotations' in stats:
print(f" Pseudolabeled: {stats['pseudolabeled_annotations']:,} ({100*stats['pseudolabeled_annotations']/stats['total_annotations']:.1f}%)")
print(f" Verified: {stats['verified_annotations']:,} ({100*stats['verified_annotations']/stats['total_annotations']:.1f}%)")
if stats.get('low_confidence_annotations', 0) > 0 or stats.get('high_confidence_annotations', 0) > 0:
print(f" Low confidence (<0.3): {stats['low_confidence_annotations']:,}")
print(f" High confidence (β‰₯0.7): {stats['high_confidence_annotations']:,}")
if stats['modalities']:
print(f" Modalities: {', '.join(sorted(stats['modalities']))}")
if stats['splits']:
print(f" Splits: {', '.join(sorted(stats['splits']))}")
if stats['images_with_persons'] > 0:
print(f" Avg annotations/image: {stats['total_annotations']/stats['images_with_persons']:.2f}")
# Image statistics
if self.combined_data['images']:
widths = [img['width'] for img in self.combined_data['images'] if img['width'] > 0]
heights = [img['height'] for img in self.combined_data['images'] if img['height'] > 0]
if widths and heights:
print("\n" + "-"*80)
print("IMAGE DIMENSIONS:")
print("-"*80)
print(f" Width range: {min(widths)} - {max(widths)} pixels")
print(f" Height range: {min(heights)} - {max(heights)} pixels")
print(f" Average width: {sum(widths)/len(widths):.0f} pixels")
print(f" Average height: {sum(heights)/len(heights):.0f} pixels")
# Annotation statistics
if self.combined_data['annotations']:
areas = [ann['area'] for ann in self.combined_data['annotations'] if ann['area'] > 0]
crowd_count = sum(1 for ann in self.combined_data['annotations'] if ann['iscrowd'] == 1)
print("\n" + "-"*80)
print("ANNOTATION STATISTICS:")
print("-"*80)
print(f" Total bounding boxes: {len(self.combined_data['annotations']):,}")
print(f" Crowd annotations: {crowd_count:,} ({100*crowd_count/len(self.combined_data['annotations']):.1f}%)")
if areas:
print(f" Area range: {min(areas):.0f} - {max(areas):.0f} pixelsΒ²")
print(f" Average area: {sum(areas)/len(areas):.0f} pixelsΒ²")
# Dataset contribution summary table
print("\n" + "-"*80)
print("DATASET CONTRIBUTION SUMMARY:")
print("-"*80)
print(f"{'Dataset':<25} {'Total Images':>12} {'With Person':>12} {'%':>7} {'Annotations':>12} {'%':>7}")
print("-"*100)
# Only show datasets that actually have images
datasets_with_images = [(name, stats) for name, stats in self.stats.items() if stats['total_images'] > 0]
for dataset_name, stats in sorted(datasets_with_images, key=lambda x: x[1]['total_images'], reverse=True):
img_pct = 100 * stats['total_images'] / max(total_images, 1)
ann_pct = 100 * stats['total_annotations'] / max(total_annotations, 1)
print(f"{dataset_name:<25} {stats['total_images']:>12,} {stats['images_with_persons']:>12,} {img_pct:>6.1f}% {stats['total_annotations']:>12,} {ann_pct:>6.1f}%")
print("-"*100)
# Calculate totals for images with persons
total_with_persons = sum(stats['images_with_persons'] for stats in self.stats.values())
print(f"{'TOTAL':<25} {total_images:>12,} {total_with_persons:>12,} {'100.0%':>7} {total_annotations:>12,} {'100.0%':>7}")
print("\n" + "="*80)
def check_datasets(self):
"""Check which datasets are available."""
datasets = {
"RGBTDronePerson": Path("/mnt/archive/person_drone/RGBTDronePerson-20250828T031729Z-1-001/RGBTDronePerson"),
"search-and-rescue": Path("/mnt/archive/person_drone/search-and-rescue"),
"stanford_drone_coco": Path("/mnt/archive/person_drone/stanford_drone_coco"),
"vtsar_coco": Path("/mnt/archive/person_drone/vtsar_coco"),
"vtuav_coco": Path("/mnt/archive/person_drone/vtuav_coco"),
"wisard_coco": Path("/mnt/archive/person_drone/wisard_coco"),
"VisDrone2019-DET": Path("/mnt/archive/person_drone/VisDrone2019-DET"),
"SeaDronesSee": Path("/mnt/archive/person_drone/seadronessee"),
"LISA Alert": Path("/mnt/archive/person_drone/lisa_alert"),
"CrowdHuman": Path("/mnt/archive/person_drone/crowd_human"),
"Objects365": Path("/mnt/archive/datasets/OpenDataLab___Objects365")
}
print("\n" + "="*80)
print("CHECKING DATASET AVAILABILITY")
print("="*80)
available = []
missing = []
for name, path in datasets.items():
if path.exists():
available.append(name)
print(f"βœ… {name}: Found at {path}")
else:
missing.append(name)
print(f"❌ {name}: Not found at {path}")
print(f"\nSummary: {len(available)} available, {len(missing)} missing")
print("="*80 + "\n")
return available, missing
    def run(self, visualize: bool = False, num_vis_samples: int = 10, vis_dataset: Optional[str] = None):
"""Run the complete dataset combination pipeline."""
# Check dataset availability
available, missing = self.check_datasets()
if missing and not self.dry_run:
response = input(f"\n⚠️ {len(missing)} dataset(s) missing. Continue anyway? (y/n): ")
if response.lower() != 'y':
logger.info("Aborted by user")
return
logger.info("Starting dataset combination process...")
# Process each dataset
self.process_rgbt_drone_person()
self.process_search_and_rescue()
# bad annotations
# self.process_stanford_drone()
# Process COCO format datasets
self.process_coco_format_dataset("vtsar", Path("/mnt/archive/person_drone/vtsar_coco"))
# bad annotations
# self.process_coco_format_dataset("vtuav", Path("/mnt/archive/person_drone/vtuav_coco"))
# Process WISARD with special pseudolabel handling
self.process_wisard()
# Process VisDrone
self.process_visdrone()
# Process SeaDronesSee
self.process_seadronessee()
# Process LISA Alert
self.process_lisa_alert()
# Process CrowdHuman
self.process_crowd_human()
# # Process Objects365 (limited due to size)
# self.process_objects365()
# Save combined dataset
self.save_combined_dataset()
# Create visualizations if requested
if visualize:
self.visualize_samples(num_vis_samples, dataset_filter=vis_dataset)
# Print statistics
self.print_statistics()
logger.info("Dataset combination complete!")
def main():
parser = argparse.ArgumentParser(description="Combine multiple person detection datasets into a single COCO format dataset")
parser.add_argument(
"--output-dir",
type=str,
default="/mnt/archive/person_drone/combined_dataset",
help="Output directory for the combined dataset"
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Run without copying images to check correctness"
)
parser.add_argument(
"--skip-objects365",
action="store_true",
help="Skip Objects365 dataset (it's very large)"
)
parser.add_argument(
"--visualize",
action="store_true",
help="Create visualization samples with bounding boxes"
)
parser.add_argument(
"--num-vis-samples",
type=int,
default=200,
help="Number of samples to visualize with cv2.imshow (default: 20)"
)
parser.add_argument(
"--vis-dataset",
type=str,
default=None,
help="Visualize samples only from specific daztaset (e.g., stanford_drone, visdrone2019)"
)
parser.add_argument(
"--images-per-folder",
type=int,
default=10000,
help="Number of images per folder for pagination (default: 10000)"
)
args = parser.parse_args()
combiner = DatasetCombiner(args.output_dir, dry_run=args.dry_run, images_per_folder=args.images_per_folder)
    if args.skip_objects365:
        # Make the Objects365 processor a no-op. Note that run() currently has
        # process_objects365 commented out anyway, so this flag is a safeguard.
        combiner.process_objects365 = lambda: logger.info("Skipping Objects365 dataset")
combiner.run(visualize=args.visualize, num_vis_samples=args.num_vis_samples, vis_dataset=args.vis_dataset)
if __name__ == "__main__":
main()