#!/usr/bin/env python3
"""
Convert VTSaR dataset from YOLO format to COCO format.
Dataset structure:
/mnt/archive/person_drone/VTSaR/VTSaR_Crop_640/
├── rgb/
│   ├── train/
│   └── val/
├── ir/
│   ├── train/
│   └── val/
└── labels/
    ├── train/
    └── val/
YOLO format: class_id center_x center_y width height (all coordinates normalized to 0-1)
COCO format: x y width height (absolute pixel coordinates; x, y = top-left corner)
VTSaR is a single-class dataset for person detection (class 0 = person).
"""
import json
import os
import argparse
from pathlib import Path
from PIL import Image
from typing import Dict, List, Tuple
from datetime import datetime
def yolo_to_coco_bbox(yolo_bbox: List[float], img_width: int, img_height: int) -> List[float]:
"""
Convert YOLO bbox format to COCO format.
Args:
yolo_bbox: [center_x, center_y, width, height] (normalized 0-1)
img_width: Image width in pixels
img_height: Image height in pixels
Returns:
[x, y, width, height] in absolute coordinates for COCO format
"""
center_x, center_y, width, height = yolo_bbox
# Convert normalized coordinates to absolute
abs_center_x = center_x * img_width
abs_center_y = center_y * img_height
abs_width = width * img_width
abs_height = height * img_height
    # Convert center coordinates to top-left corner
    x = abs_center_x - abs_width / 2
    y = abs_center_y - abs_height / 2
    # Clip the box to the image bounds; clamping both corners truncates a box
    # that crosses the left/top edge instead of shifting it inward
    x2 = min(x + abs_width, img_width)
    y2 = min(y + abs_height, img_height)
    x = max(0.0, x)
    y = max(0.0, y)
    return [x, y, x2 - x, y2 - y]
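
# Quick sanity check for yolo_to_coco_bbox (assuming a 640x480 image):
#     yolo_to_coco_bbox([0.5, 0.5, 0.25, 0.5], 640, 480)
#     -> [240.0, 120.0, 160.0, 240.0]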
def parse_yolo_annotation(label_file: str) -> List[List[float]]:
"""
Parse YOLO annotation file.
Args:
label_file: Path to .txt annotation file
Returns:
List of [class_id, center_x, center_y, width, height] for each detection
"""
annotations = []
if not os.path.exists(label_file):
return annotations
with open(label_file, 'r') as f:
content = f.read().strip()
if not content:
return annotations
lines = content.split('\n')
for line_num, line in enumerate(lines):
line = line.strip()
if not line:
continue
try:
# Parse space-separated values
values = list(map(float, line.split()))
# Each detection has 5 values: class_id, center_x, center_y, width, height
if len(values) != 5:
print(f"Warning: Unexpected format in {label_file} line {line_num + 1}, expected 5 values, got {len(values)}: {line}")
continue
annotations.append(values)
except ValueError as e:
print(f"Warning: Could not parse line {line_num + 1} in {label_file}: {line} - {e}")
continue
return annotations
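
# Example (hypothetical label file): a file containing the two lines
#     0 0.25 0.40 0.10 0.20
#     0 0.75 0.60 0.12 0.25
# parses to [[0.0, 0.25, 0.4, 0.1, 0.2], [0.0, 0.75, 0.6, 0.12, 0.25]]
# (class_id comes back as a float here; it is cast to int by the caller).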
def convert_split_to_coco(
dataset_root: str,
split_name: str,
modality: str,
category_mapping: Dict[int, Dict],
start_img_id: int = 1,
start_ann_id: int = 1
) -> Tuple[List[Dict], List[Dict], int, int]:
"""
Convert a VTSaR dataset split and modality to COCO format components.
Args:
dataset_root: Root directory of VTSaR dataset
split_name: Name of the split (train/val)
modality: Modality to use ('rgb' or 'ir')
category_mapping: Mapping of class_id to category info
start_img_id: Starting image ID for this split
start_ann_id: Starting annotation ID for this split
Returns:
images_list, annotations_list, next_img_id, next_ann_id
"""
dataset_root = Path(dataset_root)
images_dir = dataset_root / modality / split_name
labels_dir = dataset_root / "labels" / split_name
if not images_dir.exists() or not labels_dir.exists():
print(f"Warning: Missing images or labels directory for {split_name} {modality}")
return [], [], start_img_id, start_ann_id
images_list = []
annotations_list = []
img_id = start_img_id
ann_id = start_ann_id
# Get all image files
image_extensions = {'.jpg', '.jpeg', '.png', '.bmp'}
image_files = []
for ext in image_extensions:
image_files.extend(images_dir.glob(f"*{ext}"))
image_files.extend(images_dir.glob(f"*{ext.upper()}"))
    image_files = sorted(set(image_files))  # dedupe: both glob patterns can match the same file on case-insensitive filesystems
print(f"Processing {len(image_files)} images in {split_name} split ({modality} modality)...")
for img_file in image_files:
# Get corresponding label file
label_file = labels_dir / f"{img_file.stem}.txt"
# Open image to get dimensions
try:
with Image.open(img_file) as img:
img_width, img_height = img.size
except Exception as e:
print(f"Error opening image {img_file}: {e}")
continue
# Add image info with modality prefix to avoid filename conflicts
relative_path = f"{split_name}_{modality}_images/{img_file.name}"
images_list.append({
"id": img_id,
"file_name": relative_path,
"width": img_width,
"height": img_height,
"license": 1,
"modality": modality # Add modality info
})
# Parse annotations
yolo_annotations = parse_yolo_annotation(str(label_file))
for yolo_ann in yolo_annotations:
class_id, center_x, center_y, width, height = yolo_ann
class_id = int(class_id)
# Skip unknown classes
if class_id not in category_mapping:
continue
# Convert bbox to COCO format
coco_bbox = yolo_to_coco_bbox([center_x, center_y, width, height], img_width, img_height)
# Calculate area
area = coco_bbox[2] * coco_bbox[3]
if area > 0: # Only add valid annotations
annotations_list.append({
"id": ann_id,
"image_id": img_id,
"category_id": category_mapping[class_id]["id"],
"bbox": coco_bbox,
"area": area,
"iscrowd": 0,
"segmentation": [],
"modality": modality # Add modality info
})
ann_id += 1
img_id += 1
return images_list, annotations_list, img_id, ann_id
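
# Shape of the image records this produces (values illustrative):
#     {"id": 1, "file_name": "train_rgb_images/0001.jpg", "width": 640,
#      "height": 640, "license": 1, "modality": "rgb"}
# Each annotation references its image via "image_id" and carries the same
# non-standard "modality" field, which COCO consumers that only read the
# required keys will simply ignore.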
def main():
parser = argparse.ArgumentParser(description="Convert VTSaR dataset to COCO format")
parser.add_argument(
"--dataset_root",
type=str,
default="/mnt/archive/person_drone/VTSaR/VTSaR_Crop_640",
help="Path to VTSaR dataset root directory"
)
parser.add_argument(
"--output_dir",
type=str,
default="/home/svakhreev/projects/DEIM/data/vtsar_coco",
help="Output directory for COCO format files"
)
parser.add_argument(
"--modalities",
type=str,
nargs='+',
default=["rgb", "ir"],
choices=["rgb", "ir"],
help="Modalities to convert (default: both rgb and ir)"
)
args = parser.parse_args()
dataset_root = Path(args.dataset_root)
output_dir = Path(args.output_dir)
# Create output directory
output_dir.mkdir(parents=True, exist_ok=True)
# Create category mapping for VTSaR (single class: person)
category_mapping = {
0: {
"id": 1, # COCO categories start from 1
"name": "person",
"supercategory": "person"
}
}
print(f"Categories: {category_mapping}")
print(f"Converting modalities: {args.modalities}")
# Initialize combined COCO data structure
coco_data = {
"info": {
"year": 2024,
"version": "1.0",
"description": f"VTSaR Dataset - Combined train/val splits (RGB+IR modalities) in COCO format",
"contributor": "VTSaR Dataset",
"url": "",
"date_created": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
},
"licenses": [
{
"id": 1,
"name": "Academic Use",
"url": ""
}
],
"categories": list(category_mapping.values()),
"images": [],
"annotations": []
}
# Process each split and modality combination
splits = ["train", "val"]
img_id = 1
ann_id = 1
for split in splits:
print(f"\nConverting {split} split...")
for modality in args.modalities:
split_images_dir = dataset_root / modality / split
if not split_images_dir.exists():
print(f"Warning: {split_images_dir} does not exist, skipping {split} {modality}")
continue
images_list, annotations_list, next_img_id, next_ann_id = convert_split_to_coco(
str(dataset_root),
split,
modality,
category_mapping,
img_id,
ann_id
)
# Add to combined dataset
coco_data["images"].extend(images_list)
coco_data["annotations"].extend(annotations_list)
# Update IDs for next modality/split
img_id = next_img_id
ann_id = next_ann_id
print(f"Added {len(images_list)} images and {len(annotations_list)} annotations from {split} {modality}")
# Save unified COCO annotation file
output_file = output_dir / "annotations.json"
with open(output_file, 'w') as f:
json.dump(coco_data, f, indent=2)
print(f"\nConversion complete!")
print(f"Total: {len(coco_data['images'])} images and {len(coco_data['annotations'])} annotations")
print(f"Saved unified COCO format to {output_file}")
# Create symlinks to original image directories for easy access
for split in splits:
for modality in args.modalities:
original_images_dir = dataset_root / modality / split
symlink_dir = output_dir / f"{split}_{modality}_images"
if original_images_dir.exists() and not symlink_dir.exists():
try:
symlink_dir.symlink_to(original_images_dir.resolve())
print(f"Created symlink: {symlink_dir} -> {original_images_dir}")
except Exception as e:
print(f"Warning: Could not create symlink for {split} {modality}: {e}")
# Save dataset information for reference
info_file = output_dir / "dataset_info.json"
with open(info_file, 'w') as f:
json.dump({
'modalities': args.modalities,
'splits_processed': [(split, modality) for split in splits for modality in args.modalities
if (dataset_root / modality / split).exists()],
'total_images': len(coco_data['images']),
'total_annotations': len(coco_data['annotations']),
'categories': category_mapping,
'args': vars(args)
}, f, indent=2)
print(f"Dataset info saved to {info_file}")
if __name__ == "__main__":
main()
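
# Example invocation (script name and paths are illustrative):
#     python convert_vtsar_to_coco.py \
#         --dataset_root /mnt/archive/person_drone/VTSaR/VTSaR_Crop_640 \
#         --output_dir ./data/vtsar_coco --modalities rgb ir
#
# Optional sanity check of the output with pycocotools, if it is installed:
#     from pycocotools.coco import COCO
#     coco = COCO("./data/vtsar_coco/annotations.json")
#     print(len(coco.getImgIds()), "images,", len(coco.getAnnIds()), "annotations")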