#!/usr/bin/env python3
"""
Compute top-1 accuracy for each model by comparing predictions with ground truth.
"""
import json
import os
from collections import OrderedDict

# Species mapping from demo/app.py
SPECIES_MAP = OrderedDict([
    (24, "Jaguar"),           # panthera onca
    (10, "Ocelot"),           # leopardus pardalis
    (6, "Mountain Lion"),     # puma concolor
    (101, "Common Eland"),    # tragelaphus oryx
    (102, "Waterbuck"),       # kobus ellipsiprymnus
])
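# NOTE: keys are category_id values from the annotation file; for a prediction to
# count as correct, these display names must match the class-name keys used in the
# zeroshot_results_*.json files.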


def load_ground_truth():
    """Load ground truth labels from annotations."""
    with open('iwildcam_demo_annotations.json', 'r') as f:
        data = json.load(f)
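
    # Assumes the COCO-style iWildCam layout: data['images'] entries carry
    # 'id' and 'file_name'; data['annotations'] entries carry 'image_id'
    # and 'category_id'.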
    # Create mapping from filename to true label
    ground_truth = {}
    for annotation in data['annotations']:
        image_id = annotation['image_id']
        category_id = annotation['category_id']
        image_info = next((img for img in data['images'] if img['id'] == image_id), None)
        if image_info:
            filename = image_info['file_name']
            true_label = SPECIES_MAP.get(category_id, "Unknown")
            if true_label != "Unknown":
                ground_truth[filename] = true_label

    return ground_truth


def compute_accuracy(results_file, ground_truth):
    """Compute top-1 accuracy for a model's results."""
    with open(results_file, 'r') as f:
        data = json.load(f)
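
    # Each results file is expected to look like
    # {"model": <name>, "results": {<filename>: {<class name>: <score>, ...}, ...}}.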
    model_name = data['model']
    results = data['results']

    correct = 0
    total = 0
    for filename, scores in results.items():
        if filename in ground_truth:
            # Get predicted class (highest score)
            predicted_class = max(scores, key=scores.get)
            true_class = ground_truth[filename]
            if predicted_class == true_class:
                correct += 1
            total += 1

    accuracy = correct / total if total > 0 else 0.0
    return accuracy, correct, total


def main():
    """Compute accuracy for all models."""
    print("Computing top-1 accuracy for each model...\n")

    # Load ground truth
    ground_truth = load_ground_truth()
    print(f"Loaded ground truth for {len(ground_truth)} images")

    # Find all results files
    results_files = [f for f in os.listdir('.') if f.startswith('zeroshot_results_') and f.endswith('.json')]
    if not results_files:
        print("No results files found!")
        return

    print(f"Found {len(results_files)} results files\n")

    # Compute accuracy for each model
    accuracies = {}
    for results_file in sorted(results_files):
        try:
            accuracy, correct, total = compute_accuracy(results_file, ground_truth)

            # Extract model name from filename
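            # (assumes underscores in the saved filename stood in for '/' in the
            # original model name)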
            model_name = results_file.replace('zeroshot_results_', '').replace('.json', '').replace('_', '/')

            accuracies[model_name] = {
                'accuracy': accuracy,
                'correct': correct,
                'total': total
            }

            print(f"{model_name}:")
            print(f" Accuracy: {accuracy:.4f} ({correct}/{total})")
            print()
        except Exception as e:
            print(f"Error processing {results_file}: {e}")

    # Summary
    print("="*60)
    print("SUMMARY")
    print("="*60)

    # Sort by accuracy
    sorted_models = sorted(accuracies.items(), key=lambda x: x[1]['accuracy'], reverse=True)
    for i, (model_name, stats) in enumerate(sorted_models, 1):
        print(f"{i}. {model_name}: {stats['accuracy']:.4f}")

    # Show some example predictions vs ground truth
    print("\n" + "="*60)
    print("SAMPLE PREDICTIONS (first 10 images)")
    print("="*60)

    if results_files:
        # Use the first model's results to show examples
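        # Note: results_files is unsorted here, so which model is shown
        # depends on os.listdir() ordering.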
        with open(results_files[0], 'r') as f:
            data = json.load(f)

        results = data['results']
        count = 0
        for filename, scores in results.items():
            if filename in ground_truth and count < 10:
                predicted_class = max(scores, key=scores.get)
                true_class = ground_truth[filename]
                confidence = scores[predicted_class]

                status = "✓" if predicted_class == true_class else "✗"
                print(f"{filename}:")
                print(f" True: {true_class}")
                print(f" Pred: {predicted_class} ({confidence:.4f}) {status}")
                print()
                count += 1


if __name__ == "__main__":
    main()