#!/usr/bin/env python3
"""
Compute top-1 accuracy for each model by comparing predictions with ground truth.
"""

import json
import os
from collections import OrderedDict
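
# Input file shapes, inferred from how this script reads them below (not from
# a published spec):
#
#   iwildcam_demo_annotations.json -- COCO-style annotations:
#     {"images": [{"id": ..., "file_name": ...}, ...],
#      "annotations": [{"image_id": ..., "category_id": ...}, ...]}
#
#   zeroshot_results_<model>.json -- per-model class scores:
#     {"model": "<model name>",
#      "results": {"<file_name>": {"<species>": <float score>, ...}, ...}}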

# Species mapping from demo/app.py
SPECIES_MAP = OrderedDict([
    (24, "Jaguar"),           # panthera onca
    (10, "Ocelot"),           # leopardus pardalis
    (6, "Mountain Lion"),     # puma concolor
    (101, "Common Eland"),    # tragelaphus oryx
    (102, "Waterbuck"),       # kobus ellipsiprymnus
])

def load_ground_truth():
    """Load ground truth labels from annotations."""
    with open('iwildcam_demo_annotations.json', 'r') as f:
        data = json.load(f)

    # Index images by id once (avoids a linear scan per annotation), then
    # map filename -> true label
    images_by_id = {img['id']: img for img in data['images']}
    ground_truth = {}
    for annotation in data['annotations']:
        image_info = images_by_id.get(annotation['image_id'])
        if image_info is None:
            continue
        true_label = SPECIES_MAP.get(annotation['category_id'], "Unknown")
        if true_label != "Unknown":
            ground_truth[image_info['file_name']] = true_label

    return ground_truth
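
# Shape of the mapping load_ground_truth() returns (filenames hypothetical):
#   {"loc_0003_img_0012.jpg": "Jaguar", "loc_0007_img_0048.jpg": "Waterbuck"}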

def compute_accuracy(results_file, ground_truth):
    """Compute top-1 accuracy for a model's results."""
    with open(results_file, 'r') as f:
        data = json.load(f)

    model_name = data['model']
    results = data['results']

    correct = 0
    total = 0

    for filename, scores in results.items():
        if filename in ground_truth:
            # Get predicted class (highest score)
            predicted_class = max(scores, key=scores.get)
            true_class = ground_truth[filename]

            if predicted_class == true_class:
                correct += 1
            total += 1

    accuracy = correct / total if total > 0 else 0.0
    return model_name, accuracy, correct, total
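
# The top-1 rule above, illustrated with hypothetical scores:
#   scores = {"Jaguar": 0.71, "Ocelot": 0.20, "Waterbuck": 0.09}
#   max(scores, key=scores.get)  # -> "Jaguar"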

def main():
    """Compute accuracy for all models."""
    print("Computing top-1 accuracy for each model...\n")

    # Load ground truth
    ground_truth = load_ground_truth()
    print(f"Loaded ground truth for {len(ground_truth)} images")

    # Find all results files
    results_files = [f for f in os.listdir('.') if f.startswith('zeroshot_results_') and f.endswith('.json')]

    if not results_files:
        print("No results files found!")
        return

    print(f"Found {len(results_files)} results files\n")

    # Compute accuracy for each model
    accuracies = {}
    for results_file in sorted(results_files):
        try:
            # The results file records its own model name; reading it avoids
            # guessing from the filename (where '_' vs '/' is ambiguous)
            model_name, accuracy, correct, total = compute_accuracy(results_file, ground_truth)

            accuracies[model_name] = {
                'accuracy': accuracy,
                'correct': correct,
                'total': total
            }

            print(f"{model_name}:")
            print(f"  Accuracy: {accuracy:.4f} ({correct}/{total})")
            print()

        except Exception as e:
            print(f"Error processing {results_file}: {e}")

    # Summary
    print("="*60)
    print("SUMMARY")
    print("="*60)

    # Sort by accuracy
    sorted_models = sorted(accuracies.items(), key=lambda x: x[1]['accuracy'], reverse=True)

    for i, (model_name, stats) in enumerate(sorted_models, 1):
        print(f"{i}. {model_name}: {stats['accuracy']:.4f}")

    # Show some example predictions vs ground truth
    print("\n" + "="*60)
    print("SAMPLE PREDICTIONS (first 10 images)")
    print("="*60)

    if results_files:
        # Use the first model's results (in sorted order, for determinism)
        with open(sorted(results_files)[0], 'r') as f:
            data = json.load(f)

        print(f"Model: {data['model']}\n")
        results = data['results']
        count = 0

        for filename, scores in results.items():
            if count >= 10:
                break  # only show the first 10 labeled images
            if filename in ground_truth:
                predicted_class = max(scores, key=scores.get)
                true_class = ground_truth[filename]
                confidence = scores[predicted_class]

                status = "βœ“" if predicted_class == true_class else "βœ—"

                print(f"{filename}:")
                print(f"  True: {true_class}")
                print(f"  Pred: {predicted_class} ({confidence:.4f}) {status}")
                print()

                count += 1

if __name__ == "__main__":
    main()
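
# To run: execute this script from the directory that contains
# iwildcam_demo_annotations.json and the zeroshot_results_*.json files;
# all paths above are relative to the current working directory.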