#!/usr/bin/env python3
"""
Compute top-1 accuracy for each model by comparing predictions with ground truth.
"""
import json
import os
from collections import OrderedDict

# Species mapping from demo/app.py
SPECIES_MAP = OrderedDict([
    (24, "Jaguar"),           # panthera onca
    (10, "Ocelot"),           # leopardus pardalis
    (6, "Mountain Lion"),     # puma concolor
    (101, "Common Eland"),    # tragelaphus oryx
    (102, "Waterbuck"),       # kobus ellipsiprymnus
])
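# NOTE: keys are category_id values from the annotation file; for a prediction to
# count as correct, these display names must match the class-name keys used in the
# zeroshot_results_*.json files.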


def load_ground_truth():
    """Load ground truth labels from annotations."""
    with open('iwildcam_demo_annotations.json', 'r') as f:
        data = json.load(f)
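
    # Assumes the COCO-style iWildCam layout: data['images'] entries carry
    # 'id' and 'file_name'; data['annotations'] entries carry 'image_id'
    # and 'category_id'.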
    # Create mapping from filename to true label
    ground_truth = {}
    for annotation in data['annotations']:
        image_id = annotation['image_id']
        category_id = annotation['category_id']
        image_info = next((img for img in data['images'] if img['id'] == image_id), None)
        if image_info:
            filename = image_info['file_name']
            true_label = SPECIES_MAP.get(category_id, "Unknown")
            if true_label != "Unknown":
                ground_truth[filename] = true_label

    return ground_truth


def compute_accuracy(results_file, ground_truth):
    """Compute top-1 accuracy for a model's results."""
    with open(results_file, 'r') as f:
        data = json.load(f)
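
    # Each results file is expected to look like
    # {"model": <name>, "results": {<filename>: {<class name>: <score>, ...}, ...}}.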
    model_name = data['model']
    results = data['results']

    correct = 0
    total = 0
    for filename, scores in results.items():
        if filename in ground_truth:
            # Get predicted class (highest score)
            predicted_class = max(scores, key=scores.get)
            true_class = ground_truth[filename]
            if predicted_class == true_class:
                correct += 1
            total += 1

    accuracy = correct / total if total > 0 else 0.0
    return accuracy, correct, total


def main():
    """Compute accuracy for all models."""
    print("Computing top-1 accuracy for each model...\n")

    # Load ground truth
    ground_truth = load_ground_truth()
    print(f"Loaded ground truth for {len(ground_truth)} images")

    # Find all results files
    results_files = [f for f in os.listdir('.') if f.startswith('zeroshot_results_') and f.endswith('.json')]
    if not results_files:
        print("No results files found!")
        return

    print(f"Found {len(results_files)} results files\n")

    # Compute accuracy for each model
    accuracies = {}
    for results_file in sorted(results_files):
        try:
            accuracy, correct, total = compute_accuracy(results_file, ground_truth)

            # Extract model name from filename
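            # (assumes underscores in the saved filename stood in for '/' in the
            # original model name)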
            model_name = results_file.replace('zeroshot_results_', '').replace('.json', '').replace('_', '/')

            accuracies[model_name] = {
                'accuracy': accuracy,
                'correct': correct,
                'total': total
            }

            print(f"{model_name}:")
            print(f" Accuracy: {accuracy:.4f} ({correct}/{total})")
            print()
        except Exception as e:
            print(f"Error processing {results_file}: {e}")

    # Summary
    print("="*60)
    print("SUMMARY")
    print("="*60)

    # Sort by accuracy
    sorted_models = sorted(accuracies.items(), key=lambda x: x[1]['accuracy'], reverse=True)
    for i, (model_name, stats) in enumerate(sorted_models, 1):
        print(f"{i}. {model_name}: {stats['accuracy']:.4f}")

    # Show some example predictions vs ground truth
    print("\n" + "="*60)
    print("SAMPLE PREDICTIONS (first 10 images)")
    print("="*60)

    if results_files:
        # Use the first model's results to show examples
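        # Note: results_files is unsorted here, so which model is shown
        # depends on os.listdir() ordering.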
        with open(results_files[0], 'r') as f:
            data = json.load(f)

        results = data['results']
        count = 0
        for filename, scores in results.items():
            if filename in ground_truth and count < 10:
                predicted_class = max(scores, key=scores.get)
                true_class = ground_truth[filename]
                confidence = scores[predicted_class]

                status = "✓" if predicted_class == true_class else "✗"
                print(f"{filename}:")
                print(f" True: {true_class}")
                print(f" Pred: {predicted_class} ({confidence:.4f}) {status}")
                print()
                count += 1


if __name__ == "__main__":
    main()