justinkay committed · c72fcf7
Parent(s): 3c26f17

Update hf zeroshot

Files changed: hf_zeroshot.py (+47 -32)

hf_zeroshot.py CHANGED
Old version (removed lines are prefixed with "-"):

@@ -71,51 +71,66 @@ def load_demo_annotations():

    return image_metadata

-def run_bioclip_inference(image_paths, class_names):
-    """Run zero-shot inference using BioCLIP
-
-
-

-
        device = "cuda" if torch.cuda.is_available() else "cpu"
-
-
-
-        )

        results = {}

-
-
-

-
-
-
-            for class_name in class_names:
-                scores[class_name] = 0.0

-
-
-
-                class_name = pred['classification']
-                score = pred['score']
-                if class_name in scores:
-                    scores[class_name] = score

-

-
-
-
-
-

        return results

    except Exception as e:
        print(f"Error loading BioCLIP: {e}")
        return None

def run_openclip_inference(model_name, image_paths, class_names):
@@ -317,7 +332,7 @@ def main():

    # Handle different models with appropriate methods
    if model_name in ["imageomics/bioclip", "imageomics/bioclip-2"]:
-        results = run_bioclip_inference(image_paths, CLASS_NAMES)
    elif model_name == "google/siglip2-so400m-patch16-naflex":
        results = run_siglip_inference(image_paths, CLASS_NAMES)
    elif model_name in ["facebook/PE-Core-L14-336", "laion/CLIP-ViT-L-14-laion2B-s32B-b82K"]:
New version (added lines are prefixed with "+"):

@@ -71,51 +71,66 @@ def load_demo_annotations():

    return image_metadata

+def run_bioclip_inference(model_name, image_paths, class_names):
+    """Run zero-shot inference using BioCLIP via OpenCLIP."""
+    if not OPEN_CLIP_AVAILABLE:
+        print("open_clip is not available. Please install it with: pip install open_clip_torch")
+        return None

+    print(f"Loading BioCLIP model: {model_name}")
+    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        # Load model using OpenCLIP with hf-hub prefix
+        model, _, preprocess = open_clip.create_model_and_transforms(f'hf-hub:{model_name}')
+        model = model.to(device)
+        model.eval()
+        tokenizer = open_clip.get_tokenizer(f'hf-hub:{model_name}')
+
+        # Prepare text prompts
+        prompts = [f"a photo of a {class_name.lower()}" for class_name in class_names]
+        text_tokens = tokenizer(prompts).to(device)

        results = {}

+        with torch.no_grad():
+            # Encode text once
+            text_features = model.encode_text(text_tokens)
+            text_features /= text_features.norm(dim=-1, keepdim=True)

+            for i, image_path in enumerate(image_paths):
+                if i % 10 == 0:
+                    print(f"Processing image {i+1}/{len(image_paths)}: {os.path.basename(image_path)}")

+                try:
+                    image = Image.open(image_path).convert("RGB")
+                    image_tensor = preprocess(image).unsqueeze(0).to(device)

+                    # Encode image
+                    image_features = model.encode_image(image_tensor)
+                    image_features /= image_features.norm(dim=-1, keepdim=True)

+                    # Calculate similarity and convert to probabilities
+                    similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
+                    probabilities = similarity.squeeze(0).cpu().numpy()
+
+                    scores = {}
+                    for j, class_name in enumerate(class_names):
+                        scores[class_name] = float(probabilities[j])
+
+                    results[os.path.basename(image_path)] = scores
+
+                except Exception as e:
+                    print(f"Error processing {image_path}: {e}")
+                    uniform_prob = 1.0 / len(class_names)
+                    results[os.path.basename(image_path)] = {class_name: uniform_prob for class_name in class_names}

        return results

    except Exception as e:
        print(f"Error loading BioCLIP: {e}")
+        import traceback
+        traceback.print_exc()
        return None

def run_openclip_inference(model_name, image_paths, class_names):
@@ -317,7 +332,7 @@ def main():

    # Handle different models with appropriate methods
    if model_name in ["imageomics/bioclip", "imageomics/bioclip-2"]:
+        results = run_bioclip_inference(model_name, image_paths, CLASS_NAMES)
    elif model_name == "google/siglip2-so400m-patch16-naflex":
        results = run_siglip_inference(image_paths, CLASS_NAMES)
    elif model_name in ["facebook/PE-Core-L14-336", "laion/CLIP-ViT-L-14-laion2B-s32B-b82K"]:
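
For quick experimentation outside hf_zeroshot.py, the same zero-shot flow added in this commit can be exercised on a single image. The sketch below follows the commit's approach (OpenCLIP hf-hub loading, "a photo of a ..." prompts, normalized features, softmax over 100x-scaled similarities); the model id, image path, and class names here are placeholders for illustration, not values taken from the script.

# Minimal standalone sketch of the OpenCLIP/BioCLIP zero-shot step.
# Assumptions: torch, open_clip_torch, and Pillow are installed; "example.jpg"
# exists locally; the class names are hypothetical stand-ins for CLASS_NAMES.
import torch
import open_clip
from PIL import Image

model_id = "imageomics/bioclip"  # or "imageomics/bioclip-2"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model, preprocessing transform, and tokenizer from the HF Hub
model, _, preprocess = open_clip.create_model_and_transforms(f"hf-hub:{model_id}")
tokenizer = open_clip.get_tokenizer(f"hf-hub:{model_id}")
model = model.to(device).eval()

class_names = ["white-tailed deer", "red fox", "raccoon"]  # placeholder labels
text_tokens = tokenizer([f"a photo of a {c.lower()}" for c in class_names]).to(device)
image = preprocess(Image.open("example.jpg").convert("RGB")).unsqueeze(0).to(device)

with torch.no_grad():
    # Encode and L2-normalize both modalities, then softmax the scaled similarities
    text_features = model.encode_text(text_tokens)
    image_features = model.encode_image(image)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    image_features /= image_features.norm(dim=-1, keepdim=True)
    probs = (100.0 * image_features @ text_features.T).softmax(dim=-1).squeeze(0)

for name, p in zip(class_names, probs.cpu().tolist()):
    print(f"{name}: {p:.3f}")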