Update app.py
app.py
CHANGED
@@ -199,6 +199,61 @@ def _infer_one(
 
     return processor.decode(out[0], skip_special_tokens=True).strip()
 
+@spaces.GPU
+def _get_face_embedding_casting(image: Image.Image) -> list[dict] | None:
+    """
+    Returns list of dicts:
+    [
+        {
+            "embedding": <list[float]>,
+            "face_crop": <PIL.Image>
+        },
+        ...
+    ]
+    """
+    try:
+        mtcnn, facenet = _load_face_models()
+        boxes, probs = mtcnn.detect(image)
+
+        if boxes is None:
+            return []
+
+        resultados = []
+        device = DEVICE if DEVICE == "cuda" and torch.cuda.is_available() else "cpu"
+
+        for box in boxes:
+            x1, y1, x2, y2 = map(int, box)
+            face_crop = image.crop((x1, y1, x2, y2))
+
+            face_tensor = mtcnn(face_crop)
+            if face_tensor is None:
+                continue
+
+            face_tensor = face_tensor.unsqueeze(0).to(device)
+
+            with torch.no_grad():
+                emb = facenet(face_tensor).cpu().numpy()[0]
+
+            emb = emb / np.linalg.norm(emb)
+
+            resultados.append({
+                "embedding": emb.astype(float).tolist(),
+                "face_crop": face_crop
+            })
+
+        del mtcnn
+        del facenet
+
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+            torch.cuda.ipc_collect()
+
+        return resultados
+
+    except Exception as e:
+        print(f"Face embedding failed: {e}")
+        return None
+
 @spaces.GPU
 def _get_face_embedding(
     image: Image.Image
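
The new _get_face_embedding_casting detects faces with MTCNN, crops each box, embeds every crop with FaceNet, and L2-normalizes each vector before returning it. Because the returned embeddings are unit-length, comparing two faces reduces to a dot product. A minimal sketch of that comparison, assuming faces_a and faces_b are return values of the function; the 0.6 cutoff is illustrative, not a value from app.py:

    import numpy as np

    # Unit-length embeddings: cosine similarity is just the dot product.
    a = np.asarray(faces_a[0]["embedding"])
    b = np.asarray(faces_b[0]["embedding"])
    similarity = float(a @ b)  # 1.0 means identical direction
    same_person = similarity > 0.6  # illustrative threshold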
@@ -314,6 +369,28 @@ def _get_scenes_extraction(
             "end": end_time.get_seconds()
         })
 
+    if len(scene_info) == 0:
+        cap.set(cv2.CAP_PROP_POS_FRAMES, offset_frames)
+        ret, frame = cap.read()
+        if ret:
+            h, w = frame.shape[:2]
+
+            ch, cw = int(h * crop_ratio), int(w * crop_ratio)
+            cropped_frame = frame[ch:h-ch, cw:w-cw]
+
+            img_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB)
+            images.append(Image.fromarray(img_rgb))
+
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            duration_seconds = total_frames / fps if fps > 0 else 0.0
+
+            scene_info.append({
+                "index": 1,
+                "start": 0.0,
+                "end": duration_seconds
+            })
+
     cap.release()
     return images, scene_info
 
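
This fallback handles clips in which PySceneDetect reports no cuts: the capture is rewound to offset_frames, a single center-cropped frame is decoded, and one synthetic scene spanning the whole video is appended, so callers always get at least one (image, scene) pair. For a cut-less 12-second clip, scene_info would come back as (values illustrative):

    scene_info = [{"index": 1, "start": 0.0, "end": 12.0}]

The fps > 0 guard keeps duration_seconds at 0.0 when OpenCV cannot report a frame rate, avoiding a ZeroDivisionError.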
@@ -595,12 +672,13 @@ def _extract_keyframes_every_second(
 
         # Resize cropped frame back to original resolution
         cropped = cv2.resize(cropped, (w, h))
+        cropped_rgb = cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB)
 
         timestamp = frame_number / fps  # Timestamp of the extracted frame
 
         # Save temporary image for debugging (not returned)
         tmp_path = tmp_dir / f"frame_{sec:03d}.jpg"
-        cv2.imwrite(str(tmp_path),
+        cv2.imwrite(str(tmp_path), cv2.cvtColor(cropped_rgb, cv2.COLOR_RGB2BGR))
 
         # Append extracted frame and metadata
         images.append(cropped)
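
OpenCV decodes frames as BGR, so cropped_rgb gives an RGB copy of the resized crop, and the debug cv2.imwrite converts back because imwrite expects BGR input. Both conversions are plain channel swaps, so the round-trip writes the same bytes as saving cropped directly; a minimal sketch verifying that, with frame as an arbitrary HxWx3 uint8 array:

    import cv2
    import numpy as np

    # BGR<->RGB is an involution: converting there and back is a no-op.
    frame = np.random.randint(0, 256, (4, 4, 3), dtype=np.uint8)
    round_trip = cv2.cvtColor(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), cv2.COLOR_RGB2BGR)
    assert np.array_equal(frame, round_trip)

Also note that images.append(cropped) still appends the BGR array; as far as this hunk shows, cropped_rgb is only used for the temporary debug file.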
@@ -706,6 +784,25 @@ def describe_batch(
         temperature=temperature, context=context))
     return outputs
 
+def face_image_embedding_casting(image):
+    results = _get_face_embedding_casting(image)
+
+    if not results:
+        return [], []
+
+    # 1) List of cropped face images
+    face_crops = [r["face_crop"] for r in results]
+
+    # 2) List of embeddings (JSON-serializable)
+    face_embeddings = [
+        {
+            "index": i,
+            "embedding": r["embedding"]
+        }
+        for i, r in enumerate(results)
+    ]
+
+    return face_crops, face_embeddings
 
 def face_image_embedding(image: Image.Image) -> List[float] | None:
     """
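
face_image_embedding_casting adapts the detector's output to the two Gradio components wired up below: a list of PIL crops for the Gallery and a JSON-serializable list of {index, embedding} dicts. Since the embeddings are unit vectors, the pairwise cosine-similarity matrix for all faces in one image is a single matrix product. A minimal sketch, assuming image is a PIL image already loaded by the caller:

    import numpy as np

    crops, embs = face_image_embedding_casting(image)
    E = np.asarray([e["embedding"] for e in embs])
    # Rows are unit-length, so E @ E.T is the cosine-similarity matrix.
    similarity_matrix = E @ E.T  # shape (n_faces, n_faces), diagonal ~1.0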
@@ -963,6 +1060,32 @@ with gr.Blocks(title="Salamandra Vision 7B · ZeroGPU", css=custom_css,theme=gr.
     )
     gr.Markdown("---")
 
+    # ---------------------
+    # Section: Facial embeddings casting
+    # ---------------------
+
+    gr.Markdown('<h2 style="text-align:center">Embeddings facials casting</h2>')
+
+    with gr.Row():
+        face_img = gr.Image(label="Imatge per embedding facial", type="pil")
+
+    with gr.Row():
+        face_btn = gr.Button("Obté embedding facial", variant="primary")
+
+    with gr.Row():
+        face_crops = gr.Gallery(label="Cares detectades", columns=3, height="auto")
+
+    with gr.Row():
+        face_embeddings = gr.JSON(label="Vectors d'embedding")
+
+    face_btn.click(
+        face_image_embedding_casting,  # your function
+        [face_img],
+        [face_crops, face_embeddings],  # now 2 outputs
+        api_name="face_image_embedding_casting",
+        concurrency_limit=1
+    )
+
     # ---------------------
     # Section: Facial embeddings
     # ---------------------
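
Registering api_name="face_image_embedding_casting" also exposes the endpoint to programmatic callers, and concurrency_limit=1 serializes requests so only one GPU job runs at a time. A minimal sketch of a remote call, assuming a recent gradio_client that provides handle_file; "user/space-name" and "face.jpg" are placeholders, not values from this repo:

    from gradio_client import Client, handle_file

    client = Client("user/space-name")  # placeholder Space id
    crops, embeddings = client.predict(
        handle_file("face.jpg"),  # placeholder local image path
        api_name="/face_image_embedding_casting",
    )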