VeuReu committed on
Commit
3dd54f8
verified
1 Parent(s): 6204f0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -1
app.py CHANGED
@@ -199,6 +199,61 @@ def _infer_one(
199
 
200
  return processor.decode(out[0], skip_special_tokens=True).strip()
201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  @spaces.GPU
203
  def _get_face_embedding(
204
  image: Image.Image
@@ -314,6 +369,28 @@ def _get_scenes_extraction(
314
  "end": end_time.get_seconds()
315
  })
316
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  cap.release()
318
  return images, scene_info
319
 
@@ -595,12 +672,13 @@ def _extract_keyframes_every_second(
595
 
596
  # Resize cropped frame back to original resolution
597
  cropped = cv2.resize(cropped, (w, h))
 
598
 
599
  timestamp = frame_number / fps # Timestamp of the extracted frame
600
 
601
  # Save temporary image for debugging (not returned)
602
  tmp_path = tmp_dir / f"frame_{sec:03d}.jpg"
603
- cv2.imwrite(str(tmp_path), cropped)
604
 
605
  # Append extracted frame and metadata
606
  images.append(cropped)
@@ -706,6 +784,25 @@ def describe_batch(
706
  temperature=temperature, context=context))
707
  return outputs
708
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709
 
710
  def face_image_embedding(image: Image.Image) -> List[float] | None:
711
  """
@@ -963,6 +1060,32 @@ with gr.Blocks(title="Salamandra Vision 7B 路 ZeroGPU", css=custom_css,theme=gr.
963
  )
964
  gr.Markdown("---")
965
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
966
  # ---------------------
967
  # Section: Facial embeddings
968
  # ---------------------
 
199
 
200
  return processor.decode(out[0], skip_special_tokens=True).strip()
201
 
202
@spaces.GPU
def _get_face_embedding_casting(image: Image.Image) -> list[dict] | None:
    """
    Detect every face in *image* and compute an L2-normalized FaceNet
    embedding for each one.

    Returns a list of dicts (empty list when no face is detected):
        [
            {
                "embedding": <list[float]>,   # L2-normalized FaceNet vector
                "face_crop": <PIL.Image>      # crop of the detected face box
            },
            ...
        ]
    Returns None when detection or embedding raises an exception.
    """
    try:
        mtcnn, facenet = _load_face_models()
        boxes, probs = mtcnn.detect(image)

        # No faces found: an empty result, not an error.
        if boxes is None:
            return []

        results = []
        device = DEVICE if DEVICE == "cuda" and torch.cuda.is_available() else "cpu"

        for box in boxes:
            x1, y1, x2, y2 = map(int, box)
            face_crop = image.crop((x1, y1, x2, y2))

            # Re-run MTCNN on the crop to obtain an aligned face tensor;
            # marginal detections may still come back as None.
            face_tensor = mtcnn(face_crop)
            if face_tensor is None:
                continue

            face_tensor = face_tensor.unsqueeze(0).to(device)

            with torch.no_grad():
                emb = facenet(face_tensor).cpu().numpy()[0]

            # L2-normalize so cosine similarity reduces to a dot product.
            # Guard against a zero vector to avoid a division-by-zero NaN.
            norm = np.linalg.norm(emb)
            if norm > 0:
                emb = emb / norm

            results.append({
                "embedding": emb.astype(float).tolist(),
                "face_crop": face_crop,
            })

        # Drop local model references and release cached GPU memory
        # before leaving the ZeroGPU context.
        del mtcnn
        del facenet

        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()

        return results

    except Exception as e:
        # Best-effort API: callers treat None as "embedding unavailable".
        print(f"Face embedding failed: {e}")
        return None
257
  @spaces.GPU
258
  def _get_face_embedding(
259
  image: Image.Image
 
369
  "end": end_time.get_seconds()
370
  })
371
 
372
+ if len(scene_info) == 0:
373
+ cap.set(cv2.CAP_PROP_POS_FRAMES, offset_frames)
374
+ ret, frame = cap.read()
375
+ if ret:
376
+ h, w = frame.shape[:2]
377
+
378
+ ch, cw = int(h * crop_ratio), int(w * crop_ratio)
379
+ cropped_frame = frame[ch:h-ch, cw:w-cw]
380
+
381
+ img_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB)
382
+ images.append(Image.fromarray(img_rgb))
383
+
384
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
385
+ fps = cap.get(cv2.CAP_PROP_FPS)
386
+ duration_seconds = total_frames / fps if fps > 0 else 0.0
387
+
388
+ scene_info.append({
389
+ "index": 1,
390
+ "start": 0.0,
391
+ "end": duration_seconds
392
+ })
393
+
394
  cap.release()
395
  return images, scene_info
396
 
 
672
 
673
  # Resize cropped frame back to original resolution
674
  cropped = cv2.resize(cropped, (w, h))
675
+ cropped_rgb = cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB)
676
 
677
  timestamp = frame_number / fps # Timestamp of the extracted frame
678
 
679
  # Save temporary image for debugging (not returned)
680
  tmp_path = tmp_dir / f"frame_{sec:03d}.jpg"
681
+ cv2.imwrite(str(tmp_path), cv2.cvtColor(cropped_rgb, cv2.COLOR_RGB2BGR))
682
 
683
  # Append extracted frame and metadata
684
  images.append(cropped)
 
784
  temperature=temperature, context=context))
785
  return outputs
786
 
787
def face_image_embedding_casting(image):
    """
    Gradio-facing wrapper around ``_get_face_embedding_casting``.

    Returns two parallel lists: the cropped face images (for a Gallery
    component) and JSON-serializable embedding records of the form
    ``{"index": i, "embedding": [...]}``. Both lists are empty when no
    face is detected or when embedding extraction failed (None result).
    """
    detections = _get_face_embedding_casting(image)

    # None (failure) and [] (no faces) are both reported as empty outputs.
    if not detections:
        return [], []

    crops = []
    records = []
    for idx, det in enumerate(detections):
        crops.append(det["face_crop"])
        records.append({
            "index": idx,
            "embedding": det["embedding"],
        })

    return crops, records
806
 
807
  def face_image_embedding(image: Image.Image) -> List[float] | None:
808
  """
 
1060
  )
1061
  gr.Markdown("---")
1062
 
1063
+ # ---------------------
1064
+ # Section: Facial embeddings casting
1065
+ # ---------------------
1066
+
1067
+ gr.Markdown('<h2 style="text-align:center">Embeddings facials casting</h2>')
1068
+
1069
+ with gr.Row():
1070
+ face_img = gr.Image(label="Imatge per embedding facial", type="pil")
1071
+
1072
+ with gr.Row():
1073
+ face_btn = gr.Button("Obt茅 embedding facial", variant="primary")
1074
+
1075
+ with gr.Row():
1076
+ face_crops = gr.Gallery(label="Cares detectades", columns=3, height="auto")
1077
+
1078
+ with gr.Row():
1079
+ face_embeddings = gr.JSON(label="Vectors d'embedding")
1080
+
1081
+ face_btn.click(
1082
+ face_image_embedding_casting, # tu funci贸n
1083
+ [face_img],
1084
+ [face_crops, face_embeddings], # ahora 2 outputs
1085
+ api_name="face_image_embedding_casting",
1086
+ concurrency_limit=1
1087
+ )
1088
+
1089
  # ---------------------
1090
  # Section: Facial embeddings
1091
  # ---------------------