Commit d7c7a67 (verified) committed by VeuReu
Parent: 8b6302b

Update app.py

Files changed (1): app.py (+56 -1)

app.py CHANGED
@@ -1,7 +1,8 @@
 # app.py — veureu/svision (Salamandra Vision 7B · ZeroGPU) — compatible con ENGINE
 import os
 import json
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Optional, Tuple, Union, Any
+import io
 
 import gradio as gr
 import spaces
@@ -11,6 +12,11 @@ import numpy as np
 from PIL import Image
 from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration
 
+import cv2
+from scenedetect import VideoManager, SceneManager
+from scenedetect.detectors import ContentDetector
+
+
 MODEL_ID = os.environ.get("MODEL_ID", "BSC-LT/salamandra-7b-vision")
 DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -131,6 +137,46 @@ def face_image_embedding(image: Image.Image) -> List[float] | None:
         print(f"Fallo embedding cara: {e}")
     return None
 
+@spaces.GPU
+def scenes_extraction(video_file: Union[str, io.IOBase], threshold: float, offset_frames: int, crop_ratio: float) -> Tuple[List[Image.Image], List[Dict]] | None:
+    # Detect scene cuts with PySceneDetect's ContentDetector
+    # (gr.Video hands the handler a file path; older Gradio versions pass a tempfile object)
+    video_path = video_file if isinstance(video_file, str) else video_file.name
+    video_manager = VideoManager([video_path])
+    scene_manager = SceneManager()
+    scene_manager.add_detector(ContentDetector(threshold=threshold))
+    video_manager.start()
+    scene_manager.detect_scenes(video_manager)
+    scene_list = scene_manager.get_scene_list()
+
+    cap = cv2.VideoCapture(video_path)
+    images: List[Image.Image] = []
+    informacion_escenas: List[Dict] = []
+
+    for i, (start_time, end_time) in enumerate(scene_list):
+        # Grab a keyframe a few frames past the cut to skip transition artifacts
+        frame_number = int(start_time.get_frames()) + offset_frames
+        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
+        ret, frame = cap.read()
+        if ret:
+            h, w = frame.shape[:2]
+
+            # Centred crop: crop_ratio is the fraction of each dimension to keep,
+            # so the slider default of 1.0 leaves the frame untouched
+            mh, mw = int(h * (1.0 - crop_ratio) / 2), int(w * (1.0 - crop_ratio) / 2)
+            frame = frame[mh:h - mh, mw:w - mw]
+
+            # Convert BGR -> RGB and keep the keyframe
+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            img = Image.fromarray(frame_rgb)
+            images.append(img)
+
+            # Record the scene boundaries in seconds
+            informacion_escenas.append({
+                "index": i + 1,
+                "start": start_time.get_seconds(),
+                "end": end_time.get_seconds()
+            })
+
+    cap.release()
+    return images, informacion_escenas
 
 # ----------------------------- UI & Endpoints --------------------------------
 
@@ -179,6 +225,15 @@ with gr.Blocks(title="Salamandra Vision 7B · ZeroGPU") as demo:
     face_out = gr.JSON(label="Embedding facial (vector)")
     face_btn.click(face_image_embedding, [face_img], face_out, api_name="face_image_embedding", concurrency_limit=1)
 
+    with gr.Row():
+        video_file = gr.Video(label="Sube un vídeo")
+        threshold = gr.Slider(0.0, 100.0, value=30.0, step=1.0, label="Threshold")
+        offset_frames = gr.Slider(0, 30, value=5, step=1, label="Offset frames")
+        crop_ratio = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Crop ratio")
+    scenes_btn = gr.Button("Extraer escenas")
+    scenes_gallery_out = gr.Gallery(label="Keyframes de escenas", show_label=False, columns=4, height="auto")
+    scenes_info_out = gr.JSON(label="Información de escenas")
+    scenes_btn.click(scenes_extraction, inputs=[video_file, threshold, offset_frames, crop_ratio], outputs=[scenes_gallery_out, scenes_info_out], api_name="scenes_extraction", concurrency_limit=1)
 
 demo.queue(max_size=16).launch()
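A side note on the detection API: VideoManager is deprecated as of PySceneDetect 0.6 in favour of the top-level detect()/open_video() helpers. If the Space pins a 0.6.x release, the detection block could be reduced to the sketch below, which yields the same list of (start, end) FrameTimecode pairs as get_scene_list(); the sample.mp4 path is an assumption.

# Equivalent scene detection with the PySceneDetect 0.6 API
from scenedetect import detect, ContentDetector

def detect_cuts(video_path: str, threshold: float = 30.0):
    # detect() opens the video, runs the detector, and returns the scene list
    return detect(video_path, ContentDetector(threshold=threshold))

for start, end in detect_cuts("sample.mp4"):  # hypothetical local test video
    print(f"{start.get_seconds():.2f}s -> {end.get_seconds():.2f}s")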
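For quick verification, a minimal client-side sketch of calling the new endpoint with gradio_client. The Space id veureu/svision comes from the file header and api_name="/scenes_extraction" from the click handler above; the sample.mp4 path and the exact payload shapes (both the video input and the returned Gallery output vary across Gradio versions) are assumptions.

# client_sketch.py — call the scenes_extraction endpoint of a running Space
from gradio_client import Client, handle_file

client = Client("veureu/svision")  # Space id from the file header comment

# Arguments mirror the Gradio controls wired in this commit:
# video file, detection threshold, keyframe offset, and centred crop ratio.
keyframes, scene_info = client.predict(
    {"video": handle_file("sample.mp4")},  # some versions take handle_file(...) directly
    30.0,                                  # threshold for ContentDetector
    5,                                     # offset_frames past each cut
    1.0,                                   # crop_ratio (1.0 = keep the full frame)
    api_name="/scenes_extraction",
)

# scene_info is the JSON output: a list of {"index", "start", "end"} dicts
for scene in scene_info:
    print(scene["index"], scene["start"], scene["end"])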