VeuReu committed on
Commit af1ccf1 (verified)
1 Parent(s): b928e73

Update app.py

Files changed (1)
  1. app.py +5 -30
app.py CHANGED
@@ -9,8 +9,6 @@ import torch
 from facenet_pytorch import MTCNN, InceptionResnetV1
 import numpy as np
 from PIL import Image
-import base64
-import tempfile
 from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration

 import cv2
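The two imports dropped here were only needed to rebuild the upload from a Base64 string and stage it in a temporary file; the next hunk removes that round-trip entirely, since Gradio already delivers the upload as a file path.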
@@ -143,64 +141,41 @@ def scenes_extraction(video_file: str, threshold: float, offset_frames: int, cro
     # video_file is a str: although the user really uploaded a file from the UI, Gradio stores it temporarily and passes the path

     try:
-        print("1")
-        video_bytes = base64.b64decode(video_file)
-        print("2")
-        # temporary file in /tmp
-        temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
-        temp_video.write(video_bytes)
-        temp_video.flush()
-        temp_video.close()
-        print("3")
-        video_path = temp_video.name
-        print("4")
         # Detect the scenes
-        video_manager = VideoManager([video_path])
-        print("5")
+        video_manager = VideoManager([video_file])
         scene_manager = SceneManager()
-        print("6")
         scene_manager.add_detector(ContentDetector(threshold=threshold))
-        print("7")
         video_manager.start()
-        print("8")
         scene_manager.detect_scenes(video_manager)
-        print("9")
         scene_list = scene_manager.get_scene_list()

         cap = cv2.VideoCapture(video_file)
-        print("10")
         images: List[Image.Image] = []
         informacion_escenas: List[Dict] = []

         for i, (start_time, end_time) in enumerate(scene_list):
-            print("11")
             frame_number = int(start_time.get_frames()) + offset_frames
-            print("12")
             cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
-            print("13")
             ret, frame = cap.read()
             if ret:
-                print("14")
                 h, w = frame.shape[:2]

                 # Now apply the crop
-                print("15")
                 ch, cw = int(h * crop_ratio), int(w * crop_ratio)
                 frame = frame[ch:h-ch, cw:w-cw]
-                print("16")
+
                 # Save the captured scene frame
                 frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                 img = Image.fromarray(frame_rgb)
-                print("17")
                 images.append(img)
-                print("18")
+
                 # Save the scene information
                 informacion_escenas.append({
                     "index": i+1,
                     "start": start_time.get_seconds(),
                     "end": end_time.get_seconds()
                 })
-        print("19")
+
         cap.release()
         return images, informacion_escenas
 
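One detail worth keeping in mind when reading the cropping step: crop_ratio is the fraction trimmed from each edge, not the fraction kept. A worked example with illustrative numbers:

# Hypothetical frame size, not taken from the commit.
h, w, crop_ratio = 1080, 1920, 0.1
ch, cw = int(h * crop_ratio), int(w * crop_ratio)  # ch = 108, cw = 192
# frame[ch:h-ch, cw:w-cw] keeps rows 108..971 and columns 192..1727,
# i.e. the central region of the frame. Any crop_ratio >= 0.5 produces an
# empty slice, so the UI default of 1.0 below yields empty frames unless
# the user lowers it before extracting scenes.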
@@ -256,7 +231,7 @@ with gr.Blocks(title="Salamandra Vision 7B · ZeroGPU") as demo:
     face_btn.click(face_image_embedding, [face_img], face_out, api_name="face_image_embedding", concurrency_limit=1)

     with gr.Row():
-        video_file = gr.Textbox(label="Texto/prompt", value="Base64 del video")
+        video_file = gr.Video(label="Sube un vídeo")
         threshold = gr.Slider(0.0, 100.0, value=30.0, step=1.0, label="Threshold")
         offset_frames = gr.Slider(0, 30, value=5, step=1, label="Offset frames")
         crop_ratio = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Crop ratio")
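The switch from a Base64 textbox to gr.Video is what makes the deletions in the function above safe: Gradio stores the upload in a temporary file and passes its path (a str) to the handler. A minimal, self-contained sketch of that behaviour (hypothetical demo, not part of this commit):

import gradio as gr

def show_path(video_file):
    # Gradio hands the handler the temporary file path of the upload.
    return f"Upload stored at: {video_file}"

with gr.Blocks() as demo:
    vid = gr.Video(label="Upload a video")
    out = gr.Textbox(label="Path")
    vid.change(show_path, vid, out)

demo.launch()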