Spaces:

VeuReu
/

svision

Running on Zero

App Files Community

VeuReu committed 28 days ago

Commit

af1ccf1

verified ·

1 Parent(s): b928e73

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -30

app.py CHANGED Viewed

@@ -9,8 +9,6 @@ import torch
 from facenet_pytorch import MTCNN, InceptionResnetV1
 import numpy as np
 from PIL import Image
-import base64
-import tempfile
 from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration
 import cv2
@@ -143,64 +141,41 @@ def scenes_extraction(video_file: str, threshold: float, offset_frames: int, cro
     # video_file es un str ya que aunque realmente el usuario subió un archivo desde la UI, Gradio lo guarda temporalmente como ruta
     try:
-        print("1")
-        video_bytes = base64.b64decode(video_file)
-        print("2")
-        # archivo temporal en /tmp
-        temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
-        temp_video.write(video_bytes)
-        temp_video.flush()
-        temp_video.close()
-        print("3")
-        video_path = temp_video.name
-        print("4")
         # Detectamos las escenas
-        video_manager = VideoManager([video_path])
-        print("5")
         scene_manager = SceneManager()
-        print("6")
         scene_manager.add_detector(ContentDetector(threshold=threshold))
-        print("7")
         video_manager.start()
-        print("8")
         scene_manager.detect_scenes(video_manager)
-        print("9")
         scene_list = scene_manager.get_scene_list()
         cap = cv2.VideoCapture(video_file)
-        print("10")
         images: List[Image.Image] = []
         informacion_escenas: List[Dict] = []
         for i, (start_time, end_time) in enumerate(scene_list):
-            print("11")
             frame_number = int(start_time.get_frames()) + offset_frames
-            print("12")
             cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
-            print("13")
             ret, frame = cap.read()
             if ret:
-                print("14")
                 h, w = frame.shape[:2]
                 # Ahora realizamos el recorte
-                print("15")
                 ch, cw = int(h * crop_ratio), int(w * crop_ratio)
                 frame = frame[ch:h-ch, cw:w-cw]
-                print("16")
                 # Guardamos la escena obtenida
                 frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                 img = Image.fromarray(frame_rgb)
-                print("17")
                 images.append(img)
-                print("18")
                 # Guardamos la información de la escena
                 informacion_escenas.append({
                     "index": i+1,
                     "start": start_time.get_seconds(),
                     "end": end_time.get_seconds()
                 })
-                print("19")
         cap.release()
         return images, informacion_escenas
@@ -256,7 +231,7 @@ with gr.Blocks(title="Salamandra Vision 7B · ZeroGPU") as demo:
     face_btn.click(face_image_embedding, [face_img], face_out, api_name="face_image_embedding", concurrency_limit=1)
     with gr.Row():
-        video_file = gr.Textbox(label="Texto/prompt", value="Base64 del video")
         threshold = gr.Slider(0.0, 100.0, value=30.0, step=1.0, label="Threshold")
         offset_frames = gr.Slider(0, 30, value=5, step=1, label="Offset frames")
         crop_ratio = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Crop ratio")

 from facenet_pytorch import MTCNN, InceptionResnetV1
 import numpy as np
 from PIL import Image
 from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration
 import cv2
     # video_file es un str ya que aunque realmente el usuario subió un archivo desde la UI, Gradio lo guarda temporalmente como ruta
     try:
         # Detectamos las escenas
+        video_manager = VideoManager([video_file])
         scene_manager = SceneManager()
         scene_manager.add_detector(ContentDetector(threshold=threshold))
         video_manager.start()
         scene_manager.detect_scenes(video_manager)
         scene_list = scene_manager.get_scene_list()
         cap = cv2.VideoCapture(video_file)
         images: List[Image.Image] = []
         informacion_escenas: List[Dict] = []
         for i, (start_time, end_time) in enumerate(scene_list):
             frame_number = int(start_time.get_frames()) + offset_frames
             cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
             ret, frame = cap.read()
             if ret:
                 h, w = frame.shape[:2]
                 # Ahora realizamos el recorte
                 ch, cw = int(h * crop_ratio), int(w * crop_ratio)
                 frame = frame[ch:h-ch, cw:w-cw]
                 # Guardamos la escena obtenida
                 frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                 img = Image.fromarray(frame_rgb)
                 images.append(img)
                 # Guardamos la información de la escena
                 informacion_escenas.append({
                     "index": i+1,
                     "start": start_time.get_seconds(),
                     "end": end_time.get_seconds()
                 })
         cap.release()
         return images, informacion_escenas
     face_btn.click(face_image_embedding, [face_img], face_out, api_name="face_image_embedding", concurrency_limit=1)
     with gr.Row():
+        video_file = gr.Video(label="Sube un vídeo")
         threshold = gr.Slider(0.0, 100.0, value=30.0, step=1.0, label="Threshold")
         offset_frames = gr.Slider(0, 30, value=5, step=1, label="Offset frames")
         crop_ratio = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Crop ratio")