Spaces:

VeuReu
/

svision

Running on Zero

App Files Files Community

VeuReu committed 22 days ago

Commit

6288dfd

verified ·

1 Parent(s): e3b4a4b

Update app.py

Browse files

Files changed (1) hide show

app.py +152 -0

app.py CHANGED Viewed

@@ -30,9 +30,11 @@ import json
 import os
 import re
 from typing import Any, Dict, List, Optional, Tuple, Union
 # Third-party libraries
 import cv2
 import gradio as gr
 import numpy as np
 import spaces
@@ -518,6 +520,88 @@ def _get_ocr_characters_to_image(
     return informacion_image_completo
 """
 # ==============================================================================
 # API Helpers
@@ -715,6 +799,50 @@ def add_ocr_characters_to_image(
     return _get_ocr_characters_to_image(image,informacion_image,face_col)
 """
 # ==============================================================================
 # UI & Endpoints
@@ -840,6 +968,7 @@ with gr.Blocks(title="Salamandra Vision 7B · ZeroGPU", css=custom_css,theme=gr.
     gr.Markdown('<h2 style="text-align:center">Extracció d’escenes de vídeo</h2>')
     with gr.Row():
         video_file = gr.Video(label="Puja un vídeo")
         threshold = gr.Slider(0.0, 100.0, value=30.0, step=1.0, label="Llindar")
         offset_frames = gr.Slider(0, 30, value=5, step=1, label="Desplaçament de frames")
         crop_ratio = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Raó de retall")
@@ -858,6 +987,29 @@ with gr.Blocks(title="Salamandra Vision 7B · ZeroGPU", css=custom_css,theme=gr.
     )
     gr.Markdown("---")
     # ---------------------
     # Section: Batch description with Salamandra Vision
     # ---------------------

 import os
 import re
 from typing import Any, Dict, List, Optional, Tuple, Union
+from pathlib import Path
 # Third-party libraries
 import cv2
+import tempfile
 import gradio as gr
 import numpy as np
 import spaces
     return informacion_image_completo
+@spaces.GPU
+def _extract_keyframes_every_second(
+    video: str,
+    crop_ratio: float = 0.1
+) -> Tuple[List[np.ndarray], List[dict]]:
+    """
+    Extracts one keyframe per second from a video file.
+    Parameters
+    ----------
+    video : str
+        Path to the input video file.
+    crop_ratio : float, optional
+        Percentage of the frame to crop from each border before resizing
+        back to the original dimensions. Default is 0.1 (10%).
+    Returns
+    -------
+    images : List[np.ndarray]
+        List of extracted frames as NumPy arrays.
+    frames_info : List[dict]
+        List of metadata dictionaries for each extracted frame. Each dictionary contains:
+            - "index": sequential index starting from 1
+            - "start": starting second of the interval represented by the frame
+            - "end": ending second of the interval represented by the frame
+    Notes
+    -----
+    A temporary directory is automatically created to store intermediate
+    images. These images are not returned but can be useful for debugging.
+    The directory is cleaned up after the function finishes.
+    """
+    # Temporary directory for storing intermediate images (auto-cleaned afterwards)
+    tmp_dir = Path(tempfile.mkdtemp())
+    # Open the video capture
+    cap = cv2.VideoCapture(str(video))
+    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    duration = total_frames / fps
+    images = []
+    frames_info = []
+    # Loop through the video extracting one frame per second
+    for sec in range(int(duration)):
+        frame_number = int(sec * fps)
+        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
+        ret, frame = cap.read()
+        if not ret:
+            break
+        # Crop the frame by the given ratio on all borders
+        h, w = frame.shape[:2]
+        ch, cw = int(h * crop_ratio), int(w * crop_ratio)
+        cropped = frame[ch:h-ch, cw:w-cw]
+        # Resize cropped frame back to original resolution
+        cropped = cv2.resize(cropped, (w, h))
+        timestamp = frame_number / fps  # Timestamp of the extracted frame
+        # Save temporary image for debugging (not returned)
+        tmp_path = tmp_dir / f"frame_{sec:03d}.jpg"
+        cv2.imwrite(str(tmp_path), cropped)
+        # Append extracted frame and metadata
+        images.append(cropped)
+        frames_info.append({
+            "index": sec + 1,
+            "start": sec,
+            "end": sec + 1
+        })
+    # Release the video capture object
+    cap.release()
+    return images, frames_info
 """
 # ==============================================================================
 # API Helpers
     return _get_ocr_characters_to_image(image,informacion_image,face_col)
+def extract_keyframes_endpoint(
+    video_path: str,
+    crop_ratio: float = 0.1
+) -> Dict[str, Any]:
+    """
+    Endpoint wrapper for extracting one keyframe per second from a video.
+    This function serves as a wrapper around the internal
+    `_extract_keyframes_every_second` function. It receives a path to a
+    video file and an optional cropping ratio, and delegates the extraction
+    of frames to the internal function. The wrapped internal function
+    performs the following:
+    1. Loads the video and determines its duration and FPS.
+    2. Extracts exactly one frame per second of video playback.
+    3. Crops each frame by a proportional margin and resizes it back to the
+       original resolution.
+    4. Optionally stores intermediate images in a temporary directory for
+       debugging purposes.
+    5. Returns the frames as NumPy arrays along with structured metadata
+       describing the extracted intervals.
+    Parameters
+    ----------
+    video_path : str
+        Path to the input video file.
+    crop_ratio : float, optional
+        Percentage of the frame to crop from each border before resizing
+        (default is 0.1, equivalent to 10%).
+    Returns
+    -------
+    Dict[str, Any]
+        A dictionary containing:
+            - frames: list of extracted frames represented as NumPy arrays
+            - metadata: list of dictionaries with:
+                * index: sequential frame identifier
+                * start: starting timestamp of the 1-second interval
+                * end: ending timestamp of the interval
+    """
+    images, frames_info = _extract_keyframes_every_second(video_path, crop_ratio)
+    return images, frames_info
 """
 # ==============================================================================
 # UI & Endpoints
     gr.Markdown('<h2 style="text-align:center">Extracció d’escenes de vídeo</h2>')
     with gr.Row():
         video_file = gr.Video(label="Puja un vídeo")
+    with gr.Row():
         threshold = gr.Slider(0.0, 100.0, value=30.0, step=1.0, label="Llindar")
         offset_frames = gr.Slider(0, 30, value=5, step=1, label="Desplaçament de frames")
         crop_ratio = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Raó de retall")
     )
     gr.Markdown("---")
+    # ---------------------
+    # Section: Video all frame extraction
+    # ---------------------
+    gr.Markdown('<h2 style="text-align:center">Extracció d’frames de vídeo</h2>')
+    with gr.Row():
+        video_file = gr.Video(label="Puja un vídeo")
+    with gr.Row():
+        scenes_btn = gr.Button("Extreu frames", variant="primary")
+    with gr.Row():
+        scenes_gallery_out = gr.Gallery(label="Fotogrames clau de l’escena", show_label=False, columns=4, height="auto")
+        scenes_info_out = gr.JSON(label="Informació de l’escena")
+    scenes_btn.click(
+        extract_keyframes_endpoint,
+        inputs=[video_file],
+        outputs=[scenes_gallery_out, scenes_info_out],
+        api_name="scenes_extraction",
+        concurrency_limit=1
+    )
+    gr.Markdown("---")
     # ---------------------
     # Section: Batch description with Salamandra Vision
     # ---------------------