Update app.py
Browse files
app.py
CHANGED
|
@@ -30,9 +30,11 @@ import json
|
|
| 30 |
import os
|
| 31 |
import re
|
| 32 |
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
|
|
| 33 |
|
| 34 |
# Third-party libraries
|
| 35 |
import cv2
|
|
|
|
| 36 |
import gradio as gr
|
| 37 |
import numpy as np
|
| 38 |
import spaces
|
|
@@ -518,6 +520,88 @@ def _get_ocr_characters_to_image(
|
|
| 518 |
|
| 519 |
return informacion_image_completo
|
| 520 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 521 |
"""
|
| 522 |
# ==============================================================================
|
| 523 |
# API Helpers
|
|
@@ -715,6 +799,50 @@ def add_ocr_characters_to_image(
|
|
| 715 |
return _get_ocr_characters_to_image(image,informacion_image,face_col)
|
| 716 |
|
| 717 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 718 |
"""
|
| 719 |
# ==============================================================================
|
| 720 |
# UI & Endpoints
|
|
@@ -840,6 +968,7 @@ with gr.Blocks(title="Salamandra Vision 7B 路 ZeroGPU", css=custom_css,theme=gr.
|
|
| 840 |
gr.Markdown('<h2 style="text-align:center">Extracci贸 d鈥檈scenes de v铆deo</h2>')
|
| 841 |
with gr.Row():
|
| 842 |
video_file = gr.Video(label="Puja un v铆deo")
|
|
|
|
| 843 |
threshold = gr.Slider(0.0, 100.0, value=30.0, step=1.0, label="Llindar")
|
| 844 |
offset_frames = gr.Slider(0, 30, value=5, step=1, label="Despla莽ament de frames")
|
| 845 |
crop_ratio = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Ra贸 de retall")
|
|
@@ -858,6 +987,29 @@ with gr.Blocks(title="Salamandra Vision 7B 路 ZeroGPU", css=custom_css,theme=gr.
|
|
| 858 |
)
|
| 859 |
gr.Markdown("---")
|
| 860 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 861 |
# ---------------------
|
| 862 |
# Section: Batch description with Salamandra Vision
|
| 863 |
# ---------------------
|
|
|
|
| 30 |
import os
|
| 31 |
import re
|
| 32 |
from typing import Any, Dict, List, Optional, Tuple, Union
|
| 33 |
+
from pathlib import Path
|
| 34 |
|
| 35 |
# Third-party libraries
|
| 36 |
import cv2
|
| 37 |
+
import tempfile
|
| 38 |
import gradio as gr
|
| 39 |
import numpy as np
|
| 40 |
import spaces
|
|
|
|
| 520 |
|
| 521 |
return informacion_image_completo
|
| 522 |
|
| 523 |
+
|
| 524 |
+
@spaces.GPU
def _extract_keyframes_every_second(
    video: str,
    crop_ratio: float = 0.1
) -> Tuple[List[np.ndarray], List[dict]]:
    """
    Extract one keyframe per second from a video file.

    Parameters
    ----------
    video : str
        Path to the input video file.
    crop_ratio : float, optional
        Fraction of the frame to crop from each border before resizing
        back to the original dimensions. Must satisfy
        ``0 <= crop_ratio < 0.5``; default is 0.1 (10%).

    Returns
    -------
    images : List[np.ndarray]
        Extracted frames exactly as decoded by OpenCV (after crop + resize).
    frames_info : List[dict]
        One metadata dictionary per extracted frame, containing:
            - "index": sequential index starting from 1
            - "start": starting second of the interval represented by the frame
            - "end": ending second of the interval represented by the frame

    Raises
    ------
    ValueError
        If ``crop_ratio`` is outside ``[0, 0.5)``; such a ratio would crop
        the frame down to nothing.

    Notes
    -----
    Each frame is also written as a JPEG into a temporary directory for
    debugging. That directory is removed before the function returns, even
    when an error occurs. If the video cannot be opened, two empty lists are
    returned.

    NOTE(review): the body only performs OpenCV decoding and resizing; it is
    unclear why it is wrapped in ``@spaces.GPU`` - confirm it is needed.
    """
    # Cropping half (or more) from each side leaves an empty image, which
    # would only surface later as an opaque cv2.resize failure.
    if not 0.0 <= crop_ratio < 0.5:
        raise ValueError(
            f"crop_ratio must satisfy 0 <= crop_ratio < 0.5, got {crop_ratio!r}"
        )

    images: List[np.ndarray] = []
    frames_info: List[dict] = []

    cap = cv2.VideoCapture(str(video))
    try:
        if not cap.isOpened():
            return images, frames_info

        # Some containers report 0 FPS; fall back to a common default.
        fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = total_frames / fps

        # The context manager guarantees the debug directory is deleted.
        with tempfile.TemporaryDirectory() as tmp_name:
            tmp_dir = Path(tmp_name)

            # A trailing partial second does not produce a frame.
            for sec in range(int(duration)):
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(sec * fps))
                ret, frame = cap.read()
                if not ret:
                    break

                # Crop the same proportional margin from every border,
                # then scale back up to the original resolution.
                h, w = frame.shape[:2]
                ch, cw = int(h * crop_ratio), int(w * crop_ratio)
                cropped = cv2.resize(frame[ch:h - ch, cw:w - cw], (w, h))

                # Debug copy only; it is never returned to the caller.
                cv2.imwrite(str(tmp_dir / f"frame_{sec:03d}.jpg"), cropped)

                images.append(cropped)
                frames_info.append({
                    "index": sec + 1,
                    "start": sec,
                    "end": sec + 1
                })
    finally:
        # Always free the decoder handle, including on error paths.
        cap.release()

    return images, frames_info
|
| 603 |
+
|
| 604 |
+
|
| 605 |
"""
|
| 606 |
# ==============================================================================
|
| 607 |
# API Helpers
|
|
|
|
| 799 |
return _get_ocr_characters_to_image(image,informacion_image,face_col)
|
| 800 |
|
| 801 |
|
| 802 |
+
def extract_keyframes_endpoint(
    video_path: str,
    crop_ratio: float = 0.1
) -> Tuple[List[np.ndarray], List[Dict[str, Any]]]:
    """
    Endpoint wrapper for extracting one keyframe per second from a video.

    Delegates the extraction to `_extract_keyframes_every_second` and
    converts the resulting frames from OpenCV's BGR channel order to RGB so
    that they are displayed with correct colours.

    Parameters
    ----------
    video_path : str
        Path to the input video file. An empty or missing path yields an
        empty result without touching the extractor.
    crop_ratio : float, optional
        Fraction of the frame to crop from each border before resizing
        (default is 0.1, equivalent to 10%).

    Returns
    -------
    Tuple[List[np.ndarray], List[Dict[str, Any]]]
        A ``(frames, metadata)`` pair, one element per output component:
            - frames: list of extracted frames as RGB NumPy arrays
            - metadata: list of dictionaries with:
                * index: sequential frame identifier
                * start: starting second of the 1-second interval
                * end: ending second of the interval
    """
    # No upload: skip the extractor instead of handing OpenCV a bogus path.
    if not video_path:
        return [], []

    images, frames_info = _extract_keyframes_every_second(video_path, crop_ratio)

    # OpenCV decodes frames as BGR; image components expect RGB.
    rgb_images = [cv2.cvtColor(image, cv2.COLOR_BGR2RGB) for image in images]

    return rgb_images, frames_info
|
| 845 |
+
|
| 846 |
"""
|
| 847 |
# ==============================================================================
|
| 848 |
# UI & Endpoints
|
|
|
|
| 968 |
gr.Markdown('<h2 style="text-align:center">Extracci贸 d鈥檈scenes de v铆deo</h2>')
|
| 969 |
with gr.Row():
|
| 970 |
video_file = gr.Video(label="Puja un v铆deo")
|
| 971 |
+
with gr.Row():
|
| 972 |
threshold = gr.Slider(0.0, 100.0, value=30.0, step=1.0, label="Llindar")
|
| 973 |
offset_frames = gr.Slider(0, 30, value=5, step=1, label="Despla莽ament de frames")
|
| 974 |
crop_ratio = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Ra贸 de retall")
|
|
|
|
| 987 |
)
|
| 988 |
gr.Markdown("---")
|
| 989 |
|
| 990 |
+
|
| 991 |
+
# ---------------------
|
| 992 |
+
# Section: Per-second video frame extraction
|
| 993 |
+
# ---------------------
|
| 994 |
+
gr.Markdown('<h2 style="text-align:center">Extracci贸 d鈥檉rames de v铆deo</h2>')
|
| 995 |
+
with gr.Row():
|
| 996 |
+
video_file = gr.Video(label="Puja un v铆deo")
|
| 997 |
+
with gr.Row():
|
| 998 |
+
scenes_btn = gr.Button("Extreu frames", variant="primary")
|
| 999 |
+
with gr.Row():
|
| 1000 |
+
scenes_gallery_out = gr.Gallery(label="Fotogrames clau de l鈥檈scena", show_label=False, columns=4, height="auto")
|
| 1001 |
+
scenes_info_out = gr.JSON(label="Informaci贸 de l鈥檈scena")
|
| 1002 |
+
|
| 1003 |
+
scenes_btn.click(
|
| 1004 |
+
extract_keyframes_endpoint,
|
| 1005 |
+
inputs=[video_file],
|
| 1006 |
+
outputs=[scenes_gallery_out, scenes_info_out],
|
| 1007 |
+
api_name="scenes_extraction",
|
| 1008 |
+
concurrency_limit=1
|
| 1009 |
+
)
|
| 1010 |
+
gr.Markdown("---")
|
| 1011 |
+
|
| 1012 |
+
|
| 1013 |
# ---------------------
|
| 1014 |
# Section: Batch description with Salamandra Vision
|
| 1015 |
# ---------------------
|