VeuReu committed on
Commit
6288dfd
verified
1 Parent(s): e3b4a4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -0
app.py CHANGED
@@ -30,9 +30,11 @@ import json
30
  import os
31
  import re
32
  from typing import Any, Dict, List, Optional, Tuple, Union
 
33
 
34
  # Third-party libraries
35
  import cv2
 
36
  import gradio as gr
37
  import numpy as np
38
  import spaces
@@ -518,6 +520,88 @@ def _get_ocr_characters_to_image(
518
 
519
  return informacion_image_completo
520
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521
  """
522
  # ==============================================================================
523
  # API Helpers
@@ -715,6 +799,50 @@ def add_ocr_characters_to_image(
715
  return _get_ocr_characters_to_image(image,informacion_image,face_col)
716
 
717
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
718
  """
719
  # ==============================================================================
720
  # UI & Endpoints
@@ -840,6 +968,7 @@ with gr.Blocks(title="Salamandra Vision 7B · ZeroGPU", css=custom_css,theme=gr.
840
  gr.Markdown('<h2 style="text-align:center">Extracci贸 d鈥檈scenes de v铆deo</h2>')
841
  with gr.Row():
842
  video_file = gr.Video(label="Puja un v铆deo")
 
843
  threshold = gr.Slider(0.0, 100.0, value=30.0, step=1.0, label="Llindar")
844
  offset_frames = gr.Slider(0, 30, value=5, step=1, label="Despla莽ament de frames")
845
  crop_ratio = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Ra贸 de retall")
@@ -858,6 +987,29 @@ with gr.Blocks(title="Salamandra Vision 7B · ZeroGPU", css=custom_css,theme=gr.
858
  )
859
  gr.Markdown("---")
860
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
861
  # ---------------------
862
  # Section: Batch description with Salamandra Vision
863
  # ---------------------
 
30
  import os
31
  import re
32
  from typing import Any, Dict, List, Optional, Tuple, Union
33
+ from pathlib import Path
34
 
35
  # Third-party libraries
36
  import cv2
37
+ import tempfile
38
  import gradio as gr
39
  import numpy as np
40
  import spaces
 
520
 
521
  return informacion_image_completo
522
 
523
+
524
@spaces.GPU
def _extract_keyframes_every_second(
    video: str,
    crop_ratio: float = 0.1
) -> Tuple[List[np.ndarray], List[dict]]:
    """
    Extract one keyframe per second from a video file.

    Parameters
    ----------
    video : str
        Path to the input video file.
    crop_ratio : float, optional
        Fraction of the frame height/width removed from *each* border before
        the remaining area is resized back to the original dimensions.
        Must satisfy ``0 <= crop_ratio < 0.5`` (at 0.5 nothing would be left
        to resize). Default is 0.1 (10%).

    Returns
    -------
    images : List[np.ndarray]
        Extracted (cropped and resized) frames, in the order they were read.
    frames_info : List[dict]
        One metadata dictionary per extracted frame, containing:
          - "index": sequential index starting from 1
          - "start": starting second of the interval represented by the frame
          - "end": ending second of the interval represented by the frame

    Raises
    ------
    ValueError
        If ``crop_ratio`` is outside ``[0, 0.5)``.

    Notes
    -----
    Each frame is also written as a JPEG into a temporary directory, which is
    removed when the function returns. If the video cannot be opened, two
    empty lists are returned.
    """
    # Reject ratios that would leave an empty region; cv2.resize would
    # otherwise fail later with a much less helpful error.
    if not 0.0 <= crop_ratio < 0.5:
        raise ValueError(
            f"crop_ratio must be in the range [0, 0.5), got {crop_ratio!r}"
        )

    images: List[np.ndarray] = []
    frames_info: List[dict] = []

    cap = cv2.VideoCapture(str(video))
    try:
        if not cap.isOpened():
            return images, frames_info

        # Some containers report 0 / negative / NaN FPS; fall back to 25.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 25.0
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = total_frames / fps

        # The context manager guarantees the scratch directory is deleted,
        # even when reading or encoding a frame raises.
        with tempfile.TemporaryDirectory() as tmp_name:
            tmp_dir = Path(tmp_name)

            # Seek to the first frame of every whole second of the video.
            for sec in range(int(duration)):
                frame_number = int(sec * fps)
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
                ok, frame = cap.read()
                if not ok:
                    break

                # Trim the same proportional margin from all four borders,
                # then scale the remainder back to the source resolution.
                h, w = frame.shape[:2]
                ch, cw = int(h * crop_ratio), int(w * crop_ratio)
                cropped = cv2.resize(frame[ch:h - ch, cw:w - cw], (w, h))

                # Intermediate copy on disk (debugging aid, not returned).
                cv2.imwrite(str(tmp_dir / f"frame_{sec:03d}.jpg"), cropped)

                # NOTE(review): frames keep the channel order delivered by
                # cv2.VideoCapture (BGR by default) - confirm that consumers
                # such as a Gradio gallery do not expect RGB.
                images.append(cropped)
                frames_info.append({
                    "index": sec + 1,
                    "start": sec,
                    "end": sec + 1,
                })
    finally:
        # Always free the capture handle, including on early return/error.
        cap.release()

    return images, frames_info
603
+
604
+
605
  """
606
  # ==============================================================================
607
  # API Helpers
 
799
  return _get_ocr_characters_to_image(image,informacion_image,face_col)
800
 
801
 
802
def extract_keyframes_endpoint(
    video_path: str,
    crop_ratio: float = 0.1
) -> Tuple[List[np.ndarray], List[Dict[str, Any]]]:
    """
    Endpoint wrapper for extracting one keyframe per second from a video.

    Thin wrapper that forwards both arguments unchanged to
    `_extract_keyframes_every_second` and returns its result as-is. That
    function reads one frame per second of video, crops a proportional
    margin from every border and resizes the frame back to its original
    resolution.

    Parameters
    ----------
    video_path : str
        Path to the input video file.
    crop_ratio : float, optional
        Fraction of the frame to crop from each border before resizing
        (default is 0.1, equivalent to 10%).

    Returns
    -------
    images : List[np.ndarray]
        Extracted frames as NumPy arrays.
    frames_info : List[Dict[str, Any]]
        One dictionary per frame with:
          * index: sequential frame identifier (starting at 1)
          * start: starting second of the 1-second interval
          * end: ending second of the interval

    Notes
    -----
    The result is a 2-tuple (not a dictionary) so that it maps positionally
    onto two Gradio output components.
    """
    return _extract_keyframes_every_second(video_path, crop_ratio)
845
+
846
  """
847
  # ==============================================================================
848
  # UI & Endpoints
 
968
  gr.Markdown('<h2 style="text-align:center">Extracci贸 d鈥檈scenes de v铆deo</h2>')
969
  with gr.Row():
970
  video_file = gr.Video(label="Puja un v铆deo")
971
+ with gr.Row():
972
  threshold = gr.Slider(0.0, 100.0, value=30.0, step=1.0, label="Llindar")
973
  offset_frames = gr.Slider(0, 30, value=5, step=1, label="Despla莽ament de frames")
974
  crop_ratio = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Ra贸 de retall")
 
987
  )
988
  gr.Markdown("---")
989
 
990
+
991
+ # ---------------------
992
+ # Section: Video all frame extraction
993
+ # ---------------------
994
+ gr.Markdown('<h2 style="text-align:center">Extracci贸 d鈥檉rames de v铆deo</h2>')
995
+ with gr.Row():
996
+ video_file = gr.Video(label="Puja un v铆deo")
997
+ with gr.Row():
998
+ scenes_btn = gr.Button("Extreu frames", variant="primary")
999
+ with gr.Row():
1000
+ scenes_gallery_out = gr.Gallery(label="Fotogrames clau de l鈥檈scena", show_label=False, columns=4, height="auto")
1001
+ scenes_info_out = gr.JSON(label="Informaci贸 de l鈥檈scena")
1002
+
1003
+ scenes_btn.click(
1004
+ extract_keyframes_endpoint,
1005
+ inputs=[video_file],
1006
+ outputs=[scenes_gallery_out, scenes_info_out],
1007
+ api_name="scenes_extraction",
1008
+ concurrency_limit=1
1009
+ )
1010
+ gr.Markdown("---")
1011
+
1012
+
1013
  # ---------------------
1014
  # Section: Batch description with Salamandra Vision
1015
  # ---------------------