alexnasa committed on
Commit 608bbec · verified · 1 Parent(s): b91a63e

memory leak fixed

Files changed (1)
  1. humo/models/utils/utils.py +81 -57
humo/models/utils/utils.py CHANGED
@@ -1,58 +1,82 @@
- # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
- import argparse
- import binascii
- import os
- import os.path as osp
- import json
- from omegaconf import OmegaConf
-
- import imageio
- import torch
- import torchvision
- from moviepy.editor import AudioFileClip, VideoClip
-
- __all__ = ['tensor_to_video', 'prepare_json_dataset']
-
-
- def tensor_to_video(tensor, output_video_path, input_audio_path, fps=25):
-     """
-     Converts a Tensor with shape [c, f, h, w] into a video and adds an audio track from the specified audio file.
-
-     Args:
-         tensor (numpy): The Tensor to be converted, shaped [f, h, w, c].
-         output_video_path (str): The file path where the output video will be saved.
-         input_audio_path (str): The path to the audio file (WAV file) that contains the audio track to be added.
-         fps (int): The frame rate of the output video. Default is 30 fps.
-     """
-     def make_frame(t):
-         frame_index = min(int(t * fps), tensor.shape[0] - 1)
-         return tensor[frame_index]
-
-     video_duration = tensor.shape[0] / fps
-     audio_clip = AudioFileClip(input_audio_path)
-     audio_duration = audio_clip.duration
-     final_duration = min(video_duration, audio_duration)
-     audio_clip = audio_clip.subclip(0, final_duration)
-     new_video_clip = VideoClip(make_frame, duration=final_duration)
-     new_video_clip = new_video_clip.set_audio(audio_clip)
-     new_video_clip.write_videofile(output_video_path, fps=fps, audio_codec="aac")
-
-
- def prepare_json_dataset(json_path):
-     samples = []
-     with open(json_path, "rb") as f:
-         data = json.load(f)
-     for itemname, row in data.items():
-         text = row['prompt'].strip().replace("_", " ").strip('"')
-         audio_path = row['audio_path']
-         ref_img_path = [x for x in row['img_paths']]
-
-         samples.append({
-             "text": text,
-             "ref_img": ref_img_path,
-             "audio": audio_path,
-             "itemname": itemname
-         })
-     samples = OmegaConf.create(samples)
-
+ # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
+ import argparse
+ import binascii
+ import os
+ import os.path as osp
+ import json
+ from omegaconf import OmegaConf
+
+ import imageio
+ import torch
+ import torchvision
+ from moviepy.editor import AudioFileClip, VideoClip
+
+ __all__ = ['tensor_to_video', 'prepare_json_dataset']
+
+
+ from moviepy.editor import AudioFileClip, VideoClip
+
+ def tensor_to_video(tensor, output_video_path, input_audio_path, fps=25):
+     """
+     tensor: shape [f, h, w, c] (as your code expects)
+     """
+
+     def make_frame(t):
+         frame_index = min(int(t * fps), tensor.shape[0] - 1)
+         return tensor[frame_index]
+
+     video_duration = tensor.shape[0] / fps
+
+     audio_clip = None
+     audio_subclip = None
+     video_clip = None
+
+     try:
+         # Load audio
+         audio_clip = AudioFileClip(input_audio_path)
+         audio_duration = audio_clip.duration
+         final_duration = min(video_duration, audio_duration)
+
+         # Trim audio
+         audio_subclip = audio_clip.subclip(0, final_duration)
+
+         # Build video
+         video_clip = VideoClip(make_frame, duration=final_duration)
+         video_clip = video_clip.set_audio(audio_subclip)
+
+         # Write file (this can spawn ffmpeg)
+         video_clip.write_videofile(
+             output_video_path,
+             fps=fps,
+             audio_codec="aac"
+         )
+
+     finally:
+         # Make absolutely sure everything is closed
+         if video_clip is not None:
+             video_clip.close()
+         if audio_subclip is not None:
+             audio_subclip.close()
+         if audio_clip is not None:
+             audio_clip.close()
+
+
+
+ def prepare_json_dataset(json_path):
+     samples = []
+     with open(json_path, "rb") as f:
+         data = json.load(f)
+     for itemname, row in data.items():
+         text = row['prompt'].strip().replace("_", " ").strip('"')
+         audio_path = row['audio_path']
+         ref_img_path = [x for x in row['img_paths']]
+
+         samples.append({
+             "text": text,
+             "ref_img": ref_img_path,
+             "audio": audio_path,
+             "itemname": itemname
+         })
+     samples = OmegaConf.create(samples)
+
  return samples
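
The patch wraps the moviepy objects in try/finally so each clip is closed even when write_videofile raises, rather than leaving the underlying ffmpeg readers to be released only when garbage collection happens to run. A minimal usage sketch of the patched helper (the frame count, resolution, and file paths below are illustrative assumptions, not taken from the repository):

import numpy as np
from humo.models.utils.utils import tensor_to_video

# [f, h, w, c] uint8 frames, matching the indexing done in make_frame
frames = np.random.randint(0, 256, size=(75, 480, 832, 3), dtype=np.uint8)
tensor_to_video(frames, "output.mp4", "input_audio.wav", fps=25)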