Spaces:
Runtime error
Runtime error
Hugo Flores Garcia
committed on
Commit
·
cf172ac
1
Parent(s):
09b9691
update splits, reqs
Browse files
- scripts/utils/split.py +17 -4
- setup.py +2 -1
scripts/utils/split.py
CHANGED
|
@@ -1,8 +1,12 @@
|
|
| 1 |
from pathlib import Path
|
| 2 |
import random
|
| 3 |
import shutil
|
|
|
|
|
|
|
| 4 |
|
| 5 |
import argbind
|
|
|
|
|
|
|
| 6 |
|
| 7 |
from audiotools.core import util
|
| 8 |
|
|
@@ -12,8 +16,13 @@ def train_test_split(
|
|
| 12 |
audio_folder: str = ".",
|
| 13 |
test_size: float = 0.2,
|
| 14 |
seed: int = 42,
|
|
|
|
| 15 |
):
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
# split according to test_size
|
| 19 |
n_test = int(len(audio_files) * test_size)
|
|
@@ -37,10 +46,14 @@ def train_test_split(
|
|
| 37 |
for split, files in (
|
| 38 |
("train", train_files), ("test", test_files)
|
| 39 |
):
|
| 40 |
-
for file in files:
|
| 41 |
-
out_file =
|
| 42 |
out_file.parent.mkdir(exist_ok=True, parents=True)
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
|
| 46 |
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
import random
|
| 3 |
import shutil
|
| 4 |
+
import os
|
| 5 |
+
import json
|
| 6 |
|
| 7 |
import argbind
|
| 8 |
+
from tqdm import tqdm
|
| 9 |
+
from tqdm.contrib.concurrent import thread_map
|
| 10 |
|
| 11 |
from audiotools.core import util
|
| 12 |
|
|
|
|
| 16 |
audio_folder: str = ".",
|
| 17 |
test_size: float = 0.2,
|
| 18 |
seed: int = 42,
|
| 19 |
+
pattern: str = "**/*.mp3",
|
| 20 |
):
|
| 21 |
+
print(f"finding audio")
|
| 22 |
+
|
| 23 |
+
audio_folder = Path(audio_folder)
|
| 24 |
+
audio_files = list(tqdm(audio_folder.glob(pattern)))
|
| 25 |
+
print(f"found {len(audio_files)} audio files")
|
| 26 |
|
| 27 |
# split according to test_size
|
| 28 |
n_test = int(len(audio_files) * test_size)
|
|
|
|
| 46 |
for split, files in (
|
| 47 |
("train", train_files), ("test", test_files)
|
| 48 |
):
|
| 49 |
+
for file in tqdm(files):
|
| 50 |
+
out_file = audio_folder.parent / f"{audio_folder.name}-{split}" / Path(file).name
|
| 51 |
out_file.parent.mkdir(exist_ok=True, parents=True)
|
| 52 |
+
os.symlink(file, out_file)
|
| 53 |
+
|
| 54 |
+
# save split as json
|
| 55 |
+
with open(Path(audio_folder) / f"{split}.json", "w") as f:
|
| 56 |
+
json.dump([str(f) for f in files], f)
|
| 57 |
|
| 58 |
|
| 59 |
|
setup.py
CHANGED
|
@@ -39,6 +39,7 @@ setup(
|
|
| 39 |
"google-cloud-logging==2.2.0",
|
| 40 |
"einops",
|
| 41 |
# "frechet_audio_distance",
|
| 42 |
-
"gradio"
|
|
|
|
| 43 |
],
|
| 44 |
)
|
|
|
|
| 39 |
"google-cloud-logging==2.2.0",
|
| 40 |
"einops",
|
| 41 |
# "frechet_audio_distance",
|
| 42 |
+
"gradio",
|
| 43 |
+
"tensorboardX",
|
| 44 |
],
|
| 45 |
)
|