Spaces:
Running
Running
Update from GitHub Actions - 2025-08-21 11:46:08
Browse files
- README.md +2 -2
- app.py +79 -11
- requirements.txt +2 -3
README.md
CHANGED
|
@@ -4,7 +4,7 @@ emoji: 🎙️
|
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
|
@@ -67,4 +67,4 @@ python app.py
|
|
| 67 |
This project is licensed under the MIT License. See the original [Piper repository](https://github.com/rhasspy/piper) for more details.
|
| 68 |
|
| 69 |
---
|
| 70 |
-
_Last updated: 2025-01-21 -
|
|
|
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.0.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
|
|
|
| 67 |
This project is licensed under the MIT License. See the original [Piper repository](https://github.com/rhasspy/piper) for more details.
|
| 68 |
|
| 69 |
---
|
| 70 |
+
_Last updated: 2025-01-21 - Upgraded to Gradio 5.0.0 for security fixes_
|
app.py
CHANGED
|
@@ -45,6 +45,57 @@ MODELS = {
|
|
| 45 |
},
|
| 46 |
}
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
# Japanese multi-character phoneme to Unicode PUA mapping
|
| 49 |
# This mapping must match the C++ implementation and training data
|
| 50 |
PHONEME_TO_PUA = {
|
|
@@ -113,7 +164,7 @@ def text_to_phonemes(text: str, language: str) -> list[str]:
|
|
| 113 |
|
| 114 |
# Add sentence markers
|
| 115 |
phonemes = ["^"] + phonemes + ["$"]
|
| 116 |
-
|
| 117 |
# Convert multi-character phonemes to Unicode PUA
|
| 118 |
phonemes = map_phonemes(phonemes)
|
| 119 |
else:
|
|
@@ -127,10 +178,28 @@ def text_to_phonemes(text: str, language: str) -> list[str]:
|
|
| 127 |
# Convert phoneme string to list
|
| 128 |
phonemes = ["^"] + list(phoneme_str.replace(" ", "")) + ["$"]
|
| 129 |
else:
|
| 130 |
-
logger.warning("espeak_phonemizer not available, using
|
| 131 |
-
#
|
| 132 |
-
|
| 133 |
-
phonemes = ["^"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
return phonemes
|
| 136 |
|
|
@@ -261,7 +330,8 @@ def create_interface():
|
|
| 261 |
speaker_id = gr.Number(
|
| 262 |
label="Speaker ID (for multi-speaker models)",
|
| 263 |
value=0,
|
| 264 |
-
|
|
|
|
| 265 |
)
|
| 266 |
|
| 267 |
length_scale = gr.Slider(
|
|
@@ -383,15 +453,13 @@ interface = None
|
|
| 383 |
if __name__ == "__main__":
|
| 384 |
# Create interface inside main block
|
| 385 |
interface = create_interface()
|
| 386 |
-
|
| 387 |
try:
|
| 388 |
-
# Launch with
|
| 389 |
interface.launch(
|
| 390 |
server_name="0.0.0.0",
|
| 391 |
server_port=7860,
|
| 392 |
-
show_api=False, # Disable API documentation
|
| 393 |
-
show_error=True,
|
| 394 |
-
quiet=False,
|
| 395 |
)
|
| 396 |
except Exception as e:
|
| 397 |
logger.error(f"Failed to launch interface: {e}")
|
|
|
|
| 45 |
},
|
| 46 |
}
|
| 47 |
|
| 48 |
+
# Basic English word to IPA mapping for common words
|
| 49 |
+
# This is a simplified fallback when espeak-ng is not available
|
| 50 |
+
ENGLISH_IPA_MAP = {
|
| 51 |
+
"hello": "hɛloʊ",
|
| 52 |
+
"world": "wɜrld",
|
| 53 |
+
"this": "ðɪs",
|
| 54 |
+
"is": "ɪz",
|
| 55 |
+
"a": "ə",
|
| 56 |
+
"test": "tɛst",
|
| 57 |
+
"text": "tɛkst",
|
| 58 |
+
"to": "tu",
|
| 59 |
+
"speech": "spitʃ",
|
| 60 |
+
"demo": "dɛmoʊ",
|
| 61 |
+
"welcome": "wɛlkəm",
|
| 62 |
+
"piper": "paɪpər",
|
| 63 |
+
"tts": "titiɛs",
|
| 64 |
+
"enjoy": "ɛndʒɔɪ",
|
| 65 |
+
"high": "haɪ",
|
| 66 |
+
"quality": "kwɑləti",
|
| 67 |
+
"synthesis": "sɪnθəsɪs",
|
| 68 |
+
"the": "ðə",
|
| 69 |
+
"and": "ænd",
|
| 70 |
+
"for": "fɔr",
|
| 71 |
+
"with": "wɪð",
|
| 72 |
+
"you": "ju",
|
| 73 |
+
"can": "kæn",
|
| 74 |
+
"it": "ɪt",
|
| 75 |
+
"that": "ðæt",
|
| 76 |
+
"have": "hæv",
|
| 77 |
+
"from": "frʌm",
|
| 78 |
+
"or": "ɔr",
|
| 79 |
+
"which": "wɪtʃ",
|
| 80 |
+
"one": "wʌn",
|
| 81 |
+
"would": "wʊd",
|
| 82 |
+
"all": "ɔl",
|
| 83 |
+
"will": "wɪl",
|
| 84 |
+
"there": "ðɛr",
|
| 85 |
+
"say": "seɪ",
|
| 86 |
+
"who": "hu",
|
| 87 |
+
"make": "meɪk",
|
| 88 |
+
"when": "wɛn",
|
| 89 |
+
"time": "taɪm",
|
| 90 |
+
"if": "ɪf",
|
| 91 |
+
"no": "noʊ",
|
| 92 |
+
"way": "weɪ",
|
| 93 |
+
"has": "hæz",
|
| 94 |
+
"yes": "jɛs",
|
| 95 |
+
"good": "gʊd",
|
| 96 |
+
"very": "vɛri",
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
# Japanese multi-character phoneme to Unicode PUA mapping
|
| 100 |
# This mapping must match the C++ implementation and training data
|
| 101 |
PHONEME_TO_PUA = {
|
|
|
|
| 164 |
|
| 165 |
# Add sentence markers
|
| 166 |
phonemes = ["^"] + phonemes + ["$"]
|
| 167 |
+
|
| 168 |
# Convert multi-character phonemes to Unicode PUA
|
| 169 |
phonemes = map_phonemes(phonemes)
|
| 170 |
else:
|
|
|
|
| 178 |
# Convert phoneme string to list
|
| 179 |
phonemes = ["^"] + list(phoneme_str.replace(" ", "")) + ["$"]
|
| 180 |
else:
|
| 181 |
+
logger.warning("espeak_phonemizer not available, using IPA fallback")
|
| 182 |
+
# IPA-based fallback for better English pronunciation
|
| 183 |
+
words = text.lower().split()
|
| 184 |
+
phonemes = ["^"]
|
| 185 |
+
|
| 186 |
+
for i, word in enumerate(words):
|
| 187 |
+
# Add space between words
|
| 188 |
+
if i > 0:
|
| 189 |
+
phonemes.append(" ")
|
| 190 |
+
|
| 191 |
+
# Remove punctuation from word
|
| 192 |
+
clean_word = "".join(c for c in word if c.isalpha())
|
| 193 |
+
|
| 194 |
+
if clean_word in ENGLISH_IPA_MAP:
|
| 195 |
+
# Use IPA mapping if available
|
| 196 |
+
ipa = ENGLISH_IPA_MAP[clean_word]
|
| 197 |
+
phonemes.extend(list(ipa))
|
| 198 |
+
else:
|
| 199 |
+
# Fall back to character-by-character for unknown words
|
| 200 |
+
phonemes.extend(list(clean_word))
|
| 201 |
+
|
| 202 |
+
phonemes.append("$")
|
| 203 |
|
| 204 |
return phonemes
|
| 205 |
|
|
|
|
| 330 |
speaker_id = gr.Number(
|
| 331 |
label="Speaker ID (for multi-speaker models)",
|
| 332 |
value=0,
|
| 333 |
+
minimum=0,
|
| 334 |
+
maximum=10,
|
| 335 |
)
|
| 336 |
|
| 337 |
length_scale = gr.Slider(
|
|
|
|
| 453 |
if __name__ == "__main__":
|
| 454 |
# Create interface inside main block
|
| 455 |
interface = create_interface()
|
| 456 |
+
|
| 457 |
try:
|
| 458 |
+
# Launch with settings for Gradio 5.x
|
| 459 |
interface.launch(
|
| 460 |
server_name="0.0.0.0",
|
| 461 |
server_port=7860,
|
| 462 |
+
show_api=False, # Disable API documentation for cleaner UI
|
|
|
|
|
|
|
| 463 |
)
|
| 464 |
except Exception as e:
|
| 465 |
logger.error(f"Failed to launch interface: {e}")
|
requirements.txt
CHANGED
|
@@ -1,9 +1,8 @@
|
|
| 1 |
# Piper TTS Demo Requirements
|
| 2 |
-
gradio
|
| 3 |
-
pydantic==1.10.18 # Compatible with Gradio 3.x
|
| 4 |
numpy>=1.24.0,<3.0
|
| 5 |
onnxruntime>=1.16.0
|
| 6 |
pyopenjtalk>=0.3.0
|
| 7 |
onnx>=1.14.0
|
| 8 |
# Note: espeak-phonemizer requires system espeak-ng library
|
| 9 |
-
# For simplified deployment, using
|
|
|
|
| 1 |
# Piper TTS Demo Requirements
|
| 2 |
+
gradio>=5.0.0 # Latest Gradio 5.x with security fixes
|
|
|
|
| 3 |
numpy>=1.24.0,<3.0
|
| 4 |
onnxruntime>=1.16.0
|
| 5 |
pyopenjtalk>=0.3.0
|
| 6 |
onnx>=1.14.0
|
| 7 |
# Note: espeak-phonemizer requires system espeak-ng library
|
| 8 |
+
# For simplified deployment, using IPA-based fallback for English
|