Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,17 +11,16 @@ from google.api_core.exceptions import ResourceExhausted
|
|
| 11 |
# -----------------------
|
| 12 |
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
|
| 13 |
HF_API_TOKEN = os.environ.get("HF_API_TOKEN") # required for TTS
|
| 14 |
-
HF_TTS_MODEL = os.environ.get("HF_TTS_MODEL", "microsoft/speecht5_tts")
|
| 15 |
AUDIO_TMP_DIR = "/tmp"
|
| 16 |
|
| 17 |
if not GEMINI_API_KEY:
|
| 18 |
-
raise RuntimeError("Missing GEMINI_API_KEY in environment. Add it to HF Space Secrets
|
| 19 |
|
| 20 |
if not HF_API_TOKEN:
|
| 21 |
-
# we'll still run text-only, but audio will fail until HF_API_TOKEN is set
|
| 22 |
print("Warning: HF_API_TOKEN not set. Audio will be unavailable until set in Space Secrets.")
|
| 23 |
|
| 24 |
-
# Configure Gemini
|
| 25 |
genai.configure(api_key=GEMINI_API_KEY)
|
| 26 |
gemini_model = genai.GenerativeModel("gemini-2.5-flash")
|
| 27 |
|
|
@@ -108,40 +107,69 @@ def generate_text_with_gemini(user_message):
|
|
| 108 |
return None, f"Gemini error: {repr(efinal)}"
|
| 109 |
|
| 110 |
# -----------------------
|
| 111 |
-
# Hugging Face
|
|
|
|
| 112 |
# Returns (path, error)
|
| 113 |
# -----------------------
|
| 114 |
def generate_audio_hf_inference(text):
|
| 115 |
if not HF_API_TOKEN:
|
| 116 |
return "", "HF_API_TOKEN not configured for TTS."
|
| 117 |
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
| 119 |
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
|
| 120 |
payload = {"inputs": text}
|
| 121 |
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
try:
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
path = os.path.join(AUDIO_TMP_DIR, filename)
|
| 133 |
-
with open(path, "wb") as f:
|
| 134 |
-
f.write(resp.content)
|
| 135 |
-
print(f"HuggingFace TTS: audio saved to {path} using model {HF_TTS_MODEL}")
|
| 136 |
-
return path, ""
|
| 137 |
except Exception as e:
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
# -----------------------
|
| 147 |
# Convert memory -> messages list for Gradio
|
|
@@ -158,7 +186,6 @@ def convert_memory_to_messages(history):
|
|
| 158 |
# Returns (messages_list, audio_path, error)
|
| 159 |
# -----------------------
|
| 160 |
def process_user_message(user_message):
|
| 161 |
-
# 1) generate text
|
| 162 |
text, gen_err = generate_text_with_gemini(user_message)
|
| 163 |
if gen_err:
|
| 164 |
memory.add("user", user_message)
|
|
@@ -166,11 +193,9 @@ def process_user_message(user_message):
|
|
| 166 |
memory.add("bot", fallback)
|
| 167 |
return convert_memory_to_messages(memory.history), "", gen_err
|
| 168 |
|
| 169 |
-
# 2) update memory
|
| 170 |
memory.add("user", user_message)
|
| 171 |
memory.add("bot", text)
|
| 172 |
|
| 173 |
-
# 3) generate audio via Hugging Face
|
| 174 |
audio_path, audio_err = generate_audio_hf_inference(text)
|
| 175 |
if audio_err:
|
| 176 |
print("Audio generation error (HF):", audio_err)
|
|
@@ -181,7 +206,7 @@ def process_user_message(user_message):
|
|
| 181 |
# Gradio UI (Blocks) with debug UI
|
| 182 |
# -----------------------
|
| 183 |
with gr.Blocks() as demo:
|
| 184 |
-
gr.Markdown("## 🤖 Gemini + Hugging Face TTS Chatbot\n\nAudio generated
|
| 185 |
chatbot = gr.Chatbot()
|
| 186 |
with gr.Row():
|
| 187 |
txt = gr.Textbox(show_label=False, placeholder="Type your message and press Enter")
|
|
|
|
| 11 |
# -----------------------
|
| 12 |
# Runtime configuration, read once from the environment at import time.
AUDIO_TMP_DIR = "/tmp"
HF_TTS_MODEL = os.getenv("HF_TTS_MODEL", "microsoft/speecht5_tts")
HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # needed only for text-to-speech
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Text generation cannot work without the Gemini key, so fail fast.
if GEMINI_API_KEY is None or GEMINI_API_KEY == "":
    raise RuntimeError("Missing GEMINI_API_KEY in environment. Add it to HF Space Secrets.")

# A missing Hugging Face token is tolerated: the app keeps running text-only.
if HF_API_TOKEN is None or HF_API_TOKEN == "":
    print("Warning: HF_API_TOKEN not set. Audio will be unavailable until set in Space Secrets.")

# Hand the key to the Gemini SDK and build the shared model handle.
genai.configure(api_key=GEMINI_API_KEY)
gemini_model = genai.GenerativeModel("gemini-2.5-flash")
|
| 26 |
|
|
|
|
| 107 |
return None, f"Gemini error: {repr(efinal)}"
|
| 108 |
|
| 109 |
# -----------------------
|
| 110 |
+
# Hugging Face Router-aware TTS
|
| 111 |
+
# Tries legacy api-inference endpoint, then router.huggingface.co
|
| 112 |
# Returns (path, error)
|
| 113 |
# -----------------------
|
| 114 |
def generate_audio_hf_inference(text):
    """Synthesize speech for *text* through the Hugging Face Inference API.

    The legacy ``api-inference`` endpoint is tried first, then the
    ``router.huggingface.co`` endpoint. The first successful audio response
    is written to a new file under ``AUDIO_TMP_DIR``.

    Args:
        text: The text to convert to speech.

    Returns:
        A ``(path, error)`` tuple. On success ``path`` is the saved audio
        file and ``error`` is ``""``. On failure ``path`` is ``""`` and
        ``error`` describes the last problem encountered.
    """
    if not HF_API_TOKEN:
        return "", "HF_API_TOKEN not configured for TTS."
    if not isinstance(text, str) or not text.strip():
        # Nothing to synthesize; avoid a pointless network round trip.
        return "", "No text provided for TTS."

    model = HF_TTS_MODEL  # e.g. "microsoft/speecht5_tts"
    legacy_url = f"https://api-inference.huggingface.co/models/{model}"
    # The router serves the HF Inference provider under the "hf-inference"
    # path segment; without it the request does not reach the model route.
    router_url = f"https://router.huggingface.co/hf-inference/models/{model}"

    headers = {
        "Authorization": f"Bearer {HF_API_TOKEN}",
        "Accept": "audio/mpeg, audio/wav, */*",
    }
    payload = {"inputs": text}

    known_extensions = {
        "audio/mpeg": ".mp3",
        "audio/mp3": ".mp3",
        "audio/flac": ".flac",
        "audio/x-flac": ".flac",
        "audio/ogg": ".ogg",
        "audio/wav": ".wav",
        "audio/x-wav": ".wav",
        "audio/wave": ".wav",
    }

    def _audio_extension(content_type):
        """Return a file extension for *content_type*, or None if not audio."""
        media_type = (content_type or "").split(";", 1)[0].strip().lower()
        if media_type in known_extensions:
            return known_extensions[media_type]
        if media_type == "application/json" or media_type.startswith("text/"):
            # A 200 response can still carry a JSON/HTML message, not audio.
            return None
        if "mpeg" in media_type:
            return ".mp3"
        # Unknown or missing content type: keep the historical ".wav" default.
        return ".wav"

    def _save_bytes(content, ext):
        """Write *content* to a fresh file in AUDIO_TMP_DIR and return its path."""
        filename = f"audio_{int(time.time()*1000)}_{abs(hash(text))%100000}{ext}"
        path = os.path.join(AUDIO_TMP_DIR, filename)
        with open(path, "wb") as f:
            f.write(content)
        return path

    last_err = None
    for url in (legacy_url, router_url):
        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=60)
        except requests.RequestException as e:
            last_err = f"HuggingFace request to {url} failed: {e}"
            print(last_err)
            continue

        if resp.status_code == 410:
            # Endpoint retired; fall through to the next candidate URL.
            last_err = f"HuggingFace returned 410 for {url}: {resp.text}"
            print(last_err)
            continue

        if resp.status_code == 200:
            content_type = resp.headers.get("content-type", "")
            ext = _audio_extension(content_type)
            if ext is None or not resp.content:
                last_err = (
                    f"HuggingFace returned a non-audio response from {url} "
                    f"(content-type={content_type})"
                )
                print(last_err)
                continue
            try:
                path = _save_bytes(resp.content, ext)
            except OSError as e:
                last_err = f"Failed to save HF audio from {url}: {e}"
                print(last_err)
                continue
            print(f"HuggingFace TTS: audio saved to {path} using URL {url} (content-type={content_type})")
            return path, ""

        # Any other status: surface the service's own error body.
        try:
            body = resp.json()
        except ValueError:
            body = resp.text
        last_err = f"HuggingFace TTS error {resp.status_code} from {url}: {body}"
        print(last_err)
        if resp.status_code in (401, 403):
            # Auth problem: the other endpoint would reject the same token.
            break

    return "", last_err or "Unknown HuggingFace error"
|
| 173 |
|
| 174 |
# -----------------------
|
| 175 |
# Convert memory -> messages list for Gradio
|
|
|
|
| 186 |
# Returns (messages_list, audio_path, error)
|
| 187 |
# -----------------------
|
| 188 |
def process_user_message(user_message):
|
|
|
|
| 189 |
text, gen_err = generate_text_with_gemini(user_message)
|
| 190 |
if gen_err:
|
| 191 |
memory.add("user", user_message)
|
|
|
|
| 193 |
memory.add("bot", fallback)
|
| 194 |
return convert_memory_to_messages(memory.history), "", gen_err
|
| 195 |
|
|
|
|
| 196 |
memory.add("user", user_message)
|
| 197 |
memory.add("bot", text)
|
| 198 |
|
|
|
|
| 199 |
audio_path, audio_err = generate_audio_hf_inference(text)
|
| 200 |
if audio_err:
|
| 201 |
print("Audio generation error (HF):", audio_err)
|
|
|
|
| 206 |
# Gradio UI (Blocks) with debug UI
|
| 207 |
# -----------------------
|
| 208 |
with gr.Blocks() as demo:
|
| 209 |
+
gr.Markdown("## 🤖 Gemini + Hugging Face TTS Chatbot\n\nAudio generated via Hugging Face Inference (router).")
|
| 210 |
chatbot = gr.Chatbot()
|
| 211 |
with gr.Row():
|
| 212 |
txt = gr.Textbox(show_label=False, placeholder="Type your message and press Enter")
|