Spaces:

shanusherly
/

audio

Running

App Files Files Community

shanusherly committed 5 days ago

Commit

f6fc212

verified ·

1 Parent(s): 3e5f5e4

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -31

app.py CHANGED Viewed

@@ -11,17 +11,16 @@ from google.api_core.exceptions import ResourceExhausted
 # -----------------------
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 HF_API_TOKEN = os.environ.get("HF_API_TOKEN")  # required for TTS
-HF_TTS_MODEL = os.environ.get("HF_TTS_MODEL", "microsoft/speecht5_tts")  # default fallback HF model
 AUDIO_TMP_DIR = "/tmp"
 if not GEMINI_API_KEY:
-    raise RuntimeError("Missing GEMINI_API_KEY in environment. Add it to HF Space Secrets as GEMINI_API_KEY.")
 if not HF_API_TOKEN:
-    # we'll still run text-only, but audio will fail until HF_API_TOKEN is set
     print("Warning: HF_API_TOKEN not set. Audio will be unavailable until set in Space Secrets.")
-# Configure Gemini
 genai.configure(api_key=GEMINI_API_KEY)
 gemini_model = genai.GenerativeModel("gemini-2.5-flash")
@@ -108,40 +107,69 @@ def generate_text_with_gemini(user_message):
         return None, f"Gemini error: {repr(efinal)}"
 # -----------------------
-# Hugging Face Inference API TTS
 # Returns (path, error)
 # -----------------------
 def generate_audio_hf_inference(text):
     if not HF_API_TOKEN:
         return "", "HF_API_TOKEN not configured for TTS."
-    hf_url = f"https://api-inference.huggingface.co/models/{HF_TTS_MODEL}"
     headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
     payload = {"inputs": text}
-    try:
-        resp = requests.post(hf_url, headers=headers, json=payload, timeout=60)
-    except Exception as e:
-        return "", f"HuggingFace request failed: {e}"
-    if resp.status_code == 200:
         try:
-            ct = resp.headers.get("content-type", "")
-            ext = ".mp3" if "mpeg" in ct or "audio/mpeg" in ct else ".wav"
-            filename = f"audio_{int(time.time()*1000)}_{abs(hash(text))%100000}{ext}"
-            path = os.path.join(AUDIO_TMP_DIR, filename)
-            with open(path, "wb") as f:
-                f.write(resp.content)
-            print(f"HuggingFace TTS: audio saved to {path} using model {HF_TTS_MODEL}")
-            return path, ""
         except Exception as e:
-            return "", f"Failed to save HF audio: {e}"
-    else:
-        try:
-            body = resp.json()
-        except Exception:
-            body = resp.text
-        return "", f"HuggingFace TTS error {resp.status_code}: {body}"
 # -----------------------
 # Convert memory -> messages list for Gradio
@@ -158,7 +186,6 @@ def convert_memory_to_messages(history):
 # Returns (messages_list, audio_path, error)
 # -----------------------
 def process_user_message(user_message):
-    # 1) generate text
     text, gen_err = generate_text_with_gemini(user_message)
     if gen_err:
         memory.add("user", user_message)
@@ -166,11 +193,9 @@ def process_user_message(user_message):
         memory.add("bot", fallback)
         return convert_memory_to_messages(memory.history), "", gen_err
-    # 2) update memory
     memory.add("user", user_message)
     memory.add("bot", text)
-    # 3) generate audio via Hugging Face
     audio_path, audio_err = generate_audio_hf_inference(text)
     if audio_err:
         print("Audio generation error (HF):", audio_err)
@@ -181,7 +206,7 @@ def process_user_message(user_message):
 # Gradio UI (Blocks) with debug UI
 # -----------------------
 with gr.Blocks() as demo:
-    gr.Markdown("## 🤖 Gemini + Hugging Face TTS Chatbot\n\nAudio generated using Hugging Face Inference API.")
     chatbot = gr.Chatbot()
     with gr.Row():
         txt = gr.Textbox(show_label=False, placeholder="Type your message and press Enter")

 # -----------------------
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 HF_API_TOKEN = os.environ.get("HF_API_TOKEN")  # required for TTS
+HF_TTS_MODEL = os.environ.get("HF_TTS_MODEL", "microsoft/speecht5_tts")
 AUDIO_TMP_DIR = "/tmp"
 if not GEMINI_API_KEY:
+    raise RuntimeError("Missing GEMINI_API_KEY in environment. Add it to HF Space Secrets.")
 if not HF_API_TOKEN:
     print("Warning: HF_API_TOKEN not set. Audio will be unavailable until set in Space Secrets.")
+# Configure Gemini SDK
 genai.configure(api_key=GEMINI_API_KEY)
 gemini_model = genai.GenerativeModel("gemini-2.5-flash")
         return None, f"Gemini error: {repr(efinal)}"
 # -----------------------
+# Hugging Face Router-aware TTS
+# Tries legacy api-inference endpoint, then router.huggingface.co
 # Returns (path, error)
 # -----------------------
 def generate_audio_hf_inference(text):
     """Synthesize speech for *text* via Hugging Face and save it under AUDIO_TMP_DIR.

     The legacy ``api-inference`` endpoint is tried first, then the
     ``router.huggingface.co`` endpoint, both for the model named by
     HF_TTS_MODEL. The first response that actually carries audio wins.

     Args:
         text: The text to convert to speech.

     Returns:
         A ``(path, error)`` tuple. On success ``path`` is the saved audio
         file and ``error`` is ``""``; on failure ``path`` is ``""`` and
         ``error`` describes the last problem encountered.
     """
     if not HF_API_TOKEN:
         return "", "HF_API_TOKEN not configured for TTS."
     # Nothing to synthesize: skip two pointless network round-trips.
     if not isinstance(text, str) or not text.strip():
         return "", "No text provided for TTS."
     model = HF_TTS_MODEL  # e.g. "microsoft/speecht5_tts"
     router_url = f"https://router.huggingface.co/models/{model}"
     legacy_url = f"https://api-inference.huggingface.co/models/{model}"
     # The Accept header is the same for every attempt, so build headers once.
     headers = {
         "Authorization": f"Bearer {HF_API_TOKEN}",
         "Accept": "audio/mpeg, audio/wav, */*",
     }
     payload = {"inputs": text}
     def _extension_for(mime):
         # Pick a file extension matching the reported audio format;
         # unknown or missing types keep the historical ".wav" default.
         if "mpeg" in mime or "mp3" in mime:
             return ".mp3"
         if "flac" in mime:
             return ".flac"
         if "ogg" in mime:
             return ".ogg"
         return ".wav"
     def _save_bytes(content, content_type_hint=""):
         # Write the raw audio bytes to a uniquely named file and return its path.
         ext = _extension_for((content_type_hint or "").lower())
         filename = f"audio_{int(time.time()*1000)}_{abs(hash(text))%100000}{ext}"
         path = os.path.join(AUDIO_TMP_DIR, filename)
         with open(path, "wb") as f:
             f.write(content)
         return path
     last_err = None
     for url in (legacy_url, router_url):
         try:
             resp = requests.post(url, headers=headers, json=payload, timeout=60)
         except Exception as e:
             # Deliberately broad: this function reports failures through its
             # return value and must never raise into the chat handler.
             last_err = f"HuggingFace request to {url} failed: {e}"
             print(last_err)
             continue
         if resp.status_code == 200:
             content_type = resp.headers.get("content-type", "")
             mime = content_type.split(";", 1)[0].strip().lower()
             # A 200 with an empty body or a JSON/text payload is not audio;
             # saving it would hand the UI an unplayable file.
             if not resp.content or mime == "application/json" or mime.startswith("text/"):
                 last_err = (
                     f"HuggingFace returned 200 from {url} but no audio "
                     f"(content-type={content_type}): {resp.text[:200]}"
                 )
                 print(last_err)
                 continue
             try:
                 path = _save_bytes(resp.content, content_type)
             except OSError as e:
                 last_err = f"Failed to save HF audio from {url}: {e}"
                 print(last_err)
                 continue
             print(f"HuggingFace TTS: audio saved to {path} using URL {url} (content-type={content_type})")
             return path, ""
         if resp.status_code == 410:
             # Endpoint is gone; move on to the next URL.
             last_err = f"HuggingFace returned 410 for {url}: {resp.text}"
             print(last_err)
             continue
         try:
             body = resp.json()
         except ValueError:
             # Error body was not JSON; fall back to the raw text.
             body = resp.text
         last_err = f"HuggingFace TTS error {resp.status_code} from {url}: {body}"
         print(last_err)
         if resp.status_code in (401, 403):
             # Auth problem: the other endpoint would reject the same token.
             break
     return "", last_err or "Unknown HuggingFace error"
 # -----------------------
 # Convert memory -> messages list for Gradio
 # Returns (messages_list, audio_path, error)
 # -----------------------
 def process_user_message(user_message):
     text, gen_err = generate_text_with_gemini(user_message)
     if gen_err:
         memory.add("user", user_message)
         memory.add("bot", fallback)
         return convert_memory_to_messages(memory.history), "", gen_err
     memory.add("user", user_message)
     memory.add("bot", text)
     audio_path, audio_err = generate_audio_hf_inference(text)
     if audio_err:
         print("Audio generation error (HF):", audio_err)
 # Gradio UI (Blocks) with debug UI
 # -----------------------
 with gr.Blocks() as demo:
+    gr.Markdown("## 🤖 Gemini + Hugging Face TTS Chatbot\n\nAudio generated via Hugging Face Inference (router).")
     chatbot = gr.Chatbot()
     with gr.Row():
         txt = gr.Textbox(show_label=False, placeholder="Type your message and press Enter")