shanusherly committed
Commit f6fc212 · verified · 1 Parent(s): 3e5f5e4

Update app.py

Files changed (1): app.py (+56 −31)
app.py CHANGED
@@ -11,17 +11,16 @@ from google.api_core.exceptions import ResourceExhausted
 # -----------------------
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 HF_API_TOKEN = os.environ.get("HF_API_TOKEN") # required for TTS
-HF_TTS_MODEL = os.environ.get("HF_TTS_MODEL", "microsoft/speecht5_tts") # default fallback HF model
+HF_TTS_MODEL = os.environ.get("HF_TTS_MODEL", "microsoft/speecht5_tts")
 AUDIO_TMP_DIR = "/tmp"
 
 if not GEMINI_API_KEY:
-    raise RuntimeError("Missing GEMINI_API_KEY in environment. Add it to HF Space Secrets as GEMINI_API_KEY.")
+    raise RuntimeError("Missing GEMINI_API_KEY in environment. Add it to HF Space Secrets.")
 
 if not HF_API_TOKEN:
-    # we'll still run text-only, but audio will fail until HF_API_TOKEN is set
     print("Warning: HF_API_TOKEN not set. Audio will be unavailable until set in Space Secrets.")
 
-# Configure Gemini
+# Configure Gemini SDK
 genai.configure(api_key=GEMINI_API_KEY)
 gemini_model = genai.GenerativeModel("gemini-2.5-flash")
 
@@ -108,40 +107,69 @@ def generate_text_with_gemini(user_message):
         return None, f"Gemini error: {repr(efinal)}"
 
 # -----------------------
-# Hugging Face Inference API TTS
+# Hugging Face Router-aware TTS
+# Tries legacy api-inference endpoint, then router.huggingface.co
 # Returns (path, error)
 # -----------------------
 def generate_audio_hf_inference(text):
     if not HF_API_TOKEN:
         return "", "HF_API_TOKEN not configured for TTS."
 
-    hf_url = f"https://api-inference.huggingface.co/models/{HF_TTS_MODEL}"
+    model = HF_TTS_MODEL # e.g. "microsoft/speecht5_tts"
+    router_url = f"https://router.huggingface.co/models/{model}"
+    legacy_url = f"https://api-inference.huggingface.co/models/{model}"
+
     headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
     payload = {"inputs": text}
 
-    try:
-        resp = requests.post(hf_url, headers=headers, json=payload, timeout=60)
-    except Exception as e:
-        return "", f"HuggingFace request failed: {e}"
-
-    if resp.status_code == 200:
+    def _save_bytes(content, content_type_hint=""):
+        ct = content_type_hint or ""
+        ext = ".mp3" if "mpeg" in ct or "audio/mpeg" in ct else ".wav"
+        filename = f"audio_{int(time.time()*1000)}_{abs(hash(text))%100000}{ext}"
+        path = os.path.join(AUDIO_TMP_DIR, filename)
+        with open(path, "wb") as f:
+            f.write(content)
+        return path
+
+    last_err = None
+    for url in [legacy_url, router_url]:
         try:
-            ct = resp.headers.get("content-type", "")
-            ext = ".mp3" if "mpeg" in ct or "audio/mpeg" in ct else ".wav"
-            filename = f"audio_{int(time.time()*1000)}_{abs(hash(text))%100000}{ext}"
-            path = os.path.join(AUDIO_TMP_DIR, filename)
-            with open(path, "wb") as f:
-                f.write(resp.content)
-            print(f"HuggingFace TTS: audio saved to {path} using model {HF_TTS_MODEL}")
-            return path, ""
+            h = headers.copy()
+            h["Accept"] = "audio/mpeg, audio/wav, */*"
+            resp = requests.post(url, headers=h, json=payload, timeout=60)
         except Exception as e:
-            return "", f"Failed to save HF audio: {e}"
-    else:
-        try:
-            body = resp.json()
-        except Exception:
-            body = resp.text
-        return "", f"HuggingFace TTS error {resp.status_code}: {body}"
+            last_err = f"HuggingFace request to {url} failed: {e}"
+            print(last_err)
+            continue
+
+        if resp.status_code == 410:
+            last_err = f"HuggingFace returned 410 for {url}: {resp.text}"
+            print(last_err)
+            continue
+
+        if resp.status_code == 200:
+            try:
+                content_type = resp.headers.get("content-type", "")
+                path = _save_bytes(resp.content, content_type)
+                print(f"HuggingFace TTS: audio saved to {path} using URL {url} (content-type={content_type})")
+                return path, ""
+            except Exception as e:
+                last_err = f"Failed to save HF audio from {url}: {e}"
+                print(last_err)
+                continue
+        else:
+            try:
+                body = resp.json()
+            except Exception:
+                body = resp.text
+            last_err = f"HuggingFace TTS error {resp.status_code} from {url}: {body}"
+            print(last_err)
+            if resp.status_code in (401, 403):
+                # auth problem - break early
+                break
+            continue
+
+    return "", last_err or "Unknown HuggingFace error"
 
 # -----------------------
 # Convert memory -> messages list for Gradio
@@ -158,7 +186,6 @@ def convert_memory_to_messages(history):
 # Returns (messages_list, audio_path, error)
 # -----------------------
 def process_user_message(user_message):
-    # 1) generate text
     text, gen_err = generate_text_with_gemini(user_message)
     if gen_err:
         memory.add("user", user_message)
@@ -166,11 +193,9 @@ def process_user_message(user_message):
         memory.add("bot", fallback)
         return convert_memory_to_messages(memory.history), "", gen_err
 
-    # 2) update memory
     memory.add("user", user_message)
     memory.add("bot", text)
 
-    # 3) generate audio via Hugging Face
    audio_path, audio_err = generate_audio_hf_inference(text)
     if audio_err:
         print("Audio generation error (HF):", audio_err)
@@ -181,7 +206,7 @@
 # Gradio UI (Blocks) with debug UI
 # -----------------------
 with gr.Blocks() as demo:
-    gr.Markdown("## 🤖 Gemini + Hugging Face TTS Chatbot\n\nAudio generated using Hugging Face Inference API.")
+    gr.Markdown("## 🤖 Gemini + Hugging Face TTS Chatbot\n\nAudio generated via Hugging Face Inference (router).")
     chatbot = gr.Chatbot()
     with gr.Row():
         txt = gr.Textbox(show_label=False, placeholder="Type your message and press Enter")
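
For anyone reproducing the endpoint issue outside the Space, a quick probe of the two URLs the new fallback walks through might look like the sketch below. It is not part of the commit: it reuses the same HF_API_TOKEN / HF_TTS_MODEL environment variables that app.py reads, and the router URL form is copied from the diff rather than verified against current Hugging Face routing docs.

```python
import os
import requests

# Hedged sketch: probe the two TTS endpoints that generate_audio_hf_inference() now tries.
# HF_API_TOKEN / HF_TTS_MODEL are the same env vars the Space uses; the router URL form
# mirrors the committed code and is an assumption, not a documented guarantee.
token = os.environ["HF_API_TOKEN"]
model = os.environ.get("HF_TTS_MODEL", "microsoft/speecht5_tts")

urls = [
    f"https://api-inference.huggingface.co/models/{model}",  # legacy endpoint
    f"https://router.huggingface.co/models/{model}",         # router endpoint, as written in the diff
]
headers = {"Authorization": f"Bearer {token}", "Accept": "audio/mpeg, audio/wav, */*"}

for url in urls:
    resp = requests.post(url, headers=headers, json={"inputs": "Hello from the Space."}, timeout=60)
    ct = resp.headers.get("content-type", "")
    print(f"{url} -> {resp.status_code} ({ct})")
    if resp.status_code == 200 and ct.startswith("audio"):
        out = "probe.mp3" if "mpeg" in ct else "probe.wav"
        with open(out, "wb") as f:
            f.write(resp.content)  # save the first playable response
        print("saved", out)
        break
```

If both URLs come back 401/403, the token rather than the endpoints is the likely problem, which is why the committed loop breaks early on those status codes instead of retrying the next URL.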