Spaces:

angelsg213
/

TESTING22

Sleeping

App Files Community

angelsg213 committed 17 days ago

Commit

b9e50c8

verified ·

1 Parent(s): 4f6c6e5

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -140

app.py CHANGED Viewed

@@ -16,122 +16,105 @@ import time
 import numpy as np
 import wave
-# Para TTS emocional
-try:
-    from gtts import gTTS
-    GTTS_AVAILABLE = True
-except ImportError:
-    GTTS_AVAILABLE = False
-    print("⚠️ gTTS no disponible. Instala con: pip install gtts")
 # ============= GENERAR AUDIO CON EMOCIÓN MEJORADO =============
 # ============= GENERAR AUDIO CON EMOCIÓN MEJORADO =============
 # ============= GENERAR AUDIO CON EMOCIÓN MEJORADO =============
 # ============= GENERAR AUDIO CON EMOCIÓN Y ANÁLISIS DE SENTIMIENTO =============
-# ============= GENERAR AUDIO CON EMOCIÓN - VERSIÓN CORREGIDA =============
 def generar_audio_respuesta(texto, client):
-    """TTS emocional FUNCIONAL con gTTS (Google Text-to-Speech) - Diciembre 2024"""
-    try:
-        # Limpiar y preparar texto
-        texto_limpio = texto.replace("*", "").replace("#", "").replace("`", "").replace("€", " euros").strip()
-        oraciones = re.split(r'[.!?]+', texto_limpio)
-        oraciones = [o.strip() for o in oraciones if o.strip() and len(o.strip()) > 10]
-        texto_audio = ". ".join(oraciones[:5]) + "." if len(oraciones) > 5 else ". ".join(oraciones) + "."
-        if len(texto_audio) > 500:
-            texto_audio = texto_audio[:497] + "..."
-        print(f"🎤 Generando audio para: '{texto_audio[:100]}...'")
-        # PASO 1: Análisis emocional
-        emocion_detectada = "neutral"
-        confianza = 0.5
-        try:
-            print("🧠 Analizando emoción...")
-            emotion_response = client.text_classification(
-                text=texto_audio[:512],
-                model="finiteautomata/beto-sentiment-analysis"
-            )
-            if emotion_response and len(emotion_response) > 0:
-                label = emotion_response[0]['label'].lower()
-                sentiment_to_emotion = {
-                    'pos': 'joy',
-                    'positive': 'joy',
-                    'neu': 'neutral',
-                    'neutral': 'neutral',
-                    'neg': 'sadness',
-                    'negative': 'sadness'
-                }
-                emocion_detectada = sentiment_to_emotion.get(label, 'neutral')
-                confianza = emotion_response[0]['score']
-                print(f"😊 Emoción: {emocion_detectada} (confianza: {confianza:.2%})")
-        except Exception as e:
-            print(f"⚠️ Error en análisis emocional: {str(e)[:100]}")
-        # PASO 2: Generar audio con gTTS
-        print("🔊 Generando audio con Google TTS...")
-        if GTTS_AVAILABLE:
-            tts = gTTS(text=texto_audio, lang='es', slow=False)
-            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
-            audio_path = f"audio_emocional_{emocion_detectada}_{timestamp}.mp3"
-            tts.save(audio_path)
-            if os.path.exists(audio_path) and os.path.getsize(audio_path) > 1000:
-                print(f"✅ Audio generado: {audio_path} ({os.path.getsize(audio_path)} bytes)")
-                return audio_path, emocion_detectada, confianza
-        print("⚠️ Intentando método alternativo...")
-        return generar_audio_alternativo(texto, client)
-    except Exception as e:
-        print(f"❌ Error general: {str(e)}")
-        return None, "neutral", 0.5
-def generar_audio_alternativo(texto, client):
-    """Método alternativo usando HuggingFace TTS"""
-    emocion_detectada = "neutral"
-    confianza = 0.5
     texto_limpio = texto.replace("*", "").replace("#", "").replace("`", "").replace("€", " euros").strip()
     oraciones = re.split(r'[.!?]+', texto_limpio)
     oraciones = [o.strip() for o in oraciones if o.strip() and len(o.strip()) > 10]
-    texto_audio = ". ".join(oraciones[:3]) + "."
     if len(texto_audio) > 400:
         texto_audio = texto_audio[:397] + "..."
-    modelos_tts = ["facebook/mms-tts-spa"]
     for modelo in modelos_tts:
         try:
             print(f"🔊 Probando: {modelo}")
-            audio_data = client.text_to_speech(text=texto_audio, model=modelo)
             timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
-            audio_path = f"audio_{timestamp}.wav"
             with open(audio_path, "wb") as f:
                 if isinstance(audio_data, bytes):
                     f.write(audio_data)
                 elif hasattr(audio_data, 'read'):
                     f.write(audio_data.read())
                 else:
                     for chunk in audio_data:
                         if chunk:
                             f.write(chunk if isinstance(chunk, bytes) else bytes(chunk))
-            if os.path.exists(audio_path) and os.path.getsize(audio_path) > 1000:
-                print(f"✅ Audio generado con {modelo}")
-                return audio_path, emocion_detectada, confianza
-            else:
-                if os.path.exists(audio_path):
                     os.remove(audio_path)
         except Exception as e:
-            print(f"❌ Error con {modelo}: {str(e)[:100]}")
     return None, emocion_detectada, confianza
 # ============= ASISTENTE IA CONVERSACIONAL =============
@@ -213,16 +196,12 @@ Responde ahora:"""
                 f.write(f"\nArchivo de audio: {audio_path if audio_path else 'No generado'}\n")
                 f.write("=" * 60 + "\n")
-            if audio_path and os.path.exists(audio_path):
                 print(f"✅ Audio generado correctamente: {audio_path}")
-                return respuesta, audio_path, transcripcion_path, emocion, confianza
             else:
                 print("⚠️ No se pudo generar el audio, pero la respuesta está disponible")
-                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
-                audio_vacio = f"audio_no_disponible_{timestamp}.mp3"
-                with open(audio_vacio, "w") as f:
-                    f.write("")
-                return respuesta, audio_vacio, transcripcion_path, emocion, confianza
         except Exception as e:
             print(f"❌ Error con {modelo}: {str(e)}")
@@ -1551,7 +1530,7 @@ with gr.Blocks(title="Extractor de Facturas con IA Avanzada") as demo:
                                 label="🎧 Reproducir respuesta en audio",
                                 type="filepath",
                                 visible=True,
-                                autoplay=True
                             )
                         with gr.Column():
                             transcripcion_output = gr.File(
@@ -1772,56 +1751,42 @@ with gr.Blocks(title="Extractor de Facturas con IA Avanzada") as demo:
         outputs=[pdf_output, pdf_status]
     )
-    # Asistente IA con análisis emocional
     def consultar_ia_con_loading(texto, pregunta):
-            if not texto:
-                return ("❌ Por favor, procesa una factura primero", None, None, "", gr.update(visible=False))
-            yield ("🔄 El asistente está analizando tu pregunta...", None, None, "", gr.update(visible=True))
-            time.sleep(0.3)
-            try:
-                respuesta, audio, transcripcion, emocion, confianza = asistente_ia_factura(texto, pregunta)
-                emotion_map = {
-                    "joy": ("😊", "#4CAF50", "Alegría"),
-                    "excitement": ("🎉", "#FF9800", "Emoción"),
-                    "anger": ("😠", "#F44336", "Enfado"),
-                    "sadness": ("😢", "#2196F3", "Tristeza"),
-                    "fear": ("😰", "#9C27B0", "Miedo"),
-                    "surprise": ("😮", "#FF5722", "Sorpresa"),
-                    "neutral": ("😐", "#607D8B", "Neutral")
-                }
-                emoji, color, nombre = emotion_map.get(emocion, ("😐", "#607D8B", "Neutral"))
-                emocion_info = f"""
-    ### 🎭 Análisis Emocional
-    <div style="background: linear-gradient(135deg, {color}22 0%, {color}44 100%); padding: 15px; border-radius: 10px; border-left: 4px solid {color};">
-        <p style="font-size: 18px; margin: 0;">
-            <strong style="color: {color};">{emoji} Emoción detectada: {nombre}</strong>
-        </p>
-        <p style="margin: 5px 0 0 0; color: #666;">
-            Nivel de confianza: {confianza:.1%}
-        </p>
-    </div>
-    """
-                audio_final = audio if (audio and os.path.exists(audio) and os.path.getsize(audio) > 100) else None
-                if audio_final:
-                    print(f"✅ Audio disponible: {audio_final}")
-                else:
-                    print("⚠️ Audio no disponible")
-                    emocion_info += "\n\n⚠️ *El audio no pudo generarse, pero la respuesta está en texto.*"
-                yield (respuesta, audio_final, transcripcion, emocion_info, gr.update(visible=False))
-            except Exception as e:
-                error_msg = f"❌ Error: {str(e)[:200]}"
-                print(f"Error completo: {str(e)}")
-                yield (error_msg, None, None, "", gr.update(visible=False))
     btn_consulta_ia.click(
         fn=consultar_ia_con_loading,

 import numpy as np
 import wave
+# ============= EXTRAER TEXTO DEL PDF =============
+def extraer_texto_pdf(pdf_file):
+    try:
+        pdf_reader = PyPDF2.PdfReader(pdf_file)
+        texto = ""
+        for pagina in pdf_reader.pages:
+            texto += pagina.extract_text() + "\n"
+        return texto
+    except Exception as e:
+        return f"Error: {str(e)}"
 # ============= GENERAR AUDIO CON EMOCIÓN MEJORADO =============
 # ============= GENERAR AUDIO CON EMOCIÓN MEJORADO =============
 # ============= GENERAR AUDIO CON EMOCIÓN MEJORADO =============
 # ============= GENERAR AUDIO CON EMOCIÓN Y ANÁLISIS DE SENTIMIENTO =============
 def generar_audio_respuesta(texto, client):
+    """TTS emocional FUNCIONAL para español - Actualizado diciembre 2025"""
+    # Limpiar y preparar texto (mismo que antes)
     texto_limpio = texto.replace("*", "").replace("#", "").replace("`", "").replace("€", " euros").strip()
     oraciones = re.split(r'[.!?]+', texto_limpio)
     oraciones = [o.strip() for o in oraciones if o.strip() and len(o.strip()) > 10]
+    texto_audio = ". ".join(oraciones[:3]) + "." if len(oraciones) > 3 else ". ".join(oraciones) + "."
     if len(texto_audio) > 400:
         texto_audio = texto_audio[:397] + "..."
+    print(f"🎤 Generando audio para: '{texto_audio[:80]}...'")
+    # PASO 1: Análisis emocional (modelo español que SÍ funciona)
+    try:
+        print("🧠 Analizando emoción...")
+        emotion_response = client.text_classification(
+            text=texto_audio,
+            model="dariolopez/roberta-base-bne-finetuned-EmotionAnalysisSpanish"  # Español nativo
+        )
+        if emotion_response and len(emotion_response) > 0:
+            emocion_detectada = emotion_response[0]['label']
+            confianza = emotion_response[0]['score']
+            print(f"😊 Emoción: {emocion_detectada} (confianza: {confianza:.2%})")
+        else:
+            emocion_detectada = "neutral"
+            confianza = 0.5
+    except Exception as e:
+        print(f"⚠️ Error emocional: {str(e)[:100]}. Usando neutral.")
+        emocion_detectada = "neutral"
+        confianza = 0.5
+    # PASO 2: Modelos TTS que SÍ funcionan en 2025 (español prioritario)
+    modelos_tts = [
+        "facebook/mms-tts-spa",      # Español oficial de Meta - Siempre funciona
+        "myshell-ai/MeloTTS-Spanish", # Alta calidad, multi-idioma
+        "coqui/XTTS-v2"              # Fallback versátil (soporta español)
+    ]
     for modelo in modelos_tts:
         try:
             print(f"🔊 Probando: {modelo}")
+            # Generar audio
+            audio_data = client.text_to_speech(
+                text=texto_audio,
+                model=modelo
+            )
+            # Guardar archivo (mejorado para streams/bytes)
             timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+            audio_path = f"audio_emocional_{emocion_detectada}_{timestamp}.wav"
             with open(audio_path, "wb") as f:
                 if isinstance(audio_data, bytes):
                     f.write(audio_data)
                 elif hasattr(audio_data, 'read'):
                     f.write(audio_data.read())
+                elif hasattr(audio_data, 'content'):
+                    f.write(audio_data.content)
                 else:
+                    # Para iteradores/chunks
                     for chunk in audio_data:
                         if chunk:
                             f.write(chunk if isinstance(chunk, bytes) else bytes(chunk))
+            # Verificar
+            if os.path.exists(audio_path):
+                size = os.path.getsize(audio_path)
+                print(f"📁 Creado: {audio_path} ({size} bytes)")
+                if size > 2000:  # Umbral más bajo para MMS
+                    print(f"✅ ¡AUDIO GENERADO EXITOSAMENTE!")
+                    return audio_path, emocion_detectada, confianza
+                else:
+                    print(f"⚠️ Archivo pequeño ({size} bytes), borrando...")
                     os.remove(audio_path)
         except Exception as e:
+            error_msg = str(e)
+            print(f"❌ Error con {modelo}: {error_msg[:100]}")
+            continue
+    print("⚠️ No se generó audio. Verifica límites de API o conexión.")
     return None, emocion_detectada, confianza
 # ============= ASISTENTE IA CONVERSACIONAL =============
                 f.write(f"\nArchivo de audio: {audio_path if audio_path else 'No generado'}\n")
                 f.write("=" * 60 + "\n")
+            if audio_path:
                 print(f"✅ Audio generado correctamente: {audio_path}")
             else:
                 print("⚠️ No se pudo generar el audio, pero la respuesta está disponible")
+            return respuesta, audio_path, transcripcion_path, emocion, confianza
         except Exception as e:
             print(f"❌ Error con {modelo}: {str(e)}")
                                 label="🎧 Reproducir respuesta en audio",
                                 type="filepath",
                                 visible=True,
+                                autoplay=False
                             )
                         with gr.Column():
                             transcripcion_output = gr.File(
         outputs=[pdf_output, pdf_status]
     )
+# Asistente IA con análisis emocional
     def consultar_ia_con_loading(texto, pregunta):
+        if not texto:
+            return ("❌ Por favor, procesa una factura primero", None, None, "", gr.update(visible=False))
+        yield ("🔄 El asistente está analizando tu pregunta...", None, None, "", gr.update(visible=True))
+        time.sleep(0.3)
+        respuesta, audio, transcripcion, emocion, confianza = asistente_ia_factura(texto, pregunta)
+        # Mapeo de emociones a emojis y colores
+        emotion_map = {
+            "joy": ("😊", "#4CAF50", "Alegría"),
+            "excitement": ("🎉", "#FF9800", "Emoción"),
+            "anger": ("😠", "#F44336", "Enfado"),
+            "sadness": ("😢", "#2196F3", "Tristeza"),
+            "fear": ("😰", "#9C27B0", "Miedo"),
+            "surprise": ("😮", "#FF5722", "Sorpresa"),
+            "neutral": ("😐", "#607D8B", "Neutral")
+        }
+        emoji, color, nombre = emotion_map.get(emocion, ("😐", "#607D8B", "Neutral"))
+        emocion_info = f"""
+### 🎭 Análisis Emocional
+<div style="background: linear-gradient(135deg, {color}22 0%, {color}44 100%); padding: 15px; border-radius: 10px; border-left: 4px solid {color};">
+    <p style="font-size: 18px; margin: 0;">
+        <strong style="color: {color};">{emoji} Emoción detectada: {nombre}</strong>
+    </p>
+    <p style="margin: 5px 0 0 0; color: #666;">
+        Nivel de confianza: {confianza:.1%}
+    </p>
+</div>
+"""
+        yield (respuesta, audio, transcripcion, emocion_info, gr.update(visible=False))
     btn_consulta_ia.click(
         fn=consultar_ia_con_loading,