""" VoiceKit - MCP Server for Voice Analysis 6 MCP tools for voice processing (all accept base64 audio): - Embedding extraction, voice comparison, acoustic analysis - Speech-to-text, voice isolation, similarity analysis MCP Endpoint: https://mcp-1st-birthday-voicekit.hf.space/gradio_api/mcp/sse """ import gradio as gr import base64 import os import json import tempfile import math import re # Set Gradio temp directory to current directory GRADIO_TEMP_DIR = os.path.join(os.getcwd(), "gradio_temp") os.makedirs(GRADIO_TEMP_DIR, exist_ok=True) os.environ['GRADIO_TEMP_DIR'] = GRADIO_TEMP_DIR tempfile.tempdir = GRADIO_TEMP_DIR # Modal connection (requires MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets) try: import modal AudioAnalyzer = modal.Cls.from_name("voice-semantle", "AudioAnalyzer") analyzer = AudioAnalyzer() modal_available = True print("Modal connected!") except Exception as e: modal_available = False analyzer = None print(f"Modal not available: {e}") # Load README.md and convert to HTML def load_readme_as_html(): """Load README.md and convert markdown to HTML""" try: with open("README.md", "r", encoding="utf-8") as f: content = f.read() # Remove YAML front matter content = re.sub(r'^---\n.*?\n---\n', '', content, flags=re.DOTALL) html = content # Headers html = re.sub(r'^### (.+)$', r'

\1

', html, flags=re.MULTILINE) html = re.sub(r'^## (.+)$', r'

\1

', html, flags=re.MULTILINE) html = re.sub(r'^# (.+)$', r'

\1

', html, flags=re.MULTILINE) # Code blocks - preserve content without adding extra newlines def format_code_block(match): code = match.group(2).strip() # Replace internal newlines with a placeholder, then restore after processing # This prevents the paragraph logic from adding extra breaks code_escaped = code.replace('\n', '') return f'

{code_escaped}

' html = re.sub(r'```(\w*)\n(.*?)```', format_code_block, html, flags=re.DOTALL) # Images - convert relative paths to HuggingFace raw file URLs # Handle both tags and markdown image syntax HF_BASE_URL = "https://huggingface.co/spaces/MCP-1st-Birthday/voicekit/resolve/main" def convert_image_path(match): src = match.group(1) # If it's a relative path (not starting with http), convert to HF URL if not src.startswith('http'): src = f"{HF_BASE_URL}/{src}" return f'

' html = re.sub(r'

]*>', convert_image_path, html) # Inline code (but not inside

 blocks)
        html = re.sub(r'`([^`]+)`', r'\1', html)

        # Bold
        html = re.sub(r'\*\*(.+?)\*\*', r'\1', html)

        # Links
        html = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'\1', html)

        # Tables
        lines = html.split('\n')
        in_table = False
        table_html = []
        new_lines = []

        for line in lines:
            if '|' in line and line.strip().startswith('|'):
                if not in_table:
                    in_table = True
                    table_html = ['']

                if re.match(r'^\|[\s\-:|]+\|$', line.strip()):
                    continue

                cells = [c.strip() for c in line.strip().split('|')[1:-1]]
                if len(table_html) == 1:
                    table_html.append('')
                    for cell in cells:
                        table_html.append(f'')
                    table_html.append('')
                else:
                    table_html.append('')
                    for cell in cells:
                        table_html.append(f'')
                    table_html.append('')
            else:
                if in_table:
                    table_html.append('{cell}
{cell}')
                    new_lines.append(''.join(table_html))
                    table_html = []
                    in_table = False
                new_lines.append(line)

        if in_table:
            table_html.append('')
            new_lines.append(''.join(table_html))

        html = '\n'.join(new_lines)

        # Lists
        html = re.sub(r'^- (.+)$', r'\1', html, flags=re.MULTILINE)
        html = re.sub(r'(.*
\n?)+', r'\g<0>', html)

        # Paragraphs - skip lines that are inside pre/code blocks
        lines = html.split('\n')
        result = []
        for line in lines:
            stripped = line.strip()
            if stripped and not stripped.startswith('<') and not stripped.startswith('```'):
                result.append(f'{stripped}')
            else:
                result.append(line)

        # Join and restore newlines in code blocks
        final_html = '\n'.join(result)
        final_html = final_html.replace('', '\n')

        # Escape curly braces for f-string compatibility
        final_html = final_html.replace('{', '{{').replace('}', '}}')
        return final_html
    except Exception as e:
        return f"Error loading README: {e}"

readme_html = load_readme_as_html()


def file_to_base64(file_path: str) -> str:
    """Convert file path to base64 string"""
    if not file_path:
        return ""
    with open(file_path, "rb") as f:
        return base64.b64encode(f.read()).decode()


# ============================================================================
# MCP Tools (all accept base64 directly)
# ============================================================================

def extract_embedding(audio_base64: str) -> str:
    """
    Extract voice embedding using Wav2Vec2.

    Returns a 768-dimensional vector representing voice characteristics.
    Useful for voice comparison, speaker identification, etc.

    Args:
        audio_base64: Audio file as base64 encoded string

    Returns:
        embedding (768-dim list), model, dim
    """
    if not modal_available:
        return json.dumps({"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."})
    if not audio_base64:
        return json.dumps({"error": "No audio provided"})

    try:
        result = analyzer.extract_embedding.remote(audio_base64)
        if "embedding" in result:
            result["embedding_preview"] = result["embedding"][:5] + ["..."]
            result["embedding_length"] = len(result["embedding"])
            del result["embedding"]
        return json.dumps(result, ensure_ascii=False, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)})


def match_voice(audio1_base64: str, audio2_base64: str) -> str:
    """
    Compare similarity between two voices.

    Extracts Wav2Vec2 embeddings and calculates cosine similarity.
    Useful for checking if the same person spoke with similar tone.

    Args:
        audio1_base64: First audio as base64 encoded string
        audio2_base64: Second audio as base64 encoded string

    Returns:
        similarity (0-1), tone_score (0-100)
    """
    if not modal_available:
        return json.dumps({"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."})
    if not audio1_base64 or not audio2_base64:
        return json.dumps({"error": "Both audio files required"})

    try:
        result = analyzer.compare_voices.remote(audio1_base64, audio2_base64)
        return json.dumps(result, ensure_ascii=False, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)})


def analyze_acoustics(audio_base64: str) -> str:
    """
    Analyze acoustic features of audio.

    Extracts pitch, energy, rhythm, tempo, and spectral characteristics.
    Useful for understanding voice expressiveness and characteristics.

    Args:
        audio_base64: Audio file as base64 encoded string

    Returns:
        pitch, energy, rhythm, tempo, spectral information
    """
    if not modal_available:
        return json.dumps({"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."})
    if not audio_base64:
        return json.dumps({"error": "No audio provided"})

    try:
        result = analyzer.analyze_acoustic_features.remote(audio_base64)
        return json.dumps(result, ensure_ascii=False, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)})


def transcribe_audio(audio_base64: str, language: str = "en") -> str:
    """
    Convert audio to text (Speech-to-Text).

    Uses ElevenLabs Scribe v1 model for high-quality speech recognition.
    Supports various languages.

    Args:
        audio_base64: Audio file as base64 encoded string
        language: Language code (e.g., "en", "ko", "ja"). Default is "en"

    Returns:
        text, language, model
    """
    if not modal_available:
        return json.dumps({"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."})
    if not audio_base64:
        return json.dumps({"error": "No audio provided"})

    try:
        result = analyzer.transcribe_audio.remote(audio_base64, language)
        return json.dumps(result, ensure_ascii=False, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)})


def isolate_voice(audio_base64: str) -> str:
    """
    Remove background music (BGM) and extract voice only.

    Uses ElevenLabs Voice Isolator to remove music, noise, etc.
    Useful for memes, songs, and other audio with background sounds.

    Args:
        audio_base64: Audio file as base64 encoded string

    Returns:
        isolated_audio_base64, metadata (bgm_detected, sizes, duration)
    """
    if not modal_available:
        return json.dumps({"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."})
    if not audio_base64:
        return json.dumps({"error": "No audio provided"})

    try:
        result = analyzer.isolate_voice.remote(audio_base64)
        return json.dumps(result, ensure_ascii=False, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)})


def grade_voice(
    user_audio_base64: str,
    reference_audio_base64: str,
    reference_text: str = "",
    category: str = "meme"
) -> str:
    """
    Comprehensively compare and analyze user voice with reference voice.

    Evaluates with 5 metrics:
    - pronunciation: Pronunciation accuracy (STT-based)
    - tone: Voice timbre similarity (Wav2Vec2 embedding)
    - pitch: Pitch matching
    - rhythm: Rhythm sense
    - energy: Energy expressiveness

    Args:
        user_audio_base64: User audio as base64 encoded string
        reference_audio_base64: Reference audio as base64 encoded string
        reference_text: Reference text (optional, enables pronunciation scoring)
        category: Category (meme, song, movie) - determines weights

    Returns:
        overall_score, metrics, weak_points, strong_points, feedback
    """
    if not modal_available:
        return json.dumps({"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."})
    if not user_audio_base64 or not reference_audio_base64:
        return json.dumps({"error": "Both user and reference audio required"})

    try:
        result = analyzer.analyze_audio.remote(
            user_audio_base64=user_audio_base64,
            reference_audio_base64=reference_audio_base64,
            reference_text=reference_text if reference_text else None,
            challenge_id="mcp_analysis",
            category=category,
        )
        # Simplify output for backend/API use
        metrics = result.get("metrics", {})
        simple_result = {
            "pitch": metrics.get("pitch", 0),
            "rhythm": metrics.get("rhythm", 0),
            "energy": metrics.get("energy", 0),
            "pronunciation": metrics.get("pronunciation", 0),
            "transcript": metrics.get("transcript", 0),
            "overall": result.get("overall_score", 0),
            "user_text": result.get("user_text", "")
        }
        return json.dumps(simple_result, ensure_ascii=False, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)})


# ============================================================================
# Demo Functions for UI
# ============================================================================

def demo_acoustic_analysis(audio_file):
    """Acoustic Analysis - Analyze pitch, energy, rhythm, tempo"""
    if not audio_file:
        return create_acoustic_empty()

    audio_b64 = file_to_base64(audio_file)
    result_json = analyze_acoustics(audio_b64)

    try:
        result = json.loads(result_json)
        if "error" in result:
            return f'''
                Error in result:
{result.get("error", "Unknown error")}
            '''
        return create_acoustic_visualization(result)
    except Exception as e:
        return f'''
            Parsing Error: {str(e)}


            Raw Result (first 500 chars):

            {result_json[:500]}
        '''


def demo_transcribe_audio(audio_file, language):
    """Audio Transcription"""
    if not audio_file:
        return create_transcription_empty()

    audio_b64 = file_to_base64(audio_file)
    result_json = transcribe_audio(audio_b64, language)

    try:
        result = json.loads(result_json)
        if "error" in result:
            return create_transcription_empty()
        text = result.get("text", "")
        return create_transcription_visualization(text)
    except:
        return create_transcription_empty()


def demo_clean_extraction(audio_file):
    """Clean Audio Extraction - returns audio file only"""
    if not audio_file:
        return None

    audio_b64 = file_to_base64(audio_file)
    result_json = isolate_voice(audio_b64)

    try:
        result = json.loads(result_json)
        if "error" in result:
            return None

        # Convert isolated audio base64 back to file
        import tempfile
        isolated_audio_bytes = base64.b64decode(result["isolated_audio_base64"])
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(isolated_audio_bytes)
            isolated_audio_path = tmp.name

        return isolated_audio_path
    except:
        return None


def demo_extract_embedding(audio_file):
    """Extract Embedding - extract voice fingerprint"""
    if not audio_file:
        return create_embedding_empty()

    audio_b64 = file_to_base64(audio_file)
    result_json = extract_embedding(audio_b64)

    try:
        result = json.loads(result_json)
        if "error" in result:
            return f'''
                Error in result:
{result.get("error", "Unknown error")}
            '''
        return create_embedding_visualization(result)
    except Exception as e:
        return f'''
            Parsing Error: {str(e)}


            Raw Result (first 500 chars):

            {result_json[:500]}
        '''


def demo_match_voice(audio1, audio2):
    """Compare Voices - compare two voice similarities"""
    if not audio1 or not audio2:
        return create_compare_empty()

    audio1_b64 = file_to_base64(audio1)
    audio2_b64 = file_to_base64(audio2)
    result_json = match_voice(audio1_b64, audio2_b64)

    try:
        result = json.loads(result_json)
        if "error" in result:
            return create_compare_empty()
        return create_compare_visualization(result)
    except:
        return create_compare_empty()


def demo_voice_similarity(user_audio, ref_audio):
    """Voice Similarity - comprehensive 5-metric analysis"""
    if not user_audio or not ref_audio:
        return create_similarity_empty()

    user_b64 = file_to_base64(user_audio)
    ref_b64 = file_to_base64(ref_audio)
    result_json = grade_voice(user_b64, ref_b64, "", "meme")

    try:
        result = json.loads(result_json)
        if "error" in result:
            return create_similarity_empty()
        return create_similarity_visualization(result)
    except:
        return create_similarity_empty()


# ============================================================================
# Visualization Functions
# ============================================================================

def create_acoustic_empty():
    """Empty state for acoustic analysis"""
    return """
    
        
            
        
        
            Upload audio to analyze acoustic features
        
    
    """


def create_acoustic_visualization(result):
    """Acoustic analysis visualization with radar chart"""
    pitch = result.get("pitch", {})
    energy = result.get("energy", {})
    rhythm = result.get("rhythm", {})
    tempo = result.get("tempo", 0)
    spectral = result.get("spectral", {})

    # Use pre-calculated scores from Modal backend (already 0-100)
    pitch_norm = pitch.get("score", 0)
    energy_norm = energy.get("score", 0)
    rhythm_norm = rhythm.get("score", 0)
    spectral_norm = spectral.get("score", 0)

    # Tempo: normalize BPM to 0-100 (60-180 BPM range)
    tempo_bpm = tempo
    tempo_norm = min(100, max(0, (tempo_bpm - 60) / 120 * 100)) if tempo_bpm > 0 else 0

    # Radar chart calculation
    center_x, center_y = 150, 150
    radius = 110

    # 5 metrics in order: Pitch(top), Energy(top-right), Rhythm(bottom-right), Tempo(bottom-left), Spectral(top-left)
    metrics = [
        ("Pitch", pitch_norm, -90),      # 0° - 90° = -90° (top)
        ("Energy", energy_norm, -18),    # 72° - 90° = -18° (top-right)
        ("Rhythm", rhythm_norm, 54),     # 144° - 90° = 54° (bottom-right)
        ("Tempo", tempo_norm, 126),      # 216° - 90° = 126° (bottom-left)
        ("Spectral", spectral_norm, 198) # 288° - 90° = 198° (top-left)
    ]

    # Calculate polygon points for data
    data_points = []
    for _, value, angle_deg in metrics:
        angle_rad = math.radians(angle_deg)
        point_radius = (value / 100) * radius
        x = center_x + point_radius * math.cos(angle_rad)
        y = center_y + point_radius * math.sin(angle_rad)
        data_points.append(f"{x:.2f},{y:.2f}")

    # Background concentric pentagons (20, 40, 60, 80, 100)
    def create_pentagon_points(scale):
        points = []
        for _, _, angle_deg in metrics:
            angle_rad = math.radians(angle_deg)
            r = radius * scale
            x = center_x + r * math.cos(angle_rad)
            y = center_y + r * math.sin(angle_rad)
            points.append(f"{x:.2f},{y:.2f}")
        return " ".join(points)

    background_pentagons = ""
    for scale in [0.2, 0.4, 0.6, 0.8, 1.0]:
        background_pentagons += f''

    # Axis lines from center to vertices
    axis_lines = ""
    for _, _, angle_deg in metrics:
        angle_rad = math.radians(angle_deg)
        x = center_x + radius * math.cos(angle_rad)
        y = center_y + radius * math.sin(angle_rad)
        axis_lines += f''

    # Labels at vertices
    labels = ""
    for label, value, angle_deg in metrics:
        angle_rad = math.radians(angle_deg)
        # Position label outside the pentagon
        label_radius = radius + 25
        x = center_x + label_radius * math.cos(angle_rad)
        y = center_y + label_radius * math.sin(angle_rad)
        labels += f'''
            {label}
            {int(value)}
        '''

    return f"""
    
        
    
    """


def create_mimicry_empty():
    """Empty state for voice mimicry game"""
    return """
    
        
            
        
        
            Upload reference and your voice to see similarity scores
        
    
    """


def create_mimicry_visualization(result):
    """Voice mimicry score visualization with progress bars"""
    pronunciation = result.get("pronunciation", 0)
    tone = result.get("transcript", 0)  # Tone score
    pitch = result.get("pitch", 0)
    rhythm = result.get("rhythm", 0)
    energy = result.get("energy", 0)

    def create_progress_bar(label, value):
        return f"""
        
            
                {label}
                
                    
                
            
            {value}
        
        """

    return f"""
    
        
            
                
            
            
                CLAUDE
                
                    Wow, that voice input, takes analytical skills of course but I'll handle it
                
            
        

        
            {create_progress_bar("Pronunciation", pronunciation)}
            {create_progress_bar("Tone", tone)}
            {create_progress_bar("Pitch", pitch)}
            {create_progress_bar("Rhythm", rhythm)}
            {create_progress_bar("Energy", energy)}
        
    
    """


def create_transcription_empty():
    """Empty state for transcription"""
    return """
    
        Upload audio to transcribe
    
    """


def create_transcription_visualization(text):
    """Simple text display for transcription result"""
    return f"""
    
        {text if text else "Transcription completed"}
    
    """


def create_embedding_empty():
    """Empty state for embedding extraction"""
    return """
    
        
            
        
        
            Upload audio to extract voice embedding
        
    
    """


def create_embedding_visualization(result):
    """Embedding extraction visualization"""
    model = result.get("model", "Wav2Vec2")
    dim = result.get("embedding_length", result.get("dim", 768))
    preview = result.get("embedding_preview", [])

    # Filter only numeric values to avoid format errors with strings like "..."
    if preview:
        numeric_preview = [v for v in preview if isinstance(v, (int, float))]
        preview_str = ", ".join([f"{v:.4f}" for v in numeric_preview]) if numeric_preview else "..."
    else:
        preview_str = "..."

    return f"""
    
        
            Model
            {model}
        
        
            Dimensions
            {dim}
        
        
            Preview
            
                [{preview_str}]
            
        
    
    """


def create_compare_empty():
    """Empty state for voice comparison"""
    return """
    
        
            
        
        
            Upload two audio files to compare voices
        
    
    """


def create_compare_visualization(result):
    """Voice comparison visualization with similarity score"""
    similarity = result.get("similarity", 0)
    tone_score = result.get("tone_score", 0)

    # Convert similarity to percentage
    similarity_pct = int(similarity * 100)

    # Color based on similarity - Purple theme matching VOICE SIMILARITY
    if similarity_pct >= 80:
        color = "#c084fc"  # Light purple (high score)
    elif similarity_pct >= 60:
        color = "#a855f7"  # Medium purple (medium score)
    else:
        color = "#7c3aed"  # Dark purple (low score)

    return f"""
    
        
            
                {similarity_pct}
                SIMILARITY
            
        
    
    """


def create_similarity_empty():
    """Empty state for voice similarity analysis"""
    return """
    
        
            
        
        
            Upload audio files for comprehensive similarity analysis
        
    
    """


def create_similarity_visualization(result):
    """Voice similarity visualization with radar chart"""
    overall = result.get("overall", 0)

    pronunciation = result.get("pronunciation", 0)
    transcript = result.get("transcript", 0)
    pitch = result.get("pitch", 0)
    rhythm = result.get("rhythm", 0)
    energy = result.get("energy", 0)

    # Color based on overall score - Purple theme
    if overall >= 80:
        color = "#c084fc"  # Light purple (high score)
    elif overall >= 60:
        color = "#a855f7"  # Medium purple (medium score)
    else:
        color = "#7c3aed"  # Dark purple (low score)

    # Radar chart calculation
    center_x, center_y = 150, 150
    radius = 110

    # 5 metrics in order: Pronunciation(top), Transcript(top-right), Pitch(bottom-right), Energy(bottom-left), Rhythm(top-left)
    metrics = [
        ("Pronunciation", pronunciation, -90),   # 0° - 90° = -90° (top)
        ("Transcript", transcript, -18),         # 72° - 90° = -18° (top-right)
        ("Pitch", pitch, 54),                    # 144° - 90° = 54° (bottom-right)
        ("Energy", energy, 126),                 # 216° - 90° = 126° (bottom-left)
        ("Rhythm", rhythm, 198)                  # 288° - 90° = 198° (top-left)
    ]

    # Calculate polygon points for data
    data_points = []
    for _, value, angle_deg in metrics:
        angle_rad = math.radians(angle_deg)
        point_radius = (value / 100) * radius
        x = center_x + point_radius * math.cos(angle_rad)
        y = center_y + point_radius * math.sin(angle_rad)
        data_points.append(f"{x:.2f},{y:.2f}")

    # Background concentric pentagons (20, 40, 60, 80, 100)
    def create_pentagon_points(scale):
        points = []
        for _, _, angle_deg in metrics:
            angle_rad = math.radians(angle_deg)
            r = radius * scale
            x = center_x + r * math.cos(angle_rad)
            y = center_y + r * math.sin(angle_rad)
            points.append(f"{x:.2f},{y:.2f}")
        return " ".join(points)

    background_pentagons = ""
    for scale in [0.2, 0.4, 0.6, 0.8, 1.0]:
        background_pentagons += f''

    # Axis lines from center to vertices
    axis_lines = ""
    for _, _, angle_deg in metrics:
        angle_rad = math.radians(angle_deg)
        x = center_x + radius * math.cos(angle_rad)
        y = center_y + radius * math.sin(angle_rad)
        axis_lines += f''

    # Labels at vertices
    labels = ""
    for label, value, angle_deg in metrics:
        angle_rad = math.radians(angle_deg)
        # Position label outside the pentagon
        label_radius = radius + 25
        x = center_x + label_radius * math.cos(angle_rad)
        y = center_y + label_radius * math.sin(angle_rad)
        labels += f'''
            {label}
            {value}
        '''

    return f"""
    
        
        
            
                
                    {overall}
                    OVERALL
                
            
        

        
        
            
        
    
    """


# Clean audio functions removed - using gr.Audio component directly




# ============================================================================
# Gradio Interface with Dark Theme
# ============================================================================

custom_css = """
/* ===== DARK THEME STYLING (CSS-ONLY) ===== */
/* This CSS forces dark mode appearance regardless of system/Gradio theme */
/* All colors are SOLID (not rgba/transparent) to ensure consistent appearance */

:root {
    color-scheme: dark !important;
    --body-background-fill: #0a0a1a !important;
    --background-fill-primary: #0d0d1a !important;
    --background-fill-secondary: #12122a !important;
    --block-background-fill: #0d0d1a !important;
    --input-background-fill: #1a1a35 !important;
    --body-text-color: #e0e7ff !important;
    --block-title-text-color: #a5b4fc !important;
    --block-label-text-color: #a5b4fc !important;
    --input-text-color: #e0e7ff !important;
    --neutral-50: #0a0a1a !important;
    --neutral-100: #0d0d1a !important;
    --neutral-200: #12122a !important;
    --neutral-300: #1a1a35 !important;
    --neutral-400: #2d2d4a !important;
    --neutral-500: #4a4a6a !important;
    --neutral-600: #7c7c9a !important;
    --neutral-700: #a5b4fc !important;
    --neutral-800: #c7d2fe !important;
    --neutral-900: #e0e7ff !important;
    --neutral-950: #ffffff !important;
}

/* Force dark mode on html and body */
html, body {
    background: #0a0a1a !important;
    background-color: #0a0a1a !important;
    color: #e0e7ff !important;
}

/* ===== GLOBAL STYLES ===== */
body {
    background: linear-gradient(180deg, #0a0a1a 0%, #0f0f23 100%) !important;
    background-color: #0a0a1a !important;
    color: #ffffff !important;
    font-family: system-ui, -apple-system, sans-serif;
}

/* Override Gradio's light mode backgrounds AND text colors */
.dark, .light, [data-theme="light"], [data-theme="dark"],
html[data-theme="light"], html[data-theme="dark"],
body.light, body.dark {
    --body-background-fill: #0a0a1a !important;
    --background-fill-primary: #0d0d1a !important;
    --background-fill-secondary: #12122a !important;
    --block-background-fill: #0d0d1a !important;
    --input-background-fill: #1a1a35 !important;
    --body-text-color: #e0e7ff !important;
    --block-title-text-color: #a5b4fc !important;
    --block-label-text-color: #a5b4fc !important;
    --input-text-color: #e0e7ff !important;
    --neutral-50: #0a0a1a !important;
    --neutral-100: #0d0d1a !important;
    --neutral-200: #12122a !important;
    --neutral-300: #1a1a35 !important;
    --neutral-400: #2d2d4a !important;
    --neutral-500: #4a4a6a !important;
    --neutral-600: #7c7c9a !important;
    --neutral-700: #a5b4fc !important;
    --neutral-800: #c7d2fe !important;
    --neutral-900: #e0e7ff !important;
    --neutral-950: #ffffff !important;
    color: #e0e7ff !important;
    background: #0a0a1a !important;
    background-color: #0a0a1a !important;
}

.gradio-container {
    max-width: 100% !important;
    width: 100% !important;
    padding: 0px 16px 20px 16px !important;
    background: #0a0a1a !important;
    background-color: #0a0a1a !important;
    margin: 0 !important;
}

.gradio-container > .main,
.gradio-container .main,
.main {
    max-width: 100% !important;
    width: 100% !important;
    padding-left: 0 !important;
    padding-right: 0 !important;
    margin: 0 auto !important;
}

.contain {
    max-width: 100% !important;
    padding: 0 !important;
}

/* Force full width on all Gradio internal containers */
.gradio-container > div,
.gradio-container > div > div,
#component-0,
.wrap,
.app,
.contain,
footer,
.gradio-row,
.gradio-column,
.svelte-1gfkn6j,
[class*="svelte-"] {
    max-width: 100% !important;
}

.gradio-row {
    max-width: 100% !important;
    width: 100% !important;
    margin: 0 !important;
    padding: 0 !important;
}

/* ===== HEADER (FLOATING, NO CARD) ===== */
.header-main {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 0;
    padding: 0;
}

.header-left {
    display: flex;
    align-items: center;
    gap: 16px;
}

.header-icon {
    font-size: 48px;
    filter: drop-shadow(0 4px 12px rgba(99, 102, 241, 0.6));
}

.header-title {
    font-size: 42px;
    font-weight: 900;
    color: #e0e7ff;
    margin: 0;
    letter-spacing: -0.5px;
}

.header-subtitle {
    color: #c7d2fe;
    font-size: 20px;
    font-weight: 700;
    margin-left: 6px;
}

/* ===== DOCS BUTTON ===== */
.docs-button {
    display: flex;
    align-items: center;
    gap: 8px;
    padding: 10px 20px;
    background: linear-gradient(135deg, rgba(124, 58, 237, 0.3), rgba(99, 102, 241, 0.3));
    border: 1px solid rgba(124, 58, 237, 0.5);
    border-radius: 12px;
    color: #e0e7ff;
    font-size: 14px;
    font-weight: 600;
    cursor: pointer;
    transition: all 0.3s ease;
    text-transform: uppercase;
    letter-spacing: 0.5px;
}

.docs-button:hover {
    background: linear-gradient(135deg, rgba(124, 58, 237, 0.5), rgba(99, 102, 241, 0.5));
    border-color: rgba(124, 58, 237, 0.8);
    transform: translateY(-2px);
    box-shadow: 0 4px 20px rgba(124, 58, 237, 0.4);
}

.docs-button svg {
    width: 18px;
    height: 18px;
}

/* ===== DOCS MODAL ===== */
.docs-modal-overlay {
    display: none;
    position: fixed !important;
    inset: 0 !important;
    width: 100vw !important;
    height: 100vh !important;
    background: rgba(0, 0, 0, 0.85) !important;
    backdrop-filter: blur(10px) !important;
    z-index: 99999 !important;
    justify-content: center !important;
    align-items: flex-start !important;
    padding-top: 60px !important;
    box-sizing: border-box !important;
    /* Modal positioned near top of viewport */
    overflow: hidden !important;
}

.docs-modal-overlay.active {
    display: flex !important;
}

.docs-modal {
    position: relative !important;
    background: #0d0d1a !important;
    border: 2px solid #7c3aed !important;
    border-radius: 20px !important;
    width: calc(100vw - 80px) !important;
    max-width: 1200px !important;
    max-height: 55vh !important;
    overflow: hidden !important;
    box-shadow: 0 25px 80px rgba(0, 0, 0, 0.9) !important;
    /* Remove margin that could affect centering */
    margin: 0 !important;
    /* Prevent any transform inheritance issues */
    transform: none !important;
}

.docs-modal-header {
    display: flex !important;
    justify-content: space-between !important;
    align-items: center !important;
    padding: 20px 24px !important;
    border-bottom: 2px solid #7c3aed !important;
    background: #1a1a2e !important;
}

.docs-modal-title {
    font-size: 20px;
    font-weight: 700;
    color: #e0e7ff;
    display: flex;
    align-items: center;
    gap: 10px;
}

.docs-modal-close {
    background: rgba(124, 58, 237, 0.3);
    border: 2px solid rgba(124, 58, 237, 0.5);
    border-radius: 12px;
    color: #e0e7ff;
    font-size: 28px;
    font-weight: 300;
    cursor: pointer;
    padding: 4px 14px;
    line-height: 1;
    transition: all 0.2s;
}

.docs-modal-close:hover {
    background: rgba(124, 58, 237, 0.4);
    border-color: rgba(124, 58, 237, 0.6);
}

.docs-modal-content {
    padding: 24px !important;
    overflow-y: auto !important;
    max-height: calc(55vh - 80px) !important;
    color: #c7d2fe !important;
    font-size: 15px !important;
    line-height: 1.7 !important;
    background: #0d0d1a !important;
}

.docs-modal-content h1 { font-size: 28px; color: #e0e7ff; margin: 0 0 16px 0; padding-bottom: 12px; border-bottom: 2px solid rgba(124, 58, 237, 0.3); }
.docs-modal-content h2 { font-size: 22px; color: #e0e7ff; margin: 24px 0 12px 0; }
.docs-modal-content h3 { font-size: 18px; color: #a5b4fc; margin: 20px 0 10px 0; }
.docs-modal-content p { margin: 12px 0; }
.docs-modal-content ul, .docs-modal-content ol { margin: 12px 0; padding-left: 24px; }
.docs-modal-content li { margin: 6px 0; }
.docs-modal-content code { background: rgba(124, 58, 237, 0.2); padding: 2px 6px; border-radius: 4px; font-family: 'SF Mono', 'Monaco', 'Consolas', monospace; font-size: 13px; color: #c4b5fd; }
.docs-modal-content pre { background: rgba(0, 0, 0, 0.4); border: 1px solid rgba(124, 58, 237, 0.2); border-radius: 12px; padding: 16px; overflow-x: auto; margin: 16px 0; white-space: pre; }
.docs-modal-content pre code { background: transparent; padding: 0; color: #a5b4fc; white-space: pre; display: block; }
.docs-modal-content table { width: 100%; border-collapse: collapse; margin: 16px 0; }
.docs-modal-content th, .docs-modal-content td { padding: 10px 12px; text-align: left; border: 1px solid rgba(124, 58, 237, 0.2); }
.docs-modal-content th { background: rgba(124, 58, 237, 0.15); color: #e0e7ff; font-weight: 600; }
.docs-modal-content td { color: #c7d2fe; }
.docs-modal-content a { color: #a78bfa; text-decoration: none; }
.docs-modal-content a:hover { text-decoration: underline; }
.docs-modal-content strong { color: #e0e7ff; }
.docs-modal-content img { max-width: 100%; max-height: 400px; height: auto; border-radius: 8px; margin: 12px 0; object-fit: contain; }

/* ===== CARD STYLES ===== */
.card {
    background: #0f0f23 !important;
    background-color: #0f0f23 !important;
    border: 1px solid #3d2a6b !important;
    border-radius: 20px;
    padding: 30px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4);
    transition: all 0.3s ease;
    height: 100%;
    display: flex;
    flex-direction: column;
}

.card:hover {
    border-color: #5b3d99 !important;
    box-shadow: 0 12px 48px rgba(124, 58, 237, 0.3);
}

/* Ensure columns in top row have equal height */
.gradio-row:first-of-type .gradio-column {
    display: flex !important;
    flex-direction: column !important;
}

.gradio-row:first-of-type .gradio-column > div {
    flex: 1 !important;
    display: flex !important;
    flex-direction: column !important;
}

/* Set minimum height for top row cards */
.gradio-row:first-of-type .card {
    min-height: 550px;
}

.card-title {
    font-size: 16px;
    font-weight: 700;
    color: #a5b4fc;
    text-transform: uppercase;
    letter-spacing: 1px;
    margin-bottom: 20px;
    display: flex;
    align-items: center;
}

/* ===== ROW SPACING ===== */
.gradio-row {
    gap: 24px !important;
}

/* ===== QUICK START - CODE BLOCK (TERMINAL/IDE STYLE) ===== */
.terminal-window {
    background: #1a1b26;
    border: 1px solid rgba(124, 58, 237, 0.3);
    border-radius: 12px;
    overflow: hidden;
    margin-bottom: 16px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.6);
}

.terminal-header {
    background: #16161e;
    padding: 12px 16px;
    display: flex;
    align-items: center;
    justify-content: space-between;
    border-bottom: 1px solid rgba(124, 58, 237, 0.2);
}

.terminal-dots {
    display: flex;
    gap: 8px;
}

.terminal-dot {
    width: 12px;
    height: 12px;
    border-radius: 50%;
}

.terminal-dot.red {
    background: #ff5f56 !important;
    box-shadow: 0 0 8px rgba(255, 95, 86, 0.8) !important;
}

.terminal-dot.yellow {
    background: #ffbd2e !important;
    box-shadow: 0 0 8px rgba(255, 189, 46, 0.8) !important;
}

.terminal-dot.green {
    background: #27c93f !important;
    box-shadow: 0 0 8px rgba(39, 201, 63, 0.8) !important;
}

.terminal-title {
    font-size: 12px;
    color: #6b7280;
    font-family: 'SF Mono', 'Monaco', 'Consolas', monospace;
    font-weight: 500;
}

.terminal-body {
    background: #1a1b26;
    padding: 0;
    display: flex;
}

.line-numbers {
    background: #16161e;
    padding: 16px 12px;
    border-right: 1px solid rgba(124, 58, 237, 0.15);
    user-select: none;
    text-align: right;
    min-width: 48px;
}

.line-num {
    display: block;
    color: #4a5568;
    font-family: 'SF Mono', 'Monaco', 'Consolas', monospace;
    font-size: 14px;
    line-height: 1.8;
}

.code-content {
    flex: 1;
    padding: 16px 20px;
    overflow-x: auto;
}

.code-line {
    display: block;
    white-space: pre;
    font-family: 'SF Mono', 'Monaco', 'Consolas', monospace;
    font-size: 14px;
    line-height: 1.8;
    color: #a9b1d6;
}

.json-key {
    color: #7dcfff;
    font-weight: 500;
}

.json-string {
    color: #9ece6a;
}

.json-bracket {
    color: #bb9af7;
    font-weight: 600;
}

.json-colon {
    color: #c0caf5;
}

.json-comma {
    color: #c0caf5;
}

.copy-button {
    width: 100%;
    background: linear-gradient(135deg, #7c3aed, #6366f1) !important;
    border: none !important;
    border-radius: 12px !important;
    padding: 14px 24px !important;
    font-weight: 700 !important;
    font-size: 13px !important;
    color: white !important;
    text-transform: uppercase;
    letter-spacing: 1px;
    cursor: pointer;
    box-shadow: 0 4px 16px rgba(124, 58, 237, 0.4) !important;
    transition: all 0.3s ease !important;
    display: flex;
    align-items: center;
    justify-content: center;
    gap: 8px;
}

.copy-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 24px rgba(124, 58, 237, 0.6) !important;
}

/* ===== TOOLS TABLE ===== */
.tools-table,
table.tools-table,
.light .tools-table,
.dark .tools-table,
[data-theme="light"] .tools-table,
[data-theme="dark"] .tools-table {
    width: 100%;
    border-collapse: separate;
    border-spacing: 0;
    background: #0d0d1f !important;
    background-color: #0d0d1f !important;
    border-radius: 12px;
    overflow: hidden;
    border: 1px solid #3d2a6b !important;
    margin-bottom: 0;
    flex: 1;
    color: #cbd5e1 !important;
}

.tools-table th,
table.tools-table th,
.light .tools-table th,
.dark .tools-table th,
[data-theme="light"] .tools-table th,
[data-theme="dark"] .tools-table th {
    background: #1f1545 !important;
    background-color: #1f1545 !important;
    color: #a5b4fc !important;
    font-weight: 700;
    font-size: 16px;
    text-transform: uppercase;
    letter-spacing: 1.5px;
    padding: 20px 14px;
    text-align: left;
    border-bottom: 1px solid #3d2a6b !important;
}

.tools-table td,
table.tools-table td,
.light .tools-table td,
.dark .tools-table td,
[data-theme="light"] .tools-table td,
[data-theme="dark"] .tools-table td {
    padding: 20px 14px;
    color: #cbd5e1 !important;
    background: #0d0d1f !important;
    background-color: #0d0d1f !important;
    font-size: 16px;
    border-bottom: 1px solid #1a1535 !important;
}

.tools-table tr:last-child td {
    border-bottom: none;
}

.tools-table tr:hover,
.tools-table tr:hover td {
    background: #1a1540 !important;
    background-color: #1a1540 !important;
}

.tool-name,
.light .tool-name,
.dark .tool-name,
[data-theme="light"] .tool-name,
[data-theme="dark"] .tool-name {
    color: #22d3ee !important;
    font-family: 'SF Mono', 'Monaco', 'Consolas', monospace;
    font-weight: 600;
    font-size: 13px;
    vertical-align: middle;
}

/* ===== COMPOSITE SECTION ===== */
.composite-section,
.light .composite-section,
.dark .composite-section,
[data-theme="light"] .composite-section,
[data-theme="dark"] .composite-section {
    background: #0d0d1f !important;
    background-color: #0d0d1f !important;
    border: 1px solid #3d2a6b !important;
    border-radius: 12px;
    padding: 20px;
    color: #cbd5e1 !important;
}

.composite-header,
.light .composite-header,
.dark .composite-header,
[data-theme="light"] .composite-header,
[data-theme="dark"] .composite-header {
    font-size: 11px;
    font-weight: 700;
    color: #a5b4fc !important;
    text-transform: uppercase;
    letter-spacing: 1.5px;
    margin-bottom: 12px;
}

.composite-content,
.light .composite-content,
.dark .composite-content,
[data-theme="light"] .composite-content,
[data-theme="dark"] .composite-content {
    color: #cbd5e1 !important;
    font-size: 12px;
    line-height: 1.6;
    margin-bottom: 16px;
}

.try-demo-button {
    width: 100%;
    background: transparent !important;
    border: 2px solid #7c3aed !important;
    border-radius: 12px !important;
    padding: 12px 24px !important;
    font-weight: 700 !important;
    font-size: 12px !important;
    color: #7c3aed !important;
    text-transform: uppercase;
    letter-spacing: 1px;
    cursor: pointer;
    transition: all 0.3s ease !important;
}

.try-demo-button:hover {
    background: rgba(124, 58, 237, 0.1) !important;
    border-color: #7c3aed !important;
    color: #8b5cf6 !important;
}

/* ===== BUTTONS ===== */
button[variant="primary"] {
    background: linear-gradient(135deg, #7c3aed, #6366f1) !important;
    border: none !important;
    border-radius: 12px !important;
    padding: 14px 32px !important;
    font-weight: 700 !important;
    font-size: 14px !important;
    color: white !important;
    box-shadow: 0 4px 20px rgba(124, 58, 237, 0.4) !important;
    transition: all 0.3s ease !important;
}

button[variant="primary"]:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 32px rgba(124, 58, 237, 0.6) !important;
}

/* ===== AUDIO COMPONENT ===== */
.gradio-audio {
    background: rgba(30, 27, 75, 0.6) !important;
    border: 1px solid rgba(124, 58, 237, 0.3) !important;
    border-radius: 12px !important;
}

/* ===== TEXTBOX ===== */
textarea {
    background: rgba(30, 27, 75, 0.6) !important;
    border: 1px solid rgba(124, 58, 237, 0.3) !important;
    border-radius: 12px !important;
    color: #e0e7ff !important;
    font-size: 13px !important;
}

/* ===== DROPDOWN ===== */
select {
    background: rgba(30, 27, 75, 0.6) !important;
    border: 1px solid rgba(124, 58, 237, 0.3) !important;
    border-radius: 12px !important;
    color: #e0e7ff !important;
}

/* ===== LABELS ===== */
label {
    color: #a5b4fc !important;
    font-weight: 600 !important;
    font-size: 12px !important;
    text-transform: uppercase;
    letter-spacing: 0.5px;
}

/* ===== HTML OUTPUT ===== */
.gradio-html {
    background: transparent !important;
    border: none !important;
}

/* ===== DEMO ROW LAYOUT ===== */
.demo-row {
    display: flex !important;
    gap: 24px !important;
    align-items: stretch !important;
}

/* Only apply card style to the outer column (demo-card-column) */
.demo-card-column {
    display: flex !important;
    flex-direction: column !important;
    height: 700px !important;
    min-height: 700px !important;
    max-height: 700px !important;
    background: rgba(15, 15, 35, 0.8) !important;
    backdrop-filter: blur(20px) !important;
    border: 1px solid rgba(124, 58, 237, 0.3) !important;
    border-radius: 20px !important;
    padding: 4px 4px 2px 4px !important;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4) !important;
    transition: all 0.3s ease !important;
    gap: 2px !important;
    overflow-y: auto !important;
}

.demo-card-column:hover {
    border-color: rgba(124, 58, 237, 0.5) !important;
    box-shadow: 0 12px 48px rgba(124, 58, 237, 0.3) !important;
}

/* Remove any border/background from inner elements */
.demo-card-column > div,
.demo-card-column > div > div,
.demo-row > div > div {
    background: transparent !important;
    border: none !important;
    box-shadow: none !important;
    padding: 0 !important;
    border-radius: 0 !important;
}

/* Remove card background from inner HTML - we use column background instead */
.demo-row .card {
    background: transparent !important;
    backdrop-filter: none !important;
    border: none !important;
    border-radius: 0 !important;
    padding: 0 !important;
    box-shadow: none !important;
    margin-bottom: 12px !important;
}

.demo-row .card:hover {
    border: none !important;
    box-shadow: none !important;
}

/* Ensure all inner components have transparent background */
.demo-row .gradio-audio,
.demo-row .gradio-dropdown,
.demo-row .gradio-textbox,
.demo-row .gradio-button {
    background: transparent !important;
}

/* Create a wrapper for input elements (flex container) */
.demo-card-column > div:not(:last-child) {
    flex: 0 0 auto !important;
}

/* Adjust spacing for input elements in demo cards */
.demo-row .gradio-audio {
    margin-top: 6px !important;
    margin-bottom: 0px !important;
    max-height: 50px !important;
    min-height: 40px !important;
    height: 45px !important;
}

/* Target all child elements of audio component */
.demo-row .gradio-audio > div,
.demo-row .gradio-audio .wrap,
.demo-row .gradio-audio .upload-container,
.demo-row .gradio-audio .record-container,
.demo-row .gradio-audio * {
    max-height: 50px !important;
}

/* Audio player specific height reduction */
.demo-row .gradio-audio audio {
    height: 26px !important;
    max-height: 26px !important;
    min-height: 26px !important;
}

/* Upload/record button container height */
.demo-row .gradio-audio .upload-container,
.demo-row .gradio-audio .record-container {
    min-height: 38px !important;
    max-height: 38px !important;
    padding: 4px !important;
}

/* Audio component buttons */
.demo-row .gradio-audio button {
    height: 28px !important;
    min-height: 28px !important;
    max-height: 28px !important;
    padding: 4px 10px !important;
    font-size: 10px !important;
}

/* Hide text nodes in audio upload area - keep icons */
.demo-row .gradio-audio .upload-text {
    display: none !important;
}

.demo-row .gradio-audio .placeholder {
    display: none !important;
}

.demo-row .gradio-audio span:not(:has(svg)) {
    font-size: 0 !important;
}

.demo-row .gradio-audio p {
    display: none !important;
}

/* Hide "Drop Audio Here", "- or -", "Click to Upload" text */
.demo-row .gradio-audio .upload-container span,
.demo-row .gradio-audio .upload-container p {
    font-size: 0 !important;
    line-height: 0 !important;
}

/* Keep SVG icons visible */
.demo-row .gradio-audio svg {
    font-size: initial !important;
}

/* ADDITIONAL METHODS: Hide all text in audio upload area */
.demo-row .gradio-audio label {
    font-size: 0 !important;
}

.demo-row .gradio-audio label span:not(:has(svg)) {
    display: none !important;
}

.demo-row .gradio-audio .file-preview {
    font-size: 0 !important;
}

.demo-row .gradio-audio .file-preview span {
    font-size: 0 !important;
    display: none !important;
}

.demo-row .gradio-audio [data-testid="upload-text"],
.demo-row .gradio-audio [data-testid="file-preview-text"],
.demo-row .gradio-audio .upload-text,
.demo-row .gradio-audio .file-preview-text {
    display: none !important;
    visibility: hidden !important;
    font-size: 0 !important;
}

/* Target all text nodes (more aggressive) */
.demo-row .gradio-audio *:not(svg):not(path):not(circle):not(rect):not(line) {
    color: transparent !important;
}

.demo-row .gradio-audio button {
    color: white !important;
}

/* Ensure icons remain visible */
.demo-row .gradio-audio svg,
.demo-row .gradio-audio svg * {
    color: initial !important;
    fill: currentColor !important;
    stroke: currentColor !important;
}

/* NUCLEAR OPTION: Hide everything in label, then show only necessary elements */
.demo-row .gradio-audio label > div > div {
    display: none !important;
}

.demo-row .gradio-audio label::before {
    content: '' !important;
}

.demo-row .gradio-audio label * {
    visibility: hidden !important;
}

.demo-row .gradio-audio label svg {
    visibility: visible !important;
}

.demo-row .gradio-audio label button {
    visibility: visible !important;
}

.demo-row .gradio-audio label audio {
    visibility: visible !important;
}

/* Force hide any text content */
.demo-row .gradio-audio label > div::after,
.demo-row .gradio-audio label > div::before {
    content: '' !important;
    display: none !important;
}

/* Additional override for upload text elements */
.demo-row .gradio-audio [class*="upload"],
.demo-row .gradio-audio [class*="placeholder"],
.demo-row .gradio-audio [class*="text"] {
    font-size: 0 !important;
    line-height: 0 !important;
    width: 0 !important;
    height: 0 !important;
    opacity: 0 !important;
    visibility: hidden !important;
    position: absolute !important;
    left: -9999px !important;
}

/* NUCLEAR OPTION 2: Complete removal of label content */
.demo-row .gradio-audio label.block {
    display: none !important;
}

.demo-row .gradio-audio .file-upload {
    display: none !important;
}

/* Hide all direct text children */
.demo-row .gradio-audio label > span:not(:has(button)):not(:has(audio)):not(:has(svg)) {
    display: none !important;
}

/* Gradio 6.0 specific selectors - upload area */
.demo-row .gradio-audio [data-testid="upload-button"],
.demo-row .gradio-audio [data-testid="file-upload"],
.demo-row .gradio-audio .upload-area {
    display: none !important;
}

/* Hide all paragraph elements in audio component */
.demo-row .gradio-audio label p,
.demo-row .gradio-audio label span.text,
.demo-row .gradio-audio label div.text {
    display: none !important;
}

/* More aggressive text hiding - target by content */
.demo-row .gradio-audio *::before,
.demo-row .gradio-audio *::after {
    content: '' !important;
    display: none !important;
}

/* Make sure only buttons and audio players are visible */
.demo-row .gradio-audio > label > div > div:not(:has(button)):not(:has(audio)) {
    display: none !important;
}

/* Gradio Blocks specific - Hide wrapper divs that contain text */
.demo-row .gradio-audio .wrap > div:not(:has(button)):not(:has(audio)):not(:has(svg)) {
    display: none !important;
}

/* Override for Gradio 6.x structure */
.demo-row .gradio-audio [class*="svelte-"] span:not(:has(svg)):not(:has(button)) {
    display: none !important;
}

.demo-row .gradio-dropdown,
.demo-row .gradio-textbox {
    margin-bottom: 2px !important;
}

.demo-row .gradio-row {
    margin-bottom: 2px !important;
}

/* IMPORTANT: Button alignment - push buttons to bottom with margin-top: auto */
.demo-row .gradio-button {
    margin-top: auto !important;
    margin-bottom: 0px !important;
    flex-shrink: 0 !important;
}

/* Output area should not push button down - set flex: 1 */
.demo-row .gradio-html {
    flex: 1 !important;
    margin-bottom: 0 !important;
    display: flex !important;
    flex-direction: column !important;
    max-height: 300px !important;
    overflow-y: auto !important;
}

/* Output audio component (clean_audio_output) height limit */
.demo-row .gradio-audio[data-testid="audio-output"],
.demo-row > div:last-child .gradio-audio {
    max-height: 120px !important;
    min-height: 60px !important;
    height: auto !important;
    margin-bottom: 0px !important;
}


/* ===== CUSTOM ACTION BUTTONS (DEMO CARDS) ===== */
.custom-action-btn,
.custom-action-btn button,
.custom-action-btn button[data-testid="button"],
button.custom-action-btn,
.demo-row .custom-action-btn,
.demo-row .custom-action-btn button {
    width: 100% !important;
    min-width: 100% !important;
    max-width: 100% !important;
    background: linear-gradient(135deg, #6366f1, #7c3aed) !important;
    border: none !important;
    border-radius: 12px !important;
    padding: 8px 16px !important;
    height: 38px !important;
    min-height: 38px !important;
    max-height: 38px !important;
    font-weight: 700 !important;
    font-size: 16px !important;
    letter-spacing: 1.5px !important;
    text-transform: uppercase !important;
    color: white !important;
    box-shadow: 0 4px 20px rgba(124, 58, 237, 0.4) !important;
    transition: all 0.3s ease !important;
}

.custom-action-btn:hover,
.custom-action-btn button:hover,
.custom-action-btn button[data-testid="button"]:hover,
button.custom-action-btn:hover,
.demo-row .custom-action-btn:hover,
.demo-row .custom-action-btn button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 32px rgba(124, 58, 237, 0.6) !important;
    background: linear-gradient(135deg, #6366f1, #7c3aed) !important;
}

/* ===== DECORATIVE ELEMENTS ===== */
.diamond-decoration {
    position: fixed;
    bottom: 40px;
    right: 40px;
    width: 80px;
    height: 80px;
    border: 2px solid rgba(124, 58, 237, 0.2);
    transform: rotate(45deg);
    pointer-events: none;
    z-index: 1;
}

.star-decoration {
    display: none;
}
"""

with gr.Blocks() as demo:
    # Inject custom CSS and decorative elements (positioned fixed, no DOM space)
    gr.HTML(f"""
    
    
    
        
    
    
    """)

    # ==================== HEADER (FLOATING) ====================
    gr.HTML(f"""
    
        
            
                
            
            
                VoiceKit
                MCP Server
            
        
        
    

    
    
        
            
                
                    
                    Documentation
                
                
            
            
                {readme_html}
            
        
    
    """)

    # ==================== TOP ROW: QUICK START + AVAILABLE TOOLS ====================
    with gr.Row(equal_height=True):
        # QUICK START CARD
        with gr.Column(scale=1):
            gr.HTML("""
            
                
                    
                    QUICK START
                

                
                    
                    
                        
                            
                            
                            
                        
                        claude_desktop_config.json
                        
 
                    

                    
                    
                        
                            1
                            2
                            3
                            4
                            5
                            6
                            7
                            8
                            9
                            10
                            11
                            12
                        
                        
                            {
                              "mcpServers": {
                                "voicekit": {
                                  "command": "npx",
                                  "args": [
                                    "-y",
                                    "mcp-remote",
                                    "https://mcp-1st-birthday-voicekit.hf.space/gradio_api/mcp/sse"
                                  ]
                                }
                              }
                            }
                        
                    
                

                
            
            """)

        # AVAILABLE TOOLS CARD
        with gr.Column(scale=1):
            gr.HTML("""
            
                
                    
                    AVAILABLE TOOLS
                
                
                    
                        
                            TOOL
                            PURPOSE
                            INPUT
                            OUTPUT
                        
                    
                    
                        
                            
                                
                                    
                                    extract_embedding
                                
                            
                            Extract 768-dim voice fingerprint
                            audio_base64
                            embedding, model, dim
                        
                        
                            
                                
                                    
                                    match_voice
                                
                            
                            Compare two voice similarities
                            audio1_base64, audio2_base64
                            similarity, tone_score
                        
                        
                            
                                
                                    
                                    analyze_acoustics
                                
                            
                            Analyze pitch, energy, rhythm, tempo
                            audio_base64
                            pitch, energy, rhythm, tempo
                        
                        
                            
                                
                                    
                                    transcribe_audio
                                
                            
                            Convert speech to text
                            audio_base64, language
                            text, language, model
                        
                        
                            
                                
                                    
                                    isolate_voice
                                
                            
                            Remove background music/noise
                            audio_base64
                            isolated_audio_base64, metadata
                        
                        
                            
                                
                                    
                                    grade_voice
                                
                            
                            5-metric comprehensive analysis
                            user_audio, reference_audio, text, category
                            overall, metrics, feedback
                        
                    
                
            
            """)

    # ==================== FIRST ROW: 3 DEMO CARDS ====================
    with gr.Row(equal_height=True, elem_classes="demo-row"):
        # EXTRACT EMBEDDING
        with gr.Column(scale=1, elem_classes="demo-card-column"):
            gr.HTML("""
            
                
                
                    EXTRACT EMBEDDING
                
            
            """)
            embedding_audio = gr.Audio(
                type="filepath",
                label="Audio Input",
                show_label=False,
                format="wav"
            )
            embedding_btn = gr.Button("EXTRACT", variant="primary", size="lg", elem_classes="custom-action-btn")
            embedding_output = gr.HTML(value=create_embedding_empty())

            embedding_btn.click(
                demo_extract_embedding,
                inputs=[embedding_audio],
                outputs=[embedding_output],
                api_visibility="private"
            )

        # COMPARE VOICES
        with gr.Column(scale=1, elem_classes="demo-card-column"):
            gr.HTML("""
            
                
                
                    MATCH VOICE
                
            
            """)
            with gr.Row():
                compare_audio1 = gr.Audio(
                    type="filepath",
                    label="Audio 1",
                    show_label=False,
                    format="wav"
                )
                compare_audio2 = gr.Audio(
                    type="filepath",
                    label="Audio 2",
                    show_label=False,
                    format="wav"
                )
            compare_btn = gr.Button("COMPARE", variant="primary", size="lg", elem_classes="custom-action-btn")
            compare_output = gr.HTML(value=create_compare_empty())

            compare_btn.click(
                demo_match_voice,
                inputs=[compare_audio1, compare_audio2],
                outputs=[compare_output],
                api_visibility="private"
            )

        # ACOUSTIC ANALYSIS
        with gr.Column(scale=1, elem_classes="demo-card-column"):
            gr.HTML("""
            
                
                
                    ANALYZE ACOUSTICS
                
            
            """)
            acoustic_audio = gr.Audio(
                type="filepath",
                label="Audio Input",
                show_label=False,
                format="wav"
            )
            acoustic_btn = gr.Button("ANALYZE", variant="primary", size="lg", elem_classes="custom-action-btn")
            acoustic_output = gr.HTML(value=create_acoustic_empty())

            acoustic_btn.click(
                demo_acoustic_analysis,
                inputs=[acoustic_audio],
                outputs=[acoustic_output],
                api_visibility="private"
            )

    # ==================== SECOND ROW: 3 MORE DEMO CARDS ====================
    with gr.Row(equal_height=True, elem_classes="demo-row"):
        # AUDIO TRANSCRIPTION
        with gr.Column(scale=1, elem_classes="demo-card-column"):
            gr.HTML("""
            
                
                
                    TRANSCRIBE AUDIO
                
            
            """)
            transcribe_audio_input = gr.Audio(
                type="filepath",
                label="Audio Input",
                show_label=False,
                format="wav"
            )
            transcribe_btn = gr.Button("TRANSCRIBE", variant="primary", size="lg", elem_classes="custom-action-btn")
            transcribe_output = gr.HTML(value=create_transcription_empty())

            transcribe_btn.click(
                lambda audio: demo_transcribe_audio(audio, "en"),
                inputs=[transcribe_audio_input],
                outputs=[transcribe_output],
                api_visibility="private"
            )

        # CLEAN AUDIO EXTRACTION
        with gr.Column(scale=1, elem_classes="demo-card-column"):
            gr.HTML("""
            
                
                
                    ISOLATE VOICE
                
            
            """)
            clean_audio_input = gr.Audio(
                type="filepath",
                label="Audio with Background",
                show_label=False,
                format="wav"
            )
            clean_btn = gr.Button("EXTRACT VOICE", variant="primary", size="lg", elem_classes="custom-action-btn")
            clean_audio_output = gr.Audio(label="Clean Audio", type="filepath", visible=True)

            clean_btn.click(
                demo_clean_extraction,
                inputs=[clean_audio_input],
                outputs=[clean_audio_output],
                api_visibility="private"
            )

        # VOICE SIMILARITY
        with gr.Column(scale=1, elem_classes="demo-card-column"):
            gr.HTML("""
            
                
                
                    GRADE VOICE
                
            
            """)
            with gr.Row():
                similarity_user_audio = gr.Audio(
                    type="filepath",
                    label="User Audio",
                    show_label=False,
                    format="wav"
                )
                similarity_ref_audio = gr.Audio(
                    type="filepath",
                    label="Reference Audio",
                    show_label=False,
                    format="wav"
                )
            similarity_btn = gr.Button("ANALYZE", variant="primary", size="lg", elem_classes="custom-action-btn")
            similarity_output = gr.HTML(value=create_similarity_empty())

            similarity_btn.click(
                demo_voice_similarity,
                inputs=[similarity_user_audio, similarity_ref_audio],
                outputs=[similarity_output],
                api_visibility="private"
            )


    # ==================== MCP TOOL INTERFACES (HIDDEN, API ONLY) ====================
    with gr.Row(visible=False):
        # extract_embedding
        mcp_emb_input = gr.Textbox()
        mcp_emb_output = gr.Textbox()
        mcp_emb_btn = gr.Button()
        mcp_emb_btn.click(extract_embedding, inputs=[mcp_emb_input], outputs=[mcp_emb_output])

        # match_voice
        mcp_cmp_input1 = gr.Textbox()
        mcp_cmp_input2 = gr.Textbox()
        mcp_cmp_output = gr.Textbox()
        mcp_cmp_btn = gr.Button()
        mcp_cmp_btn.click(match_voice, inputs=[mcp_cmp_input1, mcp_cmp_input2], outputs=[mcp_cmp_output])

        # analyze_acoustics
        mcp_ac_input = gr.Textbox()
        mcp_ac_output = gr.Textbox()
        mcp_ac_btn = gr.Button()
        mcp_ac_btn.click(analyze_acoustics, inputs=[mcp_ac_input], outputs=[mcp_ac_output])

        # transcribe_audio
        mcp_tr_input = gr.Textbox()
        mcp_tr_lang = gr.Textbox(value="en")
        mcp_tr_output = gr.Textbox()
        mcp_tr_btn = gr.Button()
        mcp_tr_btn.click(transcribe_audio, inputs=[mcp_tr_input, mcp_tr_lang], outputs=[mcp_tr_output])

        # isolate_voice
        mcp_iso_input = gr.Textbox()
        mcp_iso_output = gr.Textbox()
        mcp_iso_btn = gr.Button()
        mcp_iso_btn.click(isolate_voice, inputs=[mcp_iso_input], outputs=[mcp_iso_output])

        # grade_voice
        mcp_sim_user = gr.Textbox()
        mcp_sim_ref = gr.Textbox()
        mcp_sim_text = gr.Textbox()
        mcp_sim_cat = gr.Textbox(value="meme")
        mcp_sim_output = gr.Textbox()
        mcp_sim_btn = gr.Button()
        mcp_sim_btn.click(grade_voice, inputs=[mcp_sim_user, mcp_sim_ref, mcp_sim_text, mcp_sim_cat], outputs=[mcp_sim_output])


if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        mcp_server=True
    )

TOOL	PURPOSE	INPUT	OUTPUT
extract_embedding	Extract 768-dim voice fingerprint	audio_base64	embedding, model, dim
match_voice	Compare two voice similarities	audio1_base64, audio2_base64	similarity, tone_score
analyze_acoustics	Analyze pitch, energy, rhythm, tempo	audio_base64	pitch, energy, rhythm, tempo
transcribe_audio	Convert speech to text	audio_base64, language	text, language, model
isolate_voice	Remove background music/noise	audio_base64	isolated_audio_base64, metadata
grade_voice	5-metric comprehensive analysis	user_audio, reference_audio, text, category	overall, metrics, feedback