# Hugging Face Space: Arif-Badhon/llm-data-analyzer (status: Sleeping)
# File size: 11,253 Bytes

import io
import re

import pandas as pd
import streamlit as st

# Browser-tab title/icon and overall page layout.
page_settings = {
    "page_title": "📊 LLM Data Analyzer",
    "page_icon": "📊",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)

# Page heading and tagline.
st.title("📊 LLM Data Analyzer")
st.write("*Analyze data and chat with AI - Powered by Hugging Face Spaces*")

# Ensure the shared dataframe slot exists in session state; it starts as
# None until a CSV is parsed successfully.
if "current_df" not in st.session_state:
    st.session_state["current_df"] = None

# AI response function with data awareness
def _mentions(text, *keywords):
    """Return True if *text* contains any of *keywords* as a whole word/phrase.

    Word-boundary matching avoids substring false positives such as "hi"
    inside "this" or "count" inside "country".
    """
    alternatives = "|".join(re.escape(keyword) for keyword in keywords)
    return re.search(rf"\b(?:{alternatives})\b", text) is not None


def _numeric_stat_response(title, stats, value_format="{}"):
    """Format a per-column statistic (a pandas Series) as a markdown bullet list."""
    lines = [f"- {col}: {value_format.format(value)}" for col, value in stats.items()]
    return f"📊 **{title} values for numeric columns:**\n" + "\n".join(lines)


def _data_response(prompt_lower, df):
    """Answer a lower-cased prompt using the contents of a non-empty dataframe."""
    # Column labels are stringified so that str.join never fails on
    # non-string labels (e.g. integer column names).
    column_names = [str(col) for col in df.columns]
    numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()

    if _mentions(prompt_lower, "average", "averages", "avg", "mean"):
        if numeric_cols:
            return _numeric_stat_response("Average", df[numeric_cols].mean(), "{:.2f}")
        return "No numeric columns found to show average values."

    if _mentions(prompt_lower, "max", "maximum", "highest"):
        if numeric_cols:
            return _numeric_stat_response("Maximum", df[numeric_cols].max())
        return "No numeric columns found to show max values."

    if _mentions(prompt_lower, "min", "minimum", "lowest"):
        if numeric_cols:
            return _numeric_stat_response("Minimum", df[numeric_cols].min())
        return "No numeric columns found to show min values."

    if _mentions(prompt_lower, "count", "counts", "rows", "how many"):
        return (
            f"📊 Your dataset has **{len(df)} rows** and **{len(df.columns)} columns**."
            f"\n\nColumns: {', '.join(column_names)}"
        )

    if _mentions(prompt_lower, "columns", "fields", "headers"):
        return f"📊 **Dataset Columns ({len(df.columns)}):**\n" + "\n".join(
            f"- {name}" for name in column_names
        )

    if _mentions(prompt_lower, "summary", "overview", "describe"):
        summary = "📊 **Data Summary:**\n"
        summary += f"- Rows: {len(df)}\n"
        summary += f"- Columns: {len(df.columns)}\n"
        summary += f"- Column names: {', '.join(column_names)}\n"
        if numeric_cols:
            summary += f"- Numeric columns: {', '.join(str(col) for col in numeric_cols)}\n"
        return summary

    if _mentions(prompt_lower, "trend", "trends", "pattern", "patterns"):
        return "The data shows various patterns. Check the statistics above to identify trends in the numeric columns."

    # Generic response with data context
    return (
        f"I have access to your data with {len(df)} rows and {len(df.columns)} columns: "
        f"{', '.join(column_names)}. Ask me specific questions like: "
        "What's the average? Show me the max values? How many rows?"
    )


def _fallback_response(prompt_lower):
    """Answer a lower-cased prompt when no usable data is available."""
    if _mentions(prompt_lower, "hello", "hi"):
        return "Hello! I'm the LLM Data Analyzer. Load some data first, then ask me questions about it!"
    if _mentions(prompt_lower, "what can you do", "help"):
        return "I can: 1) Load demo data 2) Analyze your CSV 3) Answer questions about averages, max, min, columns, etc. 4) Chat about your data!"
    if _mentions(prompt_lower, "thank", "thanks"):
        return "You're welcome! Ask me anything about your data!"
    return "Please load some data first (click 'Load Demo Data' or paste CSV), then ask me questions about it!"


def get_ai_response(prompt, df=None):
    """Generate a keyword-driven reply, using the dataframe when one is given.

    The prompt is lower-cased and scanned for whole-word keywords in a fixed
    priority order (average, max, min, count, columns, summary, trend). The
    first matching topic determines the answer.

    Args:
        prompt: The user's question as a string.
        df: Optional pandas DataFrame to answer about. When it is None or
            empty, only the generic greeting/help/thanks replies are used.

    Returns:
        A markdown-formatted response string.
    """
    prompt_lower = prompt.lower()

    # If we have data, provide data-specific responses
    if df is not None and not df.empty:
        return _data_response(prompt_lower, df)

    # Fallback responses (no data)
    return _fallback_response(prompt_lower)

# Build the three top-level tabs: data entry, chat, and the about page.
tab_labels = ["📤 Paste Data", "💬 Chat", "📊 About"]
tab1, tab2, tab3 = st.tabs(tab_labels)

# ============================================================================
# TAB 1: Paste Data
# ============================================================================
with tab1:
    st.header("📤 Analyze Data")

    st.info("💡 Load demo data or paste your CSV to start analyzing!")

    # Demo mode: stash a small sample CSV in session state; the text area
    # below reads it back as its initial value.
    if st.button("📌 Load Demo Data (Click to test)", use_container_width=True):
        demo_csv = """Name,Age,Salary,Department,Experience_Years
Alice,25,50000,Sales,2
Bob,30,60000,IT,5
Charlie,35,75000,HR,8
David,28,55000,Sales,3
Eve,32,65000,IT,6"""
        st.session_state.csv_data = demo_csv
        st.success("✅ Demo data loaded! Check the Chat tab to ask questions about it.")

    st.subheader("Or paste your CSV data here:")
    csv_text = st.text_area(
        "Paste CSV content (headers, comma-separated):",
        value=st.session_state.get('csv_data', ''),
        height=150,
        placeholder="Name,Age,Salary\nAlice,25,50000\nBob,30,60000",
        key="csv_input"
    )

    if not csv_text.strip():
        # Nothing to analyze: drop any previously parsed dataframe so the
        # Chat tab does not keep answering about data that is no longer shown.
        st.session_state.current_df = None
    else:
        # Only the parse itself is guarded, so that later rendering problems
        # are not misreported as CSV parsing errors.
        try:
            df = pd.read_csv(io.StringIO(csv_text))
        except Exception as e:
            # Unparsable input must not leave a stale dataframe behind.
            st.session_state.current_df = None
            st.error(f"❌ Error parsing CSV: {str(e)[:100]}")
            st.info("Make sure your CSV is properly formatted: headers on first line, comma-separated values.")
        else:
            # Store in session state for chat to access
            st.session_state.current_df = df

            st.success(f"✅ Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")

            # Display data preview
            st.subheader("📋 Data Preview")
            st.dataframe(df, use_container_width=True)

            # Display statistics
            st.subheader("📊 Data Statistics")
            col1, col2, col3 = st.columns(3)

            with col1:
                st.metric("Rows", len(df))
            with col2:
                st.metric("Columns", len(df.columns))
            with col3:
                st.metric("Memory", f"{df.memory_usage(deep=True).sum() / 1024:.2f} KB")

            # Detailed statistics (best effort: a failure here only hides the
            # summary table, it does not abort the rest of the tab).
            try:
                numeric_df = df.select_dtypes(include=['number'])
                if not numeric_df.empty:
                    st.write("### Numeric Columns Summary")
                    st.write(numeric_df.describe().T)
                else:
                    st.info("No numeric columns found in dataset.")
            except Exception:
                # `Exception` rather than a bare `except`, so interpreter- and
                # framework-level BaseException signals are not swallowed.
                st.info("Could not generate statistics for this data.")

            # Ask AI about the data
            st.subheader("❓ Ask AI About Your Data")
            question = st.text_input(
                "What would you like to know about this data?",
                placeholder="e.g., What is the average salary? How many rows?",
                key="data_question"
            )

            if question:
                response = get_ai_response(question, df)
                st.success("✅ Analysis Complete")
                st.write(response)

# ============================================================================
# TAB 2: Chat
# ============================================================================
with tab2:
    st.header("💬 Chat with AI Assistant")
    st.write("Have a conversation about your data.")

    # Show current data status
    current_df = st.session_state.current_df
    if current_df is not None:
        st.success(f"✅ Data loaded: {len(current_df)} rows, {len(current_df.columns)} columns")
        st.write(f"Columns: {', '.join(str(col) for col in current_df.columns)}")
    else:
        st.warning("⚠️ No data loaded yet. Go to 'Paste Data' tab and load data first!")

    # Initialize session state for chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Reserve the transcript's position above the input; it is filled in
    # after a submission has been handled so the newest exchange is included.
    history_area = st.container()

    # A form only reports a submission on the run in which it was sent, and
    # clear_on_submit empties the box afterwards. A plain text input keeps
    # its value, which made every later rerun append the same message again.
    with st.form("chat_form", clear_on_submit=True):
        user_input = st.text_input(
            "Type your message:",
            placeholder="Ask me about your data...",
            key="chat_input"
        )
        submitted = st.form_submit_button("Send")

    if submitted and user_input.strip():
        st.session_state.messages.append({"role": "user", "content": user_input})

        # Get response with data context
        response = get_ai_response(user_input, current_df)

        st.session_state.messages.append({
            "role": "assistant",
            "content": response
        })

    # Display chat history (including any exchange added just above)
    with history_area:
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])

# ============================================================================
# TAB 3: About
# ============================================================================
# Static markdown body of the About tab, kept separate from the layout code.
ABOUT_MARKDOWN = """
    ### 🎯 What is this?
    
    **LLM Data Analyzer** is a tool for analyzing data and having conversations about your datasets.
    
    ### 🔧 Technology Stack
    
    - **Framework:** Streamlit
    - **Hosting:** Hugging Face Spaces (Free Tier)
    - **Language:** Python
    
    ### ⚡ Features
    
    1. **Data Analysis**: Paste CSV and analyze your data
    2. **Smart Chat**: Chat with AI about your data
    3. **Statistics**: View comprehensive data summaries
    4. **Demo Mode**: Test with sample data instantly
    
    ### 📝 How to Use
    
    1. **Load Data** - Click "Load Demo Data" or paste your CSV
    2. **View Preview** - See your data in table format
    3. **Chat** - Go to Chat tab and ask questions about your data
    4. **Get Insights** - AI analyzes and answers questions
    
    ### 💡 Example Questions
    
    - "What's the average salary?"
    - "Show me the maximum values"
    - "How many rows do I have?"
    - "What columns are in the data?"
    - "Give me a summary"
    
    ### 📋 CSV Format Example
    
    ```
    Name,Age,Salary,Department
    Alice,25,50000,Sales
    Bob,30,60000,IT
    Charlie,35,75000,HR
    ```
    
    ### 🌐 Powered By
    
    - [Hugging Face](https://huggingface.co/) - AI platform and hosting
    - [Streamlit](https://streamlit.io/) - Web framework
    - [Pandas](https://pandas.pydata.org/) - Data analysis
    
    ### 🛠 Troubleshooting
    
    **Chat can't see my data?**
    - Make sure to load data in the "Paste Data" tab first
    - Then go to "Chat" tab - it will show your data status
    
    **How do I format CSV?**
    - First line: column headers separated by commas
    - Following lines: data values separated by commas
    
    ### 🔗 Links
    
    - [GitHub Repository](https://github.com/Arif-Badhon/LLM-Data-Analyzer)
    - [Hugging Face Hub](https://huggingface.co/)
    
    ---
    
    **Version:** 2.1 | **Last Updated:** Dec 2025
    
    💡 **Note:** Chat now has access to your data! Load data first, then ask questions.
    """

with tab3:
    # Heading followed by the static description text.
    st.header("ℹ️ About This App")
    st.markdown(ABOUT_MARKDOWN)