Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import io | |
# Page configuration: collected in one mapping, then applied in a single call.
page_settings = {
    "page_title": "π LLM Data Analyzer",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)

# Page heading and tagline.
st.title("π LLM Data Analyzer")
st.write("*Analyze data and chat with AI - Powered by Hugging Face Spaces*")

# Seed the shared dataframe slot once per session; it stays None until
# something stores a parsed DataFrame under the same key.
if "current_df" not in st.session_state:
    st.session_state["current_df"] = None
| # AI response function with data awareness | |
def get_ai_response(prompt, df=None):
    """Return a rule-based reply to ``prompt``, using ``df`` when it has rows.

    The prompt is lower-cased and matched against fixed keywords; no model is
    called. With a non-empty DataFrame the first matching rule wins, in this
    order: average/mean, max/highest, min/lowest, count/rows/how many,
    columns/fields/headers, summary/overview/describe, trend/pattern, and
    finally a generic reply describing the data. With ``df`` missing or
    empty, the reply is a greeting, help text, thanks, or a hint to load
    data first.

    Args:
        prompt: The user's question or message (a string).
        df: Optional pandas DataFrame to describe. ``None`` or an empty
            frame selects the no-data replies.

    Returns:
        A Markdown-formatted response string.
    """
    prompt_lower = prompt.lower()

    def mentions(*keywords):
        """Return True if any keyword occurs as a substring of the prompt."""
        return any(keyword in prompt_lower for keyword in keywords)

    # If we have data, provide data-specific responses
    if df is not None and not df.empty:
        # Column labels are not guaranteed to be strings, and str.join only
        # accepts strings, so stringify them once up front.
        column_names = [str(col) for col in df.columns]
        column_list = ', '.join(column_names)
        numeric_cols = df.select_dtypes(include=['number']).columns.tolist()

        if mentions("average", "mean"):
            if numeric_cols:
                means = df[numeric_cols].mean()
                # items() pairs each label with its own value, which stays
                # correct even when labels repeat.
                return "π **Average values for numeric columns:**\n" + "\n".join(
                    f"- {col}: {value:.2f}" for col, value in means.items()
                )
            return "The data summary shows average values for numeric columns."

        if mentions("max", "highest"):
            if numeric_cols:
                maxes = df[numeric_cols].max()
                return "π **Maximum values for numeric columns:**\n" + "\n".join(
                    f"- {col}: {value}" for col, value in maxes.items()
                )
            return "No numeric columns found to show max values."

        if mentions("min", "lowest"):
            if numeric_cols:
                mins = df[numeric_cols].min()
                return "π **Minimum values for numeric columns:**\n" + "\n".join(
                    f"- {col}: {value}" for col, value in mins.items()
                )
            return "No numeric columns found to show min values."

        if mentions("count", "rows", "how many"):
            return f"π Your dataset has **{len(df)} rows** and **{len(df.columns)} columns**.\n\nColumns: {column_list}"

        if mentions("columns", "fields", "headers"):
            return f"π **Dataset Columns ({len(df.columns)}):**\n" + "\n".join(
                f"- {col}" for col in column_names
            )

        if mentions("summary", "overview", "describe"):
            summary = "π **Data Summary:**\n"
            summary += f"- Rows: {len(df)}\n"
            summary += f"- Columns: {len(df.columns)}\n"
            summary += f"- Column names: {column_list}\n"
            if numeric_cols:
                summary += f"- Numeric columns: {', '.join(map(str, numeric_cols))}\n"
            return summary

        if mentions("trend", "pattern"):
            return "The data shows various patterns. Check the statistics above to identify trends in the numeric columns."

        # Generic response with data context
        return f"I have access to your data with {len(df)} rows and {len(df.columns)} columns: {column_list}. Ask me specific questions like: What's the average? Show me the max values? How many rows?"

    # Fallback responses (no data)
    # "hi" is matched as a whole word: as a substring it also fires on
    # "this", "which", "something", ... and would shadow the help/thanks
    # replies below.
    prompt_words = {word.strip(".,!?;:'\"()") for word in prompt_lower.split()}
    if "hello" in prompt_lower or "hi" in prompt_words:
        return "Hello! I'm the LLM Data Analyzer. Load some data first, then ask me questions about it!"
    if mentions("what can you do", "help"):
        return "I can: 1) Load demo data 2) Analyze your CSV 3) Answer questions about averages, max, min, columns, etc. 4) Chat about your data!"
    if "thank" in prompt_lower:
        return "You're welcome! Ask me anything about your data!"
    return "Please load some data first (click 'Load Demo Data' or paste CSV), then ask me questions about it!"
# Create tabs
tab1, tab2, tab3 = st.tabs(["π€ Paste Data", "π¬ Chat", "π About"])

# ============================================================================
# TAB 1: Paste Data
# ============================================================================
# Lets the user load a demo CSV or paste their own, parses it with pandas,
# stores the frame in st.session_state.current_df, and shows a preview, basic
# statistics and a keyword-based question box.
with tab1:
    st.header("π€ Analyze Data")
    st.info("π‘ Load demo data or paste your CSV to start analyzing!")

    # Demo mode
    if st.button("π Load Demo Data (Click to test)", use_container_width=True):
        demo_csv = """Name,Age,Salary,Department,Experience_Years
Alice,25,50000,Sales,2
Bob,30,60000,IT,5
Charlie,35,75000,HR,8
David,28,55000,Sales,3
Eve,32,65000,IT,6"""
        st.session_state.csv_data = demo_csv
        st.success("β Demo data loaded! Check the Chat tab to ask questions about it.")

    st.subheader("Or paste your CSV data here:")
    # NOTE(review): this widget is keyed ("csv_input") and also receives a
    # value= default taken from csv_data; confirm on the deployed Streamlit
    # version that clicking the demo button actually refreshes the text shown.
    csv_text = st.text_area(
        "Paste CSV content (headers, comma-separated):",
        value=st.session_state.get('csv_data', ''),
        height=150,
        placeholder="Name,Age,Salary\nAlice,25,50000\nBob,30,60000",
        key="csv_input"
    )

    if csv_text.strip():
        try:
            # Parse CSV from text
            df = pd.read_csv(io.StringIO(csv_text))

            # Store in session state for chat to access
            st.session_state.current_df = df
            st.success(f"β Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")

            # Display data preview
            st.subheader("π Data Preview")
            st.dataframe(df, use_container_width=True)

            # Display statistics
            st.subheader("π Data Statistics")
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Rows", len(df))
            with col2:
                st.metric("Columns", len(df.columns))
            with col3:
                st.metric("Memory", f"{df.memory_usage(deep=True).sum() / 1024:.2f} KB")

            # Detailed statistics
            try:
                numeric_df = df.select_dtypes(include=['number'])
                if not numeric_df.empty:
                    st.write("### Numeric Columns Summary")
                    st.write(numeric_df.describe().T)
                else:
                    st.info("No numeric columns found in dataset.")
            except Exception:
                # Best-effort section. Catch Exception rather than using a
                # bare except, which would also trap BaseException subclasses
                # such as KeyboardInterrupt and SystemExit.
                st.info("Could not generate statistics for this data.")

            # Ask AI about the data
            st.subheader("β Ask AI About Your Data")
            question = st.text_input(
                "What would you like to know about this data?",
                placeholder="e.g., What is the average salary? How many rows?",
                key="data_question"
            )
            if question:
                response = get_ai_response(question, df)
                st.success("β Analysis Complete")
                st.write(response)
        except Exception as e:
            # Top-level boundary for this tab: show a short message instead
            # of a traceback. The error text is truncated to 100 characters.
            st.error(f"β Error parsing CSV: {str(e)[:100]}")
            st.info("Make sure your CSV is properly formatted: headers on first line, comma-separated values.")
# ============================================================================
# TAB 2: Chat
# ============================================================================
# Shows whether a DataFrame is loaded, renders the stored conversation, and
# answers each submitted message with get_ai_response().
with tab2:
    st.header("π¬ Chat with AI Assistant")
    st.write("Have a conversation about your data.")

    # Show current data status
    chat_df = st.session_state.current_df
    if chat_df is not None:
        st.success(f"β Data loaded: {len(chat_df)} rows, {len(chat_df.columns)} columns")
        # Labels are stringified so non-string column labels cannot break join().
        st.write(f"Columns: {', '.join(map(str, chat_df.columns))}")
    else:
        st.warning("β οΈ No data loaded yet. Go to 'Paste Data' tab and load data first!")

    # Initialize session state for chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    def _handle_chat_submit():
        """Record the submitted message and its reply once, then clear the box.

        A text input keeps its value across reruns, so handling the message
        in the script body would append the same exchange to the history
        again on every later rerun. Handling it in the widget's on_change
        callback and resetting the widget value avoids that duplication.
        """
        message = st.session_state.chat_input
        if not message:
            return
        # Get response with data context
        reply = get_ai_response(message, st.session_state.current_df)
        st.session_state.messages.append({"role": "user", "content": message})
        st.session_state.messages.append({"role": "assistant", "content": reply})
        # Empty the input so the next rerun sees no pending message.
        st.session_state.chat_input = ""

    # Display chat history. The callback has already appended the newest
    # exchange by the time this loop runs, so the latest reply appears here.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Chat input
    st.text_input(
        "Type your message:",
        placeholder="Ask me about your data...",
        key="chat_input",
        on_change=_handle_chat_submit,
    )
# ============================================================================
# TAB 3: About
# ============================================================================
# Static help page: a header followed by one Markdown document.
with tab3:
    about_markdown = """
### π― What is this?
**LLM Data Analyzer** is a tool for analyzing data and having conversations about your datasets.
### π§ Technology Stack
- **Framework:** Streamlit
- **Hosting:** Hugging Face Spaces (Free Tier)
- **Language:** Python
### β‘ Features
1. **Data Analysis**: Paste CSV and analyze your data
2. **Smart Chat**: Chat with AI about your data
3. **Statistics**: View comprehensive data summaries
4. **Demo Mode**: Test with sample data instantly
### π How to Use
1. **Load Data** - Click "Load Demo Data" or paste your CSV
2. **View Preview** - See your data in table format
3. **Chat** - Go to Chat tab and ask questions about your data
4. **Get Insights** - AI analyzes and answers questions
### π‘ Example Questions
- "What's the average salary?"
- "Show me the maximum values"
- "How many rows do I have?"
- "What columns are in the data?"
- "Give me a summary"
### π CSV Format Example
```
Name,Age,Salary,Department
Alice,25,50000,Sales
Bob,30,60000,IT
Charlie,35,75000,HR
```
### π Powered By
- [Hugging Face](https://huggingface.co/) - AI platform and hosting
- [Streamlit](https://streamlit.io/) - Web framework
- [Pandas](https://pandas.pydata.org/) - Data analysis
### π Troubleshooting
**Chat can't see my data?**
- Make sure to load data in the "Paste Data" tab first
- Then go to "Chat" tab - it will show your data status
**How do I format CSV?**
- First line: column headers separated by commas
- Following lines: data values separated by commas
### π Links
- [GitHub Repository](https://github.com/Arif-Badhon/LLM-Data-Analyzer)
- [Hugging Face Hub](https://huggingface.co/)
---
**Version:** 2.1 | **Last Updated:** Dec 2025
π‘ **Note:** Chat now has access to your data! Load data first, then ask questions.
"""
    st.header("βΉοΈ About This App")
    st.markdown(about_markdown)