# Arif
# Updated app.py to version 13
# c85ec07
import io
import re

import pandas as pd
import streamlit as st
# Page configuration: browser-tab title/icon, full-width layout, sidebar open.
# The emoji literals were mis-encoded (UTF-8 bytes decoded through a
# single-byte code page); they are restored to the intended characters.
st.set_page_config(
    page_title="📊 LLM Data Analyzer",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded",
)

st.title("📊 LLM Data Analyzer")
st.write("*Analyze data and chat with AI - Powered by Hugging Face Spaces*")

# Store the dataframe in session state so the most recently parsed DataFrame
# survives Streamlit reruns and can be read from any tab.
if "current_df" not in st.session_state:
    st.session_state.current_df = None
# AI response function with data awareness
def get_ai_response(prompt, df=None):
    """Return a canned, keyword-driven reply to *prompt*, using *df* if given.

    The reply is chosen by keyword matching on the lower-cased prompt; no
    language model is called here.

    Args:
        prompt: Free-text question from the user (matched case-insensitively).
        df: Optional pandas DataFrame to answer questions about. ``None`` or
            an empty frame selects the no-data fallback replies.

    Returns:
        A Markdown-formatted reply string.
    """
    prompt_lower = prompt.lower()

    # If we have data, every branch below returns a data-specific reply.
    if df is not None and not df.empty:
        # Column labels are not guaranteed to be strings (e.g. integer
        # labels), so stringify them before joining.
        column_names = [str(col) for col in df.columns]
        numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()

        if "average" in prompt_lower or "mean" in prompt_lower:
            if numeric_cols:
                means = df[numeric_cols].mean()
                lines = [f"- {col}: {value:.2f}" for col, value in means.items()]
                return "📊 **Average values for numeric columns:**\n" + "\n".join(lines)
            return "No numeric columns found to show average values."

        if "max" in prompt_lower or "highest" in prompt_lower:
            if numeric_cols:
                maxes = df[numeric_cols].max()
                lines = [f"- {col}: {value}" for col, value in maxes.items()]
                return "📊 **Maximum values for numeric columns:**\n" + "\n".join(lines)
            return "No numeric columns found to show max values."

        if "min" in prompt_lower or "lowest" in prompt_lower:
            if numeric_cols:
                mins = df[numeric_cols].min()
                lines = [f"- {col}: {value}" for col, value in mins.items()]
                return "📊 **Minimum values for numeric columns:**\n" + "\n".join(lines)
            return "No numeric columns found to show min values."

        if "count" in prompt_lower or "rows" in prompt_lower or "how many" in prompt_lower:
            return (
                f"📊 Your dataset has **{len(df)} rows** and **{len(df.columns)} columns**."
                f"\n\nColumns: {', '.join(column_names)}"
            )

        if "columns" in prompt_lower or "fields" in prompt_lower or "headers" in prompt_lower:
            return f"📊 **Dataset Columns ({len(df.columns)}):**\n" + "\n".join(
                f"- {name}" for name in column_names
            )

        if "summary" in prompt_lower or "overview" in prompt_lower or "describe" in prompt_lower:
            summary_lines = [
                "📊 **Data Summary:**",
                f"- Rows: {len(df)}",
                f"- Columns: {len(df.columns)}",
                f"- Column names: {', '.join(column_names)}",
            ]
            if numeric_cols:
                summary_lines.append(
                    f"- Numeric columns: {', '.join(map(str, numeric_cols))}"
                )
            return "\n".join(summary_lines) + "\n"

        if "trend" in prompt_lower or "pattern" in prompt_lower:
            return "The data shows various patterns. Check the statistics above to identify trends in the numeric columns."

        # Generic response with data context
        return (
            f"I have access to your data with {len(df)} rows and {len(df.columns)} columns: "
            f"{', '.join(column_names)}. Ask me specific questions like: "
            "What's the average? Show me the max values? How many rows?"
        )

    # Fallback responses (no data).
    # "hi" must match as a whole word: a plain substring test also fires on
    # "this", "which", "anything", ... and hijacks unrelated prompts.
    if "hello" in prompt_lower or re.search(r"\bhi\b", prompt_lower):
        return "Hello! I'm the LLM Data Analyzer. Load some data first, then ask me questions about it!"
    if "what can you do" in prompt_lower or "help" in prompt_lower:
        return "I can: 1) Load demo data 2) Analyze your CSV 3) Answer questions about averages, max, min, columns, etc. 4) Chat about your data!"
    if "thank" in prompt_lower:
        return "You're welcome! Ask me anything about your data!"
    return "Please load some data first (click 'Load Demo Data' or paste CSV), then ask me questions about it!"
# Create the three top-level tabs; each `with tabN:` section renders into one.
# Tab-label emoji were mis-encoded and are restored to the intended characters.
tab1, tab2, tab3 = st.tabs(["📤 Paste Data", "💬 Chat", "📊 About"])
# ============================================================================
# TAB 1: Paste Data
# ============================================================================
with tab1:
    st.header("📤 Analyze Data")
    st.info("💡 Load demo data or paste your CSV to start analyzing!")

    # Demo mode: stash a small sample CSV in session state; the text area
    # below reads it back as its initial value.
    if st.button("📌 Load Demo Data (Click to test)", use_container_width=True):
        demo_csv = """Name,Age,Salary,Department,Experience_Years
Alice,25,50000,Sales,2
Bob,30,60000,IT,5
Charlie,35,75000,HR,8
David,28,55000,Sales,3
Eve,32,65000,IT,6"""
        st.session_state.csv_data = demo_csv
        st.success("✅ Demo data loaded! Check the Chat tab to ask questions about it.")

    st.subheader("Or paste your CSV data here:")
    csv_text = st.text_area(
        "Paste CSV content (headers, comma-separated):",
        value=st.session_state.get('csv_data', ''),
        height=150,
        placeholder="Name,Age,Salary\nAlice,25,50000\nBob,30,60000",
        key="csv_input",
    )

    # NOTE(review): current_df is only overwritten on a successful parse, so
    # clearing the text area (or pasting invalid CSV) leaves the previous
    # DataFrame visible to the Chat tab - confirm this is intended.
    if csv_text.strip():
        try:
            # Parse CSV from text
            df = pd.read_csv(io.StringIO(csv_text))

            # Store in session state for chat to access
            st.session_state.current_df = df
            st.success(f"✅ Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")

            # Display data preview
            st.subheader("📋 Data Preview")
            st.dataframe(df, use_container_width=True)

            # Display statistics
            st.subheader("📊 Data Statistics")
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Rows", len(df))
            with col2:
                st.metric("Columns", len(df.columns))
            with col3:
                st.metric("Memory", f"{df.memory_usage(deep=True).sum() / 1024:.2f} KB")

            # Detailed statistics (best effort: a failure here must not hide
            # the preview above). `except Exception` instead of a bare
            # `except:` so SystemExit/KeyboardInterrupt and other
            # BaseException-derived control flow are not swallowed.
            try:
                numeric_df = df.select_dtypes(include=['number'])
                if not numeric_df.empty:
                    st.write("### Numeric Columns Summary")
                    st.write(numeric_df.describe().T)
                else:
                    st.info("No numeric columns found in dataset.")
            except Exception:
                st.info("Could not generate statistics for this data.")

            # Ask AI about the data
            st.subheader("❓ Ask AI About Your Data")
            question = st.text_input(
                "What would you like to know about this data?",
                placeholder="e.g., What is the average salary? How many rows?",
                key="data_question",
            )
            if question:
                response = get_ai_response(question, df)
                st.success("✅ Analysis Complete")
                st.write(response)
        except Exception as e:
            # Top-level boundary for this tab: show a short message instead of
            # aborting the script run, so the other tabs still render.
            st.error(f"❌ Error parsing CSV: {str(e)[:100]}")
            st.info("Make sure your CSV is properly formatted: headers on first line, comma-separated values.")
# ============================================================================
# TAB 2: Chat
# ============================================================================
with tab2:
    st.header("💬 Chat with AI Assistant")
    st.write("Have a conversation about your data.")

    # Show current data status
    if st.session_state.current_df is not None:
        st.success(f"✅ Data loaded: {len(st.session_state.current_df)} rows, {len(st.session_state.current_df.columns)} columns")
        st.write(f"Columns: {', '.join(st.session_state.current_df.columns.tolist())}")
    else:
        st.warning("⚠️ No data loaded yet. Go to 'Paste Data' tab and load data first!")

    # Initialize session state for chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    def _handle_chat_submit():
        """Record one user/assistant exchange, then clear the input box.

        Runs as the text input's ``on_change`` callback, i.e. once per
        submitted message and before the script body reruns. A text input
        keeps its value across reruns, so handling it inline with
        ``if user_input:`` appended the same exchange again on every rerun
        (including reruns triggered by widgets in other tabs).
        """
        text = st.session_state.chat_input
        if not text:
            return
        history = st.session_state.setdefault("messages", [])
        history.append({"role": "user", "content": text})
        # Get response with data context
        reply = get_ai_response(text, st.session_state.current_df)
        history.append({"role": "assistant", "content": reply})
        # Clearing the widget value lets the same question be asked again later.
        st.session_state.chat_input = ""

    # Display chat history. The callback has already appended the newest
    # exchange by the time this loop runs, so the latest question and answer
    # appear here in order.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Chat input
    st.text_input(
        "Type your message:",
        placeholder="Ask me about your data...",
        key="chat_input",
        on_change=_handle_chat_submit,
    )
# ============================================================================
# TAB 3: About
# ============================================================================
with tab3:
    st.header("ℹ️ About This App")
    # Static help text rendered as Markdown. Emoji that were mis-encoded
    # (UTF-8 bytes decoded through a single-byte code page) are restored;
    # the wording is otherwise unchanged.
    st.markdown("""
### 🎯 What is this?
**LLM Data Analyzer** is a tool for analyzing data and having conversations about your datasets.
### 🔧 Technology Stack
- **Framework:** Streamlit
- **Hosting:** Hugging Face Spaces (Free Tier)
- **Language:** Python
### ⚡ Features
1. **Data Analysis**: Paste CSV and analyze your data
2. **Smart Chat**: Chat with AI about your data
3. **Statistics**: View comprehensive data summaries
4. **Demo Mode**: Test with sample data instantly
### 📝 How to Use
1. **Load Data** - Click "Load Demo Data" or paste your CSV
2. **View Preview** - See your data in table format
3. **Chat** - Go to Chat tab and ask questions about your data
4. **Get Insights** - AI analyzes and answers questions
### 💡 Example Questions
- "What's the average salary?"
- "Show me the maximum values"
- "How many rows do I have?"
- "What columns are in the data?"
- "Give me a summary"
### 📋 CSV Format Example
```
Name,Age,Salary,Department
Alice,25,50000,Sales
Bob,30,60000,IT
Charlie,35,75000,HR
```
### 🌐 Powered By
- [Hugging Face](https://huggingface.co/) - AI platform and hosting
- [Streamlit](https://streamlit.io/) - Web framework
- [Pandas](https://pandas.pydata.org/) - Data analysis
### 🛠 Troubleshooting
**Chat can't see my data?**
- Make sure to load data in the "Paste Data" tab first
- Then go to "Chat" tab - it will show your data status
**How do I format CSV?**
- First line: column headers separated by commas
- Following lines: data values separated by commas
### 🔗 Links
- [GitHub Repository](https://github.com/Arif-Badhon/LLM-Data-Analyzer)
- [Hugging Face Hub](https://huggingface.co/)
---
**Version:** 2.1 | **Last Updated:** Dec 2025
💡 **Note:** Chat now has access to your data! Load data first, then ask questions.
""")