Spaces:

Arif-Badhon
/

llm-data-analyzer

Sleeping

Arif

Updated app.py to version 13

c85ec07 5 days ago

11.3 kB

	import streamlit as st
	import pandas as pd
	import io

	# Page configuration: wide layout, sidebar expanded on first load.
	_PAGE_SETTINGS = {
	    "page_title": "📊 LLM Data Analyzer",
	    "page_icon": "📊",
	    "layout": "wide",
	    "initial_sidebar_state": "expanded",
	}
	st.set_page_config(**_PAGE_SETTINGS)

	st.title("📊 LLM Data Analyzer")
	st.write("Analyze data and chat with AI - Powered by Hugging Face Spaces")

	# Seed the session-wide dataframe slot exactly once; later reruns keep whatever
	# has been stored under this key.
	if "current_df" not in st.session_state:
	    st.session_state["current_df"] = None

	# AI response function with data awareness
	def _normalize_prompt(prompt):
	    """Return *prompt* lower-cased, with punctuation removed and space padding.

	    Every run of non-alphanumeric characters becomes a single space and the
	    result is wrapped in one leading and one trailing space, so a whole word
	    or phrase can be detected with a plain substring test such as
	    ``" hi " in padded``.
	    """
	    words = "".join(ch if ch.isalnum() else " " for ch in prompt.lower()).split()
	    return f" {' '.join(words)} "


	def _mentions(padded_prompt, *keywords):
	    """Return True if any keyword occurs in *padded_prompt* as a whole word/phrase."""
	    return any(f" {keyword} " in padded_prompt for keyword in keywords)


	def _format_stats(title, stats, value_format="{}"):
	    """Render per-column statistics (a pandas Series) as a titled bullet list."""
	    bullets = [
	        f"- {col}: {value_format.format(value)}" for col, value in stats.items()
	    ]
	    return title + "\n" + "\n".join(bullets)


	def get_ai_response(prompt, df=None):
	    """Return a canned, rule-based reply to *prompt*, using *df* when available.

	    The prompt is matched against fixed keyword groups; the first group that
	    matches decides the reply. Keywords are matched as whole words (or whole
	    phrases), so "hi" does not fire on "this" and "count" does not fire on
	    "country".

	    Args:
	        prompt: The user's question or message.
	        df: Optional pandas DataFrame to answer questions about. ``None`` or an
	            empty frame selects the "no data" replies.

	    Returns:
	        The reply text (plain text with ``-`` bullet lines where applicable).
	    """
	    asked = _normalize_prompt(prompt)

	    # If we have data, provide data-specific responses
	    if df is not None and not df.empty:
	        numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()
	        # Column labels are not necessarily strings (e.g. integer labels) and
	        # str.join rejects non-strings, so convert them up front.
	        column_names = [str(col) for col in df.columns]
	        joined_columns = ", ".join(column_names)

	        if _mentions(asked, "average", "averages", "avg", "mean", "means"):
	            if numeric_cols:
	                return _format_stats(
	                    "📊 Average values for numeric columns:",
	                    df[numeric_cols].mean(),
	                    "{:.2f}",
	                )
	            return "No numeric columns found to show average values."

	        if _mentions(asked, "max", "maximum", "maxima", "highest"):
	            if numeric_cols:
	                return _format_stats(
	                    "📊 Maximum values for numeric columns:",
	                    df[numeric_cols].max(),
	                )
	            return "No numeric columns found to show max values."

	        if _mentions(asked, "min", "minimum", "minima", "lowest"):
	            if numeric_cols:
	                return _format_stats(
	                    "📊 Minimum values for numeric columns:",
	                    df[numeric_cols].min(),
	                )
	            return "No numeric columns found to show min values."

	        if _mentions(asked, "count", "counts", "rows", "how many"):
	            return (
	                f"📊 Your dataset has {len(df)} rows and {len(df.columns)} columns."
	                f"\n\nColumns: {joined_columns}"
	            )

	        if _mentions(asked, "columns", "fields", "headers"):
	            return f"📊 Dataset Columns ({len(df.columns)}):\n" + "\n".join(
	                f"- {name}" for name in column_names
	            )

	        if _mentions(asked, "summary", "overview", "describe"):
	            summary_lines = [
	                "📊 Data Summary:",
	                f"- Rows: {len(df)}",
	                f"- Columns: {len(df.columns)}",
	                f"- Column names: {joined_columns}",
	            ]
	            if numeric_cols:
	                numeric_names = ", ".join(str(col) for col in numeric_cols)
	                summary_lines.append(f"- Numeric columns: {numeric_names}")
	            return "\n".join(summary_lines) + "\n"

	        if _mentions(asked, "trend", "trends", "pattern", "patterns"):
	            return "The data shows various patterns. Check the statistics above to identify trends in the numeric columns."

	        # Generic response with data context
	        return f"I have access to your data with {len(df)} rows and {len(df.columns)} columns: {joined_columns}. Ask me specific questions like: What's the average? Show me the max values? How many rows?"

	    # Fallback responses (no data)
	    if _mentions(asked, "hello", "hi"):
	        return "Hello! I'm the LLM Data Analyzer. Load some data first, then ask me questions about it!"
	    if _mentions(asked, "what can you do", "help"):
	        return "I can: 1) Load demo data 2) Analyze your CSV 3) Answer questions about averages, max, min, columns, etc. 4) Chat about your data!"
	    if _mentions(asked, "thank", "thanks"):
	        return "You're welcome! Ask me anything about your data!"
	    return "Please load some data first (click 'Load Demo Data' or paste CSV), then ask me questions about it!"

	# Create tabs
	tab1, tab2, tab3 = st.tabs(["📤 Paste Data", "💬 Chat", "📊 About"])

	# Sample dataset behind the "Load Demo Data" button. Built from explicit rows so
	# the CSV text never depends on how this source file happens to be indented.
	DEMO_CSV = "\n".join([
	    "Name,Age,Salary,Department,Experience_Years",
	    "Alice,25,50000,Sales,2",
	    "Bob,30,60000,IT,5",
	    "Charlie,35,75000,HR,8",
	    "David,28,55000,Sales,3",
	    "Eve,32,65000,IT,6",
	])

	# ============================================================================
	# TAB 1: Paste Data
	# ============================================================================
	with tab1:
	    st.header("📤 Analyze Data")

	    st.info("💡 Load demo data or paste your CSV to start analyzing!")

	    # The text area is driven through its own session-state key ("csv_input").
	    # Writing that key before the widget is created is what lets the demo button
	    # overwrite the box even after the user has edited it.
	    if "csv_input" not in st.session_state:
	        st.session_state["csv_input"] = st.session_state.get("csv_data", "")

	    # Demo mode
	    if st.button("📌 Load Demo Data (Click to test)", use_container_width=True):
	        # "csv_data" is still written so this key keeps its previous meaning.
	        st.session_state["csv_data"] = DEMO_CSV
	        st.session_state["csv_input"] = DEMO_CSV
	        st.success("✅ Demo data loaded! Check the Chat tab to ask questions about it.")

	    st.subheader("Or paste your CSV data here:")
	    csv_text = st.text_area(
	        "Paste CSV content (headers, comma-separated):",
	        height=150,
	        placeholder="Name,Age,Salary\nAlice,25,50000\nBob,30,60000",
	        key="csv_input",
	    )

	    if csv_text.strip():
	        # Only the parse itself is guarded, so a failure further down is not
	        # reported to the user as a CSV formatting problem.
	        try:
	            df = pd.read_csv(io.StringIO(csv_text))
	        except Exception as e:  # UI boundary: report any parse failure in-app
	            # Drop the previous dataframe so the Chat tab does not keep
	            # answering from data that is no longer in the box.
	            st.session_state.current_df = None
	            st.error(f"❌ Error parsing CSV: {str(e)[:100]}")
	            st.info("Make sure your CSV is properly formatted: headers on first line, comma-separated values.")
	        else:
	            # Store in session state for chat to access
	            st.session_state.current_df = df

	            st.success(f"✅ Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")

	            # Display data preview
	            st.subheader("📋 Data Preview")
	            st.dataframe(df, use_container_width=True)

	            # Display statistics
	            st.subheader("📊 Data Statistics")
	            col1, col2, col3 = st.columns(3)

	            with col1:
	                st.metric("Rows", len(df))
	            with col2:
	                st.metric("Columns", len(df.columns))
	            with col3:
	                st.metric("Memory", f"{df.memory_usage(deep=True).sum() / 1024:.2f} KB")

	            # Detailed statistics (best effort: a failure here only hides the table)
	            try:
	                numeric_df = df.select_dtypes(include=["number"])
	                if not numeric_df.empty:
	                    st.write("### Numeric Columns Summary")
	                    st.write(numeric_df.describe().T)
	                else:
	                    st.info("No numeric columns found in dataset.")
	            except Exception:
	                st.info("Could not generate statistics for this data.")

	            # Ask AI about the data
	            st.subheader("❓ Ask AI About Your Data")
	            question = st.text_input(
	                "What would you like to know about this data?",
	                placeholder="e.g., What is the average salary? How many rows?",
	                key="data_question",
	            )

	            if question:
	                response = get_ai_response(question, df)
	                st.success("✅ Analysis Complete")
	                st.write(response)
	    else:
	        # Nothing in the box means nothing is loaded.
	        st.session_state.current_df = None

	# ============================================================================
	# TAB 2: Chat
	# ============================================================================
	def _submit_chat_message():
	    """Record the submitted chat message and its reply, then clear the input box.

	    Registered as the ``on_change`` callback of the chat text input, so each
	    submission is appended to the history exactly once instead of once per
	    script rerun.
	    """
	    text = st.session_state.get("chat_input", "").strip()
	    if not text:
	        return

	    if "messages" not in st.session_state:
	        st.session_state["messages"] = []

	    # Get response with data context
	    reply = get_ai_response(text, st.session_state.get("current_df"))

	    st.session_state["messages"].append({"role": "user", "content": text})
	    st.session_state["messages"].append({"role": "assistant", "content": reply})

	    # Empty the box so the same text is not left sitting in the input.
	    st.session_state["chat_input"] = ""


	with tab2:
	    st.header("💬 Chat with AI Assistant")
	    st.write("Have a conversation about your data.")

	    # Show current data status
	    current_df = st.session_state.current_df
	    if current_df is not None:
	        st.success(f"✅ Data loaded: {len(current_df)} rows, {len(current_df.columns)} columns")
	        # str() guards against non-string column labels, which str.join rejects.
	        st.write(f"Columns: {', '.join(str(col) for col in current_df.columns)}")
	    else:
	        st.warning("⚠️ No data loaded yet. Go to 'Paste Data' tab and load data first!")

	    # Initialize session state for chat history
	    if "messages" not in st.session_state:
	        st.session_state.messages = []

	    # Display chat history. The callback above has already appended the latest
	    # exchange, so it is rendered here together with the older messages.
	    for message in st.session_state.messages:
	        with st.chat_message(message["role"]):
	            st.markdown(message["content"])

	    # Chat input
	    st.text_input(
	        "Type your message:",
	        placeholder="Ask me about your data...",
	        key="chat_input",
	        on_change=_submit_chat_message,
	    )

	# ============================================================================
	# TAB 3: About
	# ============================================================================
	# Static Markdown for the About tab. The literal is raw so that the Markdown
	# escape "\|" is kept verbatim instead of being an invalid Python escape
	# sequence (a SyntaxWarning on recent Python versions).
	ABOUT_MARKDOWN = r"""
	### 🎯 What is this?

	LLM Data Analyzer is a tool for analyzing data and having conversations about your datasets.

	### 🔧 Technology Stack

	- Framework: Streamlit
	- Hosting: Hugging Face Spaces (Free Tier)
	- Language: Python

	### ⚡ Features

	1. Data Analysis: Paste CSV and analyze your data
	2. Smart Chat: Chat with AI about your data
	3. Statistics: View comprehensive data summaries
	4. Demo Mode: Test with sample data instantly

	### 📝 How to Use

	1. Load Data - Click "Load Demo Data" or paste your CSV
	2. View Preview - See your data in table format
	3. Chat - Go to Chat tab and ask questions about your data
	4. Get Insights - AI analyzes and answers questions

	### 💡 Example Questions

	- "What's the average salary?"
	- "Show me the maximum values"
	- "How many rows do I have?"
	- "What columns are in the data?"
	- "Give me a summary"

	### 📋 CSV Format Example

	```
	Name,Age,Salary,Department
	Alice,25,50000,Sales
	Bob,30,60000,IT
	Charlie,35,75000,HR
	```

	### 🌐 Powered By

	- [Hugging Face](https://huggingface.co/) - AI platform and hosting
	- [Streamlit](https://streamlit.io/) - Web framework
	- [Pandas](https://pandas.pydata.org/) - Data analysis

	### 🛠 Troubleshooting

	Chat can't see my data?
	- Make sure to load data in the "Paste Data" tab first
	- Then go to "Chat" tab - it will show your data status

	How do I format CSV?
	- First line: column headers separated by commas
	- Following lines: data values separated by commas

	### 🔗 Links

	- [GitHub Repository](https://github.com/Arif-Badhon/LLM-Data-Analyzer)
	- [Hugging Face Hub](https://huggingface.co/)

	---

	Version: 2.1 \| Last Updated: Dec 2025

	💡 Note: Chat now has access to your data! Load data first, then ask questions.
	"""

	with tab3:
	    st.header("ℹ️ About This App")

	    st.markdown(ABOUT_MARKDOWN)