File size: 11,253 Bytes
814316f
 
94b7bfa
814316f
 
 
 
 
 
 
 
 
 
e3d2b77
814316f
c85ec07
 
 
 
 
 
 
e3d2b77
 
c85ec07
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3d2b77
c85ec07
e3d2b77
c85ec07
e3d2b77
c85ec07
814316f
 
94b7bfa
814316f
 
94b7bfa
814316f
 
94b7bfa
 
c85ec07
94b7bfa
 
 
 
 
 
 
 
 
 
c85ec07
94b7bfa
 
 
 
 
 
 
 
814316f
 
94b7bfa
814316f
94b7bfa
 
9f22029
c85ec07
 
 
94b7bfa
814316f
 
 
94b7bfa
814316f
 
 
 
 
 
ca8b7a3
814316f
ca8b7a3
814316f
94b7bfa
814316f
 
9f22029
ca8b7a3
 
 
 
 
 
9f22029
 
814316f
 
 
 
 
c85ec07
0d96540
814316f
 
 
c85ec07
ca37c17
 
814316f
 
94b7bfa
 
814316f
 
 
 
 
 
c85ec07
 
 
 
 
 
 
 
814316f
 
 
 
 
 
 
 
 
 
24b4795
0d96540
 
c85ec07
0d96540
 
814316f
 
ca37c17
814316f
 
c85ec07
 
ca37c17
 
 
 
 
 
 
 
 
 
 
814316f
 
 
 
 
 
 
 
 
 
e3d2b77
814316f
 
 
9581ef6
814316f
9581ef6
814316f
9581ef6
814316f
94b7bfa
c85ec07
e3d2b77
94b7bfa
814316f
9581ef6
814316f
c85ec07
 
 
 
 
 
 
 
 
 
 
 
94b7bfa
 
 
 
 
 
 
 
 
814316f
9581ef6
814316f
e3d2b77
9581ef6
e3d2b77
814316f
94b7bfa
9f22029
c85ec07
 
 
814316f
94b7bfa
 
 
814316f
 
 
9581ef6
 
814316f
 
 
c85ec07
e3d2b77
c85ec07
814316f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
import streamlit as st
import pandas as pd
import io

# Page-level settings, gathered in one mapping and applied in a single call
page_settings = {
    "page_title": "πŸ“Š LLM Data Analyzer",
    "page_icon": "πŸ“Š",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)

# Heading and tagline shown above the tabs
st.title("πŸ“Š LLM Data Analyzer")
st.write("*Analyze data and chat with AI - Powered by Hugging Face Spaces*")

# Create the shared dataframe slot once per session; it starts out empty (None)
if "current_df" not in st.session_state:
    st.session_state["current_df"] = None

# AI response function with data awareness
def get_ai_response(prompt, df=None):
    """Return a canned, keyword-driven reply to ``prompt``.

    No model is called: the prompt is lower-cased and scanned for keywords.
    When ``df`` holds at least one row, the reply is computed from it
    (averages, max/min, row/column counts, column list, summary). Otherwise
    a small set of conversational fallbacks is used.

    Keywords are matched at the *start of a word*, so "minimum" still
    triggers the "min" branch while "determine" or "admin" do not. The
    greeting "hi" must be a whole word, so "this" or "anything" no longer
    produce a greeting.

    Args:
        prompt: The user's question. ``None`` is treated as an empty string.
        df: Optional pandas DataFrame to answer from. ``None`` or an empty
            frame selects the no-data fallback replies.

    Returns:
        A markdown-formatted reply string.
    """
    import re  # function-scope import so the file's import block stays untouched

    prompt_lower = (prompt or "").lower()

    def mentions(*keywords):
        # \b anchors each keyword to a word start; plain substring tests
        # used to fire on unrelated words ("count" in "account", etc.).
        return any(
            re.search(r"\b" + re.escape(keyword), prompt_lower)
            for keyword in keywords
        )

    def stat_lines(series, spec=""):
        # Iterate label/value pairs instead of indexing by label so that
        # duplicate column labels cannot yield a Series per lookup.
        return "\n".join(
            f"- {label}: {format(value, spec)}" for label, value in series.items()
        )

    # If we have data, provide data-specific responses
    if df is not None and not df.empty:
        # Column labels are not guaranteed to be strings; stringify for joins.
        column_names = [str(col) for col in df.columns]
        numeric_df = df.select_dtypes(include=['number'])
        has_numeric = numeric_df.shape[1] > 0

        if mentions("average", "mean"):
            if has_numeric:
                return "πŸ“Š **Average values for numeric columns:**\n" + stat_lines(numeric_df.mean(), ".2f")
            return "No numeric columns found to show average values."

        if mentions("max", "highest"):
            if has_numeric:
                return "πŸ“Š **Maximum values for numeric columns:**\n" + stat_lines(numeric_df.max())
            return "No numeric columns found to show max values."

        if mentions("min", "lowest"):
            if has_numeric:
                return "πŸ“Š **Minimum values for numeric columns:**\n" + stat_lines(numeric_df.min())
            return "No numeric columns found to show min values."

        if mentions("count", "rows", "how many"):
            return f"πŸ“Š Your dataset has **{len(df)} rows** and **{len(df.columns)} columns**.\n\nColumns: {', '.join(column_names)}"

        if mentions("columns", "fields", "headers"):
            return f"πŸ“Š **Dataset Columns ({len(df.columns)}):**\n" + "\n".join(f"- {name}" for name in column_names)

        if mentions("summary", "overview", "describe"):
            summary = "πŸ“Š **Data Summary:**\n"
            summary += f"- Rows: {len(df)}\n"
            summary += f"- Columns: {len(df.columns)}\n"
            summary += f"- Column names: {', '.join(column_names)}\n"
            if has_numeric:
                numeric_names = [str(col) for col in numeric_df.columns]
                summary += f"- Numeric columns: {', '.join(numeric_names)}\n"
            return summary

        if mentions("trend", "pattern"):
            return "The data shows various patterns. Check the statistics above to identify trends in the numeric columns."

        # Generic response with data context
        return f"I have access to your data with {len(df)} rows and {len(df.columns)} columns: {', '.join(column_names)}. Ask me specific questions like: What's the average? Show me the max values? How many rows?"

    # Fallback responses (no data)
    if mentions("hello") or re.search(r"\bhi\b", prompt_lower):
        return "Hello! I'm the LLM Data Analyzer. Load some data first, then ask me questions about it!"
    if mentions("what can you do", "help"):
        return "I can: 1) Load demo data 2) Analyze your CSV 3) Answer questions about averages, max, min, columns, etc. 4) Chat about your data!"
    if mentions("thank"):
        return "You're welcome! Ask me anything about your data!"
    return "Please load some data first (click 'Load Demo Data' or paste CSV), then ask me questions about it!"

# Create tabs
tab1, tab2, tab3 = st.tabs(["πŸ“€ Paste Data", "πŸ’¬ Chat", "πŸ“Š About"])

# ============================================================================
# TAB 1: Paste Data
# ============================================================================
# Lets the user load the demo CSV or paste their own, parses it with pandas,
# shows a preview plus basic statistics, and publishes the parsed frame in
# st.session_state.current_df for the Chat tab.
with tab1:
    st.header("πŸ“€ Analyze Data")

    st.info("πŸ’‘ Load demo data or paste your CSV to start analyzing!")

    # Demo mode: stash a small sample CSV that pre-fills the text area below
    if st.button("πŸ“Œ Load Demo Data (Click to test)", use_container_width=True):
        demo_csv = """Name,Age,Salary,Department,Experience_Years
Alice,25,50000,Sales,2
Bob,30,60000,IT,5
Charlie,35,75000,HR,8
David,28,55000,Sales,3
Eve,32,65000,IT,6"""
        st.session_state.csv_data = demo_csv
        st.success("βœ… Demo data loaded! Check the Chat tab to ask questions about it.")

    st.subheader("Or paste your CSV data here:")
    csv_text = st.text_area(
        "Paste CSV content (headers, comma-separated):",
        value=st.session_state.get('csv_data', ''),
        height=150,
        placeholder="Name,Age,Salary\nAlice,25,50000\nBob,30,60000",
        key="csv_input"
    )

    # Forget the previously parsed frame up front. It is stored again below
    # only if the current text parses, so clearing the box or pasting broken
    # CSV no longer leaves the Chat tab answering from stale data.
    st.session_state.current_df = None

    if csv_text.strip():
        try:
            # Parse CSV from text
            df = pd.read_csv(io.StringIO(csv_text))

            # Store in session state for chat to access
            st.session_state.current_df = df

            st.success(f"βœ… Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")

            # Display data preview
            st.subheader("πŸ“‹ Data Preview")
            st.dataframe(df, use_container_width=True)

            # Display statistics
            st.subheader("πŸ“Š Data Statistics")
            col1, col2, col3 = st.columns(3)

            with col1:
                st.metric("Rows", len(df))
            with col2:
                st.metric("Columns", len(df.columns))
            with col3:
                st.metric("Memory", f"{df.memory_usage(deep=True).sum() / 1024:.2f} KB")

            # Detailed statistics (best-effort: a failure here must not hide the preview)
            try:
                numeric_df = df.select_dtypes(include=['number'])
                if not numeric_df.empty:
                    st.write("### Numeric Columns Summary")
                    st.write(numeric_df.describe().T)
                else:
                    st.info("No numeric columns found in dataset.")
            except Exception:
                # Exception rather than a bare except, so SystemExit and
                # KeyboardInterrupt are not swallowed.
                st.info("Could not generate statistics for this data.")

            # Ask AI about the data
            st.subheader("❓ Ask AI About Your Data")
            question = st.text_input(
                "What would you like to know about this data?",
                placeholder="e.g., What is the average salary? How many rows?",
                key="data_question"
            )

            if question:
                response = get_ai_response(question, df)
                st.success("βœ… Analysis Complete")
                st.write(response)

        except Exception as e:
            # Top-level UI boundary: show a truncated message instead of a traceback
            st.error(f"❌ Error parsing CSV: {str(e)[:100]}")
            st.info("Make sure your CSV is properly formatted: headers on first line, comma-separated values.")

# ============================================================================
# TAB 2: Chat
# ============================================================================
with tab2:
    # Chat tab: shows whether a dataframe is loaded, replays the stored
    # conversation, and records each submitted message exactly once.
    st.header("πŸ’¬ Chat with AI Assistant")
    st.write("Have a conversation about your data.")

    # Show current data status
    if st.session_state.current_df is not None:
        st.success(f"βœ… Data loaded: {len(st.session_state.current_df)} rows, {len(st.session_state.current_df.columns)} columns")
        st.write(f"Columns: {', '.join(st.session_state.current_df.columns.tolist())}")
    else:
        st.warning("⚠️ No data loaded yet. Go to 'Paste Data' tab and load data first!")

    # Initialize session state for chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    def _submit_chat_message():
        """Record the submitted chat text and its reply, then clear the input.

        Registered as the text input's ``on_change`` callback, so it runs once
        per submission. The previous inline ``if user_input:`` check ran on
        every rerun while the box still held text, which appended the same
        exchange to the history again and again.
        """
        text = st.session_state.get("chat_input", "")
        if not text.strip():
            return
        if "messages" not in st.session_state:
            st.session_state.messages = []

        # Get response with data context
        reply = get_ai_response(text, st.session_state.get("current_df"))

        st.session_state.messages.append({"role": "user", "content": text})
        st.session_state.messages.append({"role": "assistant", "content": reply})

        # Empty the box so a later rerun has nothing to resubmit
        st.session_state.chat_input = ""

    # Display chat history (already includes the exchange just submitted,
    # because the callback above runs before the history is drawn)
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Chat input
    st.text_input(
        "Type your message:",
        placeholder="Ask me about your data...",
        key="chat_input",
        on_change=_submit_chat_message,
    )

# ============================================================================
# TAB 3: About
# ============================================================================
with tab3:
    st.header("ℹ️ About This App")

    # Static help page: the markdown is bound to a name first, then rendered
    # in a single call.
    about_markdown = """
    ### 🎯 What is this?
    
    **LLM Data Analyzer** is a tool for analyzing data and having conversations about your datasets.
    
    ### πŸ”§ Technology Stack
    
    - **Framework:** Streamlit
    - **Hosting:** Hugging Face Spaces (Free Tier)
    - **Language:** Python
    
    ### ⚑ Features
    
    1. **Data Analysis**: Paste CSV and analyze your data
    2. **Smart Chat**: Chat with AI about your data
    3. **Statistics**: View comprehensive data summaries
    4. **Demo Mode**: Test with sample data instantly
    
    ### πŸ“ How to Use
    
    1. **Load Data** - Click "Load Demo Data" or paste your CSV
    2. **View Preview** - See your data in table format
    3. **Chat** - Go to Chat tab and ask questions about your data
    4. **Get Insights** - AI analyzes and answers questions
    
    ### πŸ’‘ Example Questions
    
    - "What's the average salary?"
    - "Show me the maximum values"
    - "How many rows do I have?"
    - "What columns are in the data?"
    - "Give me a summary"
    
    ### πŸ“‹ CSV Format Example
    
    ```
    Name,Age,Salary,Department
    Alice,25,50000,Sales
    Bob,30,60000,IT
    Charlie,35,75000,HR
    ```
    
    ### 🌐 Powered By
    
    - [Hugging Face](https://huggingface.co/) - AI platform and hosting
    - [Streamlit](https://streamlit.io/) - Web framework
    - [Pandas](https://pandas.pydata.org/) - Data analysis
    
    ### πŸ›  Troubleshooting
    
    **Chat can't see my data?**
    - Make sure to load data in the "Paste Data" tab first
    - Then go to "Chat" tab - it will show your data status
    
    **How do I format CSV?**
    - First line: column headers separated by commas
    - Following lines: data values separated by commas
    
    ### πŸ”— Links
    
    - [GitHub Repository](https://github.com/Arif-Badhon/LLM-Data-Analyzer)
    - [Hugging Face Hub](https://huggingface.co/)
    
    ---
    
    **Version:** 2.1 | **Last Updated:** Dec 2025
    
    πŸ’‘ **Note:** Chat now has access to your data! Load data first, then ask questions.
    """
    st.markdown(about_markdown)