Spaces:

joaopimenta
/

ascendum_data_dashboard

Running

App Files Files Community

joaopimenta committed on Nov 18

Commit

959f7ba

verified ·

1 Parent(s): a01ffab

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -105

app.py CHANGED Viewed

@@ -5,138 +5,161 @@ import plotly.graph_objects as go
 import os
 # =========================================================
-# 1. FUNÇÃO DE PARSING PERSONALIZADO (MOTOR DATA)
 # =========================================================
 def load_motordata_csv(filepath):
     """Parse a MotorData ';'-separated CSV export into a DataFrame of strings.

     When the file has more than one line, its first line (filters/metadata)
     is discarded and the next non-blank line becomes the header. Excel-style
     ``="VALUE"`` wrappers and stray double quotes are removed, blank lines
     are skipped and short rows are padded with empty strings.

     Args:
         filepath: Path of the CSV file to read.

     Returns:
         A DataFrame whose cells are all strings, or an empty DataFrame when
         the file cannot be opened or contains no usable lines.
     """
     lines = []
     # Strict decoders go first: latin1 accepts any byte sequence, so trying it
     # first (as before) made the other encodings unreachable and turned UTF-8
     # files into mojibake. It is kept only as the last resort.
     for enc in ("utf-8-sig", "cp1252", "latin1"):
         try:
             with open(filepath, "r", encoding=enc) as f:
                 lines = f.readlines()
         except UnicodeDecodeError:
             continue
         except OSError as e:
             # Missing/unreadable file: report it and keep the empty-frame contract.
             print(f"❌ Erro ao abrir ficheiro: {e}")
             return pd.DataFrame()
         print(f"✔ Encoding detetado: {enc}")
         break
     if not lines:
         return pd.DataFrame()
     # Drop the first line, which only holds filters/metadata.
     if len(lines) > 1:
         lines = lines[1:]
     rows = []
     for line in lines:
         # Strip Excel residue: ="VAL" -> VAL, plus any leftover quotes.
         clean = line.replace('="', '').replace('"', '').strip()
         if not clean:
             # A blank line would otherwise become a row of empty cells.
             continue
         rows.append(clean.split(";"))
     if not rows:
         return pd.DataFrame()
     # Pad short rows so every row has the same number of columns.
     max_cols = max(len(r) for r in rows)
     rows = [r + [""] * (max_cols - len(r)) for r in rows]
     # First remaining row is the header; strip stray spaces from the names.
     clean_header = [h.strip() for h in rows[0]]
     return pd.DataFrame(rows[1:], columns=clean_header)
 # =========================================================
-# 2. CARREGAMENTO E PREPARAÇÃO
 # =========================================================
 def load_data():
     """Load the sales data and prepare it for the dashboard.

     Reads ``dados_vendas.csv`` through ``load_motordata_csv`` when the file
     exists; if that yields nothing, a small simulated data set is used.
     When the columns are present, rows are restricted to ``Tipo ==
     'AGRICOLA'``, ``Potencia kW`` is converted to numbers (rows without a
     parseable power are dropped) and a ``Cluster Potencia`` label is added.

     Returns:
         The prepared DataFrame.
     """
     file_path = "dados_vendas.csv"
     df = pd.DataFrame()
     # --- Try the real file first ---
     if os.path.exists(file_path):
         print("📂 A processar dados reais...")
         try:
             df = load_motordata_csv(file_path)
         except Exception as e:
             print(f"❌ Erro ao processar CSV: {e}")
     # --- Fall back to simulated data when nothing was loaded ---
     if df.empty:
         print("⚠️ Usando dados simulados (Ficheiro vazio ou inexistente)...")
         data = {
             'Marca': ['VALTRA', 'KIOTI', 'JOHN DEERE', 'NEW HOLLAND', 'KUBOTA'] * 10,
             'Regiao': ['Norte', 'Centro', 'Alentejo', 'Norte', 'Algarve'] * 10,
             'Tipo': ['AGRICOLA'] * 50,
             'Potencia kW': ['75', '19', '88', '55', '70'] * 10,
         }
         df = pd.DataFrame(data)
     # 1. Keep only agricultural machines (if the column exists).
     # NOTE(review): an accented 'AGRÍCOLA' would not match - confirm the
     # spelling used by the real export.
     if 'Tipo' in df.columns:
         df['Tipo'] = df['Tipo'].astype(str).str.upper().str.strip()
         df = df[df['Tipo'] == 'AGRICOLA'].copy()
     # 2. Convert power to numbers.
     if 'Potencia kW' in df.columns:
         # Accept a decimal comma ("88,5"); previously such rows were coerced
         # to NaN and silently dropped.
         kw_text = df["Potencia kW"].astype(str).str.strip().str.replace(",", ".", regex=False)
         df["Potencia kW"] = pd.to_numeric(kw_text, errors='coerce')
         df = df.dropna(subset=["Potencia kW"]).copy()
         # Open-ended outer bins so that 0 kW and very large values get a real
         # segment instead of the literal label 'nan'.
         bins = [float("-inf"), 25, 50, 100, float("inf")]
         labels = ['< 25 kW (Compactos)', '25 - 50 kW', '50 - 100 kW', '> 100 kW (Alta)']
         df['Cluster Potencia'] = pd.cut(df['Potencia kW'], bins=bins, labels=labels)
         df['Cluster Potencia'] = df['Cluster Potencia'].astype(str)
     return df
 # Load the data once when the application starts.
 df_global = load_data()
 # Slider limits and region choices (module-level globals).
 if not df_global.empty and "Potencia kW" in df_global.columns:
     min_kw_global = int(df_global["Potencia kW"].min())
     max_kw_global = int(df_global["Potencia kW"].max())
     # int() truncates; widen the bounds so fractional extremes stay selectable.
     if min_kw_global > df_global["Potencia kW"].min():
         min_kw_global -= 1
     if max_kw_global < df_global["Potencia kW"].max():
         max_kw_global += 1
     if "Regiao" in df_global.columns:
         # Skip empty region names.
         all_regions = sorted([x for x in df_global["Regiao"].unique().tolist() if x and str(x).strip() != ''])
     else:
         # No region column: avoid a KeyError at import time.
         all_regions = []
 else:
     min_kw_global, max_kw_global = 0, 100
     all_regions = []
 # =========================================================
-# 3. LÓGICA DO DASHBOARD (CALLBACK)
 # =========================================================
 def update_dashboard(val_min, val_max, selected_regions):
-    # Se não houver dados
     if df_global.empty:
         return "0", "0%", "0%", None, None, None, pd.DataFrame()
-    # Troca se min > max
-    if val_min > val_max:
-        val_min, val_max = val_max, val_min
-    # Filtragem
     mask = (
         (df_global["Potencia kW"] >= val_min) &
         (df_global["Potencia kW"] <= val_max) &
@@ -147,29 +170,25 @@ def update_dashboard(val_min, val_max, selected_regions):
     if df_filtered.empty:
         return "0", "0%", "0%", None, None, None, df_filtered
-    # Cálculos KPIs
     total = len(df_filtered)
-    v_valtra = len(df_filtered[df_filtered['Marca'].astype(str).str.upper() == 'VALTRA'])
-    v_kioti = len(df_filtered[df_filtered['Marca'].astype(str).str.upper() == 'KIOTI'])
     share_valtra = (v_valtra / total * 100) if total > 0 else 0
     share_kioti = (v_kioti / total * 100) if total > 0 else 0
-    # Textos KPI
-    kpi_total_txt = f"{total} Unidades"
-    kpi_valtra_txt = f"{share_valtra:.1f}% ({v_valtra})"
-    kpi_kioti_txt = f"{share_kioti:.1f}% ({v_kioti})"
-    # Gráfico 1: Ranking
     top_marcas = df_filtered['Marca'].value_counts().reset_index().head(15)
     top_marcas.columns = ['Marca', 'Vendas']
     colors = []
-    for marca in top_marcas['Marca']:
-        m = str(marca).upper()
-        if m == 'VALTRA': colors.append('#d62728') # Vermelho
-        elif m == 'KIOTI': colors.append('#ff7f0e') # Laranja
-        else: colors.append('#cccccc') # Cinza
     fig_rank = go.Figure(data=[go.Bar(
         x=top_marcas['Marca'], y=top_marcas['Vendas'],
@@ -177,63 +196,52 @@ def update_dashboard(val_min, val_max, selected_regions):
     )])
     fig_rank.update_layout(title="Ranking de Mercado", template="plotly_white", height=400)
-    # Gráfico 2: Potência
     if 'Cluster Potencia' in df_filtered.columns:
-        vendas_cluster = df_filtered['Cluster Potencia'].value_counts().reset_index()
-        vendas_cluster.columns = ['Cluster', 'Vendas']
-        fig_pie = px.pie(vendas_cluster, values='Vendas', names='Cluster',
-                         title='Mix de Potência', hole=0.4)
     else:
         fig_pie = go.Figure()
-    # Gráfico 3: Segmentos Valtra vs Kioti
-    nossas = df_filtered[df_filtered['Marca'].astype(str).str.upper().isin(['VALTRA', 'KIOTI'])]
-    if not nossas.empty and 'Cluster Potencia' in nossas.columns:
         fig_seg = px.histogram(nossas, x="Cluster Potencia", color="Marca",
-                               barmode="group", title="Onde competimos?",
                                color_discrete_map={'VALTRA': '#d62728', 'KIOTI': '#ff7f0e'})
     else:
-        fig_seg = go.Figure().add_annotation(text="Sem dados das marcas", showarrow=False)
-    return kpi_total_txt, kpi_valtra_txt, kpi_kioti_txt, fig_rank, fig_pie, fig_seg, df_filtered
 # =========================================================
-# 4. INTERFACE GRÁFICA (LAYOUT)
 # =========================================================
 with gr.Blocks(title="Dashboard Valtra & Kioti", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🚜 Dashboard Executivo: Valtra & KIOTI")
-    gr.Markdown(f"_Dados carregados: {len(df_global)} registos_")
     with gr.Row():
-        # Sidebar Filtros
         with gr.Column(scale=1):
             gr.Label("⚙️ Filtros")
             s_min = gr.Slider(minimum=min_kw_global, maximum=max_kw_global, value=min_kw_global, step=1, label="Min kW")
             s_max = gr.Slider(minimum=min_kw_global, maximum=max_kw_global, value=max_kw_global, step=1, label="Max kW")
             chk_reg = gr.CheckboxGroup(choices=all_regions, value=all_regions, label="Regiões")
             btn = gr.Button("Atualizar", variant="primary")
-        # Área Principal
         with gr.Column(scale=4):
             with gr.Row():
-                # KPIs
                 k1 = gr.Text(label="Mercado Total")
                 k2 = gr.Text(label="Share VALTRA")
                 k3 = gr.Text(label="Share KIOTI")
-            # Gráficos
             plot_rank = gr.Plot(label="Ranking")
             with gr.Row():
                 plot_pie = gr.Plot(label="Potência")
                 plot_seg = gr.Plot(label="Segmentos")
-    with gr.Accordion("📂 Ver Tabela de Dados", open=False):
         tbl = gr.Dataframe()
-    # Eventos
     inputs = [s_min, s_max, chk_reg]
     outputs = [k1, k2, k3, plot_rank, plot_pie, plot_seg, tbl]
@@ -241,8 +249,6 @@ with gr.Blocks(title="Dashboard Valtra & Kioti", theme=gr.themes.Soft()) as demo
     s_max.change(update_dashboard, inputs, outputs)
     chk_reg.change(update_dashboard, inputs, outputs)
     btn.click(update_dashboard, inputs, outputs)
-    # Iniciar
     demo.load(update_dashboard, inputs, outputs)
 if __name__ == "__main__":

 import os
 # =========================================================
+# 1. FUNÇÃO DE PARSING ROBUSTA (MOTOR DATA)
 # =========================================================
 def _fold_column_name(name):
     """Return *name* lower-cased with diacritics removed ('Região' -> 'regiao')."""
     import unicodedata  # local import so the module's import block stays untouched
     decomposed = unicodedata.normalize("NFKD", str(name))
     return "".join(ch for ch in decomposed if not unicodedata.combining(ch)).lower()
 def _match_standard_column(name):
     """Map a raw header name to ``(standard_name, rank)``, or ``(None, None)``.

     A lower rank is a more trustworthy match: 0 means the name already is the
     standard one, 1 means it is the same word ignoring case, accents and
     punctuation, 2 means only a loose substring match.
     """
     folded = _fold_column_name(name)
     compact = "".join(ch for ch in folded if ch.isalnum())
     if "pot" in folded and "kw" in folded:
         target, exact = "Potencia kW", ("potenciakw",)
     elif "regi" in folded:
         target, exact = "Regiao", ("regiao", "region")
     elif "marca" in folded:
         target, exact = "Marca", ("marca",)
     else:
         return None, None
     if name == target:
         return target, 0
     return target, (1 if compact in exact else 2)
 def load_motordata_csv(filepath):
     """Parse a MotorData ';'-separated CSV export into a DataFrame of strings.

     Excel-style ``="VALUE"`` wrappers and stray double quotes are removed and
     blank lines are skipped. If the first remaining line has fewer than three
     columns it is treated as a filter/metadata line and dropped. The next
     line is the header, and short rows are padded with empty strings.
     At most one column each is renamed to ``Potencia kW``, ``Regiao`` and
     ``Marca``; the best-matching header wins and other look-alike columns
     keep their original names.

     Args:
         filepath: Path of the CSV file to read.

     Returns:
         A DataFrame whose cells are all strings, or an empty DataFrame when
         the file cannot be opened or contains no usable lines.
     """
     lines = []
     # 1. Try several encodings. Strict decoders go first: latin1 (and its
     # alias ISO-8859-1) accepts any byte sequence, so trying it first made
     # the others unreachable and turned UTF-8 files into mojibake.
     for enc in ("utf-8-sig", "cp1252", "latin1"):
         try:
             with open(filepath, "r", encoding=enc) as f:
                 lines = f.readlines()
         except UnicodeDecodeError:
             continue
         except OSError as e:
             print(f"❌ Erro ao abrir ficheiro: {e}")
             break
         print(f"✔ Encoding detetado com sucesso: {enc}")
         break
     # 2. Clean every line (Excel residue: ="Value") and split on ';'.
     rows = []
     for line in lines:
         clean = line.replace('="', '').replace('"', '').strip()
         if clean:
             rows.append(clean.split(";"))
     if not rows:
         print("❌ Ficheiro vazio ou ilegível.")
         return pd.DataFrame()
     # 3. A leading line with fewer than 3 columns is junk (e.g. filters).
     if len(rows) > 1 and len(rows[0]) < 3:
         rows = rows[1:]
     # 4. Pad short rows so every row has the same number of columns.
     max_cols = max(len(r) for r in rows)
     rows = [r + [""] * (max_cols - len(r)) for r in rows]
     # 5. Header names, stripped of surrounding spaces.
     clean_header = [str(h).strip() for h in rows[0]]
     print(f"🔍 Colunas encontradas no CSV (RAW): {clean_header}")
     # 6. Standardise column names. Renaming every fuzzy match (as before)
     # could give several columns the same name, e.g. 'Data Registo' and
     # 'Região' both becoming 'Regiao'. Only the best candidate per standard
     # name is renamed, by position, so the others are left alone.
     best = {}
     for idx, name in enumerate(clean_header):
         target, rank = _match_standard_column(name)
         if target is None:
             continue
         if target not in best or rank < best[target][0]:
             best[target] = (rank, idx)
     columns = list(clean_header)
     for target, (_rank, idx) in best.items():
         if target == 'Potencia kW' and columns[idx] != target:
             print(f"✅ Coluna de Potência encontrada: '{columns[idx]}' -> Renomeando para 'Potencia kW'")
         columns[idx] = target
     return pd.DataFrame(rows[1:], columns=columns)
 # =========================================================
+# 2. CARREGAMENTO E LÓGICA PRINCIPAL
 # =========================================================
 def load_data():
     """Load the sales data and prepare it for the dashboard.

     Reads ``dados_vendas.csv`` through ``load_motordata_csv`` when the file
     exists. Missing ``Potencia kW`` / ``Regiao`` / ``Marca`` columns are
     filled with placeholder values, and a simulated data set is used when
     fewer than two rows were loaded. Rows are then restricted to the
     agricultural type (when a "tipo" column exists), power is converted to
     numbers (unparseable values become 0) and a ``Cluster Potencia`` label
     is added.

     Returns:
         The prepared DataFrame.
     """
     file_path = "dados_vendas.csv"
     df = pd.DataFrame()
     if os.path.exists(file_path):
         print("📂 A processar dados reais...")
         try:
             df = load_motordata_csv(file_path)
         except Exception as e:
             print(f"❌ Erro crítico ao ler CSV: {e}")
     else:
         print("⚠️ Ficheiro 'dados_vendas.csv' não encontrado.")
     # --- Crash guard: make sure the columns used downstream always exist ---
     if "Potencia kW" not in df.columns:
         print("⚠️ AVISO: Coluna 'Potencia kW' não encontrada! Criando coluna dummy a zero.")
         df["Potencia kW"] = 0
     if "Regiao" not in df.columns:
         df["Regiao"] = "Desconhecido"
     if "Marca" not in df.columns:
         df["Marca"] = "Outros"
     # Fall back to simulated data so the UI has something to draw.
     # NOTE(review): a real file with a single data row is also replaced by
     # simulated data here - confirm that this is intended.
     if df.empty or len(df) < 2:
         print("⚠️ DataFrame vazio. Usando dados simulados.")
         # Every column is built from the same row count. The previous
         # literals had 40 brands against 42 regions/types/powers, so
         # pd.DataFrame raised ValueError in the very fallback meant to
         # prevent crashes.
         sim_rows = 42
         brands = ['VALTRA', 'KIOTI', 'JOHN DEERE', 'NEW HOLLAND']
         regions = ['Norte', 'Centro', 'Sul']
         powers = [75, 25, 100, 50]
         data_sim = {
             'Marca': [brands[i % len(brands)] for i in range(sim_rows)],
             'Regiao': [regions[i % len(regions)] for i in range(sim_rows)],
             'Tipo': ['AGRICOLA'] * sim_rows,
             'Potencia kW': [powers[i % len(powers)] for i in range(sim_rows)],
         }
         df = pd.DataFrame(data_sim)
     # --- Cleaning and typing ---
     # 1. Keep agricultural machines; the type column is located loosely.
     tipo_col = next((c for c in df.columns if "tipo" in str(c).lower()), None)
     if tipo_col is not None:
         df['Tipo_clean'] = df[tipo_col].astype(str).str.upper().str.strip()
         # Compare without diacritics so 'AGRÍCOLA' is kept as well.
         tipo_ascii = df['Tipo_clean'].str.normalize("NFKD").str.encode("ascii", "ignore").str.decode("ascii")
         df = df[tipo_ascii.str.contains('AGRICOLA', na=False)].copy()
     # 2. Convert power to numbers. A decimal comma ("88,5") is accepted;
     # before, such values failed to parse and silently became 0.
     kw_text = df["Potencia kW"].astype(str).str.strip().str.replace(",", ".", regex=False)
     df["Potencia kW"] = pd.to_numeric(kw_text, errors='coerce').fillna(0)
     # 3. Power segments. The outer bins are open-ended so that no value falls
     # outside the bins and ends up labelled 'nan'.
     bins = [float("-inf"), 25, 50, 100, float("inf")]
     labels = ['< 25 kW (Compactos)', '25 - 50 kW', '50 - 100 kW', '> 100 kW (Alta)']
     df['Cluster Potencia'] = pd.cut(df['Potencia kW'], bins=bins, labels=labels).astype(str)
     return df
 # Load the data once at import time (module-level globals).
 df_global = load_data()
 # Limits for the filter widgets.
 if not df_global.empty:
     min_kw_global = int(df_global["Potencia kW"].min())
     max_kw_global = int(df_global["Potencia kW"].max())
     # int() truncates; widen the bounds so fractional extremes stay selectable.
     if min_kw_global > df_global["Potencia kW"].min():
         min_kw_global -= 1
     if max_kw_global < df_global["Potencia kW"].max():
         max_kw_global += 1
     # Region choices, without blanks and NaN.
     all_regions = sorted([str(x) for x in df_global["Regiao"].unique() if str(x).lower() != 'nan' and str(x).strip() != ''])
 else:
     min_kw_global, max_kw_global = 0, 100
     all_regions = ["Norte", "Sul"]
 # =========================================================
+# 3. DASHBOARD (GRADIO)
 # =========================================================
 def update_dashboard(val_min, val_max, selected_regions):
+    # Prevenir erros se dataframe estiver vazio
     if df_global.empty:
         return "0", "0%", "0%", None, None, None, pd.DataFrame()
+    if val_min > val_max: val_min, val_max = val_max, val_min
+    # Filtro Principal
     mask = (
         (df_global["Potencia kW"] >= val_min) &
         (df_global["Potencia kW"] <= val_max) &
     if df_filtered.empty:
         return "0", "0%", "0%", None, None, None, df_filtered
+    # KPIs
     total = len(df_filtered)
+    v_valtra = len(df_filtered[df_filtered['Marca'].str.upper() == 'VALTRA'])
+    v_kioti = len(df_filtered[df_filtered['Marca'].str.upper() == 'KIOTI'])
     share_valtra = (v_valtra / total * 100) if total > 0 else 0
     share_kioti = (v_kioti / total * 100) if total > 0 else 0
+    # Gráficos
+    # 1. Ranking
     top_marcas = df_filtered['Marca'].value_counts().reset_index().head(15)
     top_marcas.columns = ['Marca', 'Vendas']
     colors = []
+    for m in top_marcas['Marca']:
+        m_str = str(m).upper()
+        if 'VALTRA' in m_str: colors.append('#d62728')
+        elif 'KIOTI' in m_str: colors.append('#ff7f0e')
+        else: colors.append('#cccccc')
     fig_rank = go.Figure(data=[go.Bar(
         x=top_marcas['Marca'], y=top_marcas['Vendas'],
     )])
     fig_rank.update_layout(title="Ranking de Mercado", template="plotly_white", height=400)
+    # 2. Pizza Potência
     if 'Cluster Potencia' in df_filtered.columns:
+        v_cluster = df_filtered['Cluster Potencia'].value_counts().reset_index()
+        v_cluster.columns = ['Cluster', 'Vendas']
+        fig_pie = px.pie(v_cluster, values='Vendas', names='Cluster', title='Mix de Potência', hole=0.4)
     else:
         fig_pie = go.Figure()
+    # 3. Histograma Segmentos
+    nossas = df_filtered[df_filtered['Marca'].str.upper().isin(['VALTRA', 'KIOTI'])]
+    if not nossas.empty:
         fig_seg = px.histogram(nossas, x="Cluster Potencia", color="Marca",
+                               barmode="group", title="Comparativo Direto",
                                color_discrete_map={'VALTRA': '#d62728', 'KIOTI': '#ff7f0e'})
     else:
+        fig_seg = go.Figure().add_annotation(text="Sem dados Valtra/Kioti", showarrow=False)
+    return f"{total}", f"{share_valtra:.1f}% ({v_valtra})", f"{share_kioti:.1f}% ({v_kioti})", fig_rank, fig_pie, fig_seg, df_filtered
 # =========================================================
+# 4. INTERFACE
 # =========================================================
 with gr.Blocks(title="Dashboard Valtra & Kioti", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🚜 Dashboard Executivo: Valtra & KIOTI")
     with gr.Row():
         with gr.Column(scale=1):
             gr.Label("⚙️ Filtros")
             s_min = gr.Slider(minimum=min_kw_global, maximum=max_kw_global, value=min_kw_global, step=1, label="Min kW")
             s_max = gr.Slider(minimum=min_kw_global, maximum=max_kw_global, value=max_kw_global, step=1, label="Max kW")
             chk_reg = gr.CheckboxGroup(choices=all_regions, value=all_regions, label="Regiões")
             btn = gr.Button("Atualizar", variant="primary")
         with gr.Column(scale=4):
             with gr.Row():
                 k1 = gr.Text(label="Mercado Total")
                 k2 = gr.Text(label="Share VALTRA")
                 k3 = gr.Text(label="Share KIOTI")
             plot_rank = gr.Plot(label="Ranking")
             with gr.Row():
                 plot_pie = gr.Plot(label="Potência")
                 plot_seg = gr.Plot(label="Segmentos")
+    with gr.Accordion("📂 Dados Detalhados", open=False):
         tbl = gr.Dataframe()
     inputs = [s_min, s_max, chk_reg]
     outputs = [k1, k2, k3, plot_rank, plot_pie, plot_seg, tbl]
     s_max.change(update_dashboard, inputs, outputs)
     chk_reg.change(update_dashboard, inputs, outputs)
     btn.click(update_dashboard, inputs, outputs)
     demo.load(update_dashboard, inputs, outputs)
 if __name__ == "__main__":