Spaces:

shrut27
/

ESG_Report_Analysis

Build error

App Files Files Community

shrut27 committed on Mar 6, 2024

Commit

c4426e9

verified ·

1 Parent(s): 2d2c5f7

Upload app.py

Browse files

Files changed (1) hide show

app.py +86 -0

app.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import streamlit as st
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
+import spacy
+from tika import parser
+import requests
+import pandas as pd
+# Load the small English spaCy pipeline at module level (not through a Streamlit cache decorator); it is used below for sentence splitting.
+nlp = spacy.load("en_core_web_sm")
+@st.cache(allow_output_mutation=True)
+def load_environmental_model():
+    name_env = "ESGBERT/EnvironmentalBERT-environmental"
+    tokenizer_env = AutoTokenizer.from_pretrained(name_env)
+    model_env = AutoModelForSequenceClassification.from_pretrained(name_env)
+    return pipeline("text-classification", model=model_env, tokenizer=tokenizer_env)
+@st.cache(allow_output_mutation=True)
+def load_social_model():
+    name_soc = "ESGBERT/SocialBERT-social"
+    tokenizer_soc = AutoTokenizer.from_pretrained(name_soc)
+    model_soc = AutoModelForSequenceClassification.from_pretrained(name_soc)
+    return pipeline("text-classification", model=model_soc, tokenizer=tokenizer_soc)
+@st.cache(allow_output_mutation=True)
+def load_governance_model():
+    name_gov = "ESGBERT/GovernanceBERT-governance"
+    tokenizer_gov = AutoTokenizer.from_pretrained(name_gov)
+    model_gov = AutoModelForSequenceClassification.from_pretrained(name_gov)
+    return pipeline("text-classification", model=model_gov, tokenizer=tokenizer_gov)
+@st.cache(allow_output_mutation=True)
+def load_sentiment_model():
+    model_name = "climatebert/distilroberta-base-climate-sentiment"
+    model = AutoModelForSequenceClassification.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name, max_len=512)
+    return pipeline("text-classification", model=model, tokenizer=tokenizer)
+# Streamlit App
+st.title("ESGBERT Text Classification App")
+# Get report URL from user input
+url = st.text_input("Enter the URL of the report (PDF):")
+# Model selection dropdown
+selected_model = st.selectbox("Select Model", ["Environmental Model", "Social Model", "Governance Model", "Sentiment Model"])
+if url:
+    # Download PDF content from the URL
+    response = requests.get(url, stream=True)
+    if response.status_code == 200:
+        # Parse PDF and extract text
+        raw_text = parser.from_buffer(response.content)['content']
+        # Extract sentences using spaCy
+        doc = nlp(raw_text)
+        sentences = [sent.text for sent in doc.sents]
+        # Filtering and preprocessing sentences
+        sequences = list(map(str, sentences))
+        sentences = [x.replace("\n", "") for x in sequences]
+        sentences = [x for x in sentences if x != ""]
+        sentences = [x for x in sentences if x[0].isupper()]
+        sub_sentences = sentences[:100]  # Takes around 20 seconds
+        # Classification using different models based on user selection
+        if selected_model == "Environmental Model":
+            pipe_model = load_environmental_model()
+        elif selected_model == "Social Model":
+            pipe_model = load_social_model()
+        elif selected_model == "Governance Model":
+            pipe_model = load_governance_model()
+        else:
+            pipe_model = load_sentiment_model()
+        # Get predictions for the selected model
+        model_results = pipe_model(sub_sentences, padding=True, truncation=True)
+        model_labels = [x["label"] for x in model_results]
+        # Display count of sentences labeled as the selected model
+        st.subheader(f"{selected_model} Sentences Count")
+        st.write(pd.DataFrame({"sentence": sub_sentences, selected_model: model_labels}).groupby(selected_model).count())
+    else:
+        st.error("Error fetching PDF content from the provided URL. Please check the URL and try again.")