Update app.py
Browse files
app.py
CHANGED
|
@@ -254,11 +254,11 @@ def highlight_entities(article_content,summary_output):
|
|
| 254 |
matched_entities, unmatched_entities = get_and_compare_entities(article_content,summary_output)
|
| 255 |
|
| 256 |
for entity in matched_entities:
|
| 257 |
-
|
| 258 |
|
| 259 |
for entity in unmatched_entities:
|
| 260 |
-
|
| 261 |
-
soup = BeautifulSoup(
|
| 262 |
return HTML_WRAPPER.format(soup)
|
| 263 |
|
| 264 |
|
|
@@ -338,11 +338,12 @@ def schleifer_model():
|
|
| 338 |
device=0 if torch.cuda.is_available() else -1)
|
| 339 |
return summarizer
|
| 340 |
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
|
|
|
| 346 |
|
| 347 |
@st.experimental_singleton(suppress_st_warning=True)
|
| 348 |
def get_sentence_embedding_model():
|
|
@@ -450,8 +451,8 @@ if summarize:
|
|
| 450 |
text="Loading Facebook-Bart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
|
| 451 |
):
|
| 452 |
summarizer_model = facebook_model()
|
| 453 |
-
summarized_text = summarizer_model(text_to_summarize
|
| 454 |
-
summarized_text = ' '.join([summ['summary_text']
|
| 455 |
|
| 456 |
elif model_type == "Sshleifer-DistilBart":
|
| 457 |
if url_text:
|
|
@@ -463,25 +464,26 @@ if summarize:
|
|
| 463 |
text="Loading Sshleifer-DistilBart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
|
| 464 |
):
|
| 465 |
summarizer_model = schleifer_model()
|
| 466 |
-
summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len)
|
| 467 |
summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
|
| 468 |
|
| 469 |
elif model_type == "Google-Pegasus":
|
| 470 |
if url_text:
|
| 471 |
-
text_to_summarize = cleaned_text
|
|
|
|
| 472 |
else:
|
| 473 |
-
text_to_summarize = cleaned_text
|
| 474 |
|
| 475 |
with st.spinner(
|
| 476 |
text="Loading Google-Pegasus Model and Extracting summary. This might take a few seconds depending on the length of your text..."
|
| 477 |
):
|
| 478 |
summarizer_model = google_model()
|
| 479 |
-
summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len)
|
| 480 |
summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
|
| 481 |
|
| 482 |
with st.spinner("Calculating and matching entities, this takes a few seconds..."):
|
| 483 |
|
| 484 |
-
entity_match_html = highlight_entities(' '.join(
|
| 485 |
st.subheader("Summarized text with matched entities in Green and mismatched entities in Red relative to the original text")
|
| 486 |
st.markdown("####")
|
| 487 |
|
|
|
|
| 254 |
matched_entities, unmatched_entities = get_and_compare_entities(article_content,summary_output)
|
| 255 |
|
| 256 |
for entity in matched_entities:
|
| 257 |
+
summary_output = summary_output.replace(entity, markdown_start_green + entity + markdown_end)
|
| 258 |
|
| 259 |
for entity in unmatched_entities:
|
| 260 |
+
summary_output = summary_output.replace(entity, markdown_start_red + entity + markdown_end)
|
| 261 |
+
soup = BeautifulSoup(summary_output, features="html.parser")
|
| 262 |
return HTML_WRAPPER.format(soup)
|
| 263 |
|
| 264 |
|
|
|
|
| 338 |
device=0 if torch.cuda.is_available() else -1)
|
| 339 |
return summarizer
|
| 340 |
|
| 341 |
+
@st.experimental_singleton(suppress_st_warning=True)
def google_model():
    """Load and cache the Google Pegasus summarization pipeline.

    Returns:
        A transformers summarization ``pipeline`` backed by the
        'google/pegasus-cnn_dailymail' checkpoint, placed on GPU 0 when
        CUDA is available, otherwise on CPU (device=-1).
    """
    model_name = 'google/pegasus-cnn_dailymail'
    # Fix: the original line read `model=model=model_name`, which is a
    # Python syntax error (duplicated keyword assignment).
    summarizer = pipeline('summarization', model=model_name, tokenizer=model_name,
                          device=0 if torch.cuda.is_available() else -1)
    return summarizer
|
| 347 |
|
| 348 |
@st.experimental_singleton(suppress_st_warning=True)
|
| 349 |
def get_sentence_embedding_model():
|
|
|
|
| 451 |
text="Loading Facebook-Bart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
|
| 452 |
):
|
| 453 |
summarizer_model = facebook_model()
|
| 454 |
+
summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len,clean_up_tokenization_spaces=True,no_repeat_ngram_size=4)
|
| 455 |
+
summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
|
| 456 |
|
| 457 |
elif model_type == "Sshleifer-DistilBart":
|
| 458 |
if url_text:
|
|
|
|
| 464 |
text="Loading Sshleifer-DistilBart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
|
| 465 |
):
|
| 466 |
summarizer_model = schleifer_model()
|
| 467 |
+
summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len,clean_up_tokenization_spaces=True,no_repeat_ngram_size=4)
|
| 468 |
summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
|
| 469 |
|
| 470 |
elif model_type == "Google-Pegasus":
|
| 471 |
if url_text:
|
| 472 |
+
text_to_summarize = cleaned_text[0]
|
| 473 |
+
|
| 474 |
else:
|
| 475 |
+
text_to_summarize = cleaned_text[0]
|
| 476 |
|
| 477 |
with st.spinner(
|
| 478 |
text="Loading Google-Pegasus Model and Extracting summary. This might take a few seconds depending on the length of your text..."
|
| 479 |
):
|
| 480 |
summarizer_model = google_model()
|
| 481 |
+
summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len,clean_up_tokenization_spaces=True,no_repeat_ngram_size=4)
|
| 482 |
summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
|
| 483 |
|
| 484 |
with st.spinner("Calculating and matching entities, this takes a few seconds..."):
|
| 485 |
|
| 486 |
+
entity_match_html = highlight_entities(' '.join(text_to_summarize),summarized_text)
|
| 487 |
st.subheader("Summarized text with matched entities in Green and mismatched entities in Red relative to the original text")
|
| 488 |
st.markdown("####")
|
| 489 |
|