Update app.py
Browse files
app.py
CHANGED
|
@@ -254,11 +254,11 @@ def highlight_entities(article_content,summary_output):
|
|
| 254 |
matched_entities, unmatched_entities = get_and_compare_entities(article_content,summary_output)
|
| 255 |
|
| 256 |
for entity in matched_entities:
|
| 257 |
-
|
| 258 |
|
| 259 |
for entity in unmatched_entities:
|
| 260 |
-
|
| 261 |
-
soup = BeautifulSoup(
|
| 262 |
return HTML_WRAPPER.format(soup)
|
| 263 |
|
| 264 |
|
|
@@ -338,11 +338,12 @@ def schleifer_model():
|
|
| 338 |
device=0 if torch.cuda.is_available() else -1)
|
| 339 |
return summarizer
|
| 340 |
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
|
|
|
| 346 |
|
| 347 |
@st.experimental_singleton(suppress_st_warning=True)
|
| 348 |
def get_sentence_embedding_model():
|
|
@@ -450,8 +451,8 @@ if summarize:
|
|
| 450 |
text="Loading Facebook-Bart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
|
| 451 |
):
|
| 452 |
summarizer_model = facebook_model()
|
| 453 |
-
summarized_text = summarizer_model(text_to_summarize
|
| 454 |
-
summarized_text = ' '.join([summ['summary_text']
|
| 455 |
|
| 456 |
elif model_type == "Sshleifer-DistilBart":
|
| 457 |
if url_text:
|
|
@@ -463,25 +464,26 @@ if summarize:
|
|
| 463 |
text="Loading Sshleifer-DistilBart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
|
| 464 |
):
|
| 465 |
summarizer_model = schleifer_model()
|
| 466 |
-
summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len)
|
| 467 |
summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
|
| 468 |
|
| 469 |
elif model_type == "Google-Pegasus":
|
| 470 |
if url_text:
|
| 471 |
-
text_to_summarize = cleaned_text
|
|
|
|
| 472 |
else:
|
| 473 |
-
text_to_summarize = cleaned_text
|
| 474 |
|
| 475 |
with st.spinner(
|
| 476 |
text="Loading Google-Pegasus Model and Extracting summary. This might take a few seconds depending on the length of your text..."
|
| 477 |
):
|
| 478 |
summarizer_model = google_model()
|
| 479 |
-
summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len)
|
| 480 |
summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
|
| 481 |
|
| 482 |
with st.spinner("Calculating and matching entities, this takes a few seconds..."):
|
| 483 |
|
| 484 |
-
entity_match_html = highlight_entities(' '.join(
|
| 485 |
st.subheader("Summarized text with matched entities in Green and mismatched entities in Red relative to the original text")
|
| 486 |
st.markdown("####")
|
| 487 |
|
|
|
|
| 254 |
matched_entities, unmatched_entities = get_and_compare_entities(article_content,summary_output)
|
| 255 |
|
| 256 |
for entity in matched_entities:
|
| 257 |
+
summary_output = summary_output.replace(entity, markdown_start_green + entity + markdown_end)
|
| 258 |
|
| 259 |
for entity in unmatched_entities:
|
| 260 |
+
summary_output = summary_output.replace(entity, markdown_start_red + entity + markdown_end)
|
| 261 |
+
soup = BeautifulSoup(summary_output, features="html.parser")
|
| 262 |
return HTML_WRAPPER.format(soup)
|
| 263 |
|
| 264 |
|
|
|
|
| 338 |
device=0 if torch.cuda.is_available() else -1)
|
| 339 |
return summarizer
|
| 340 |
|
| 341 |
+
@st.experimental_singleton(suppress_st_warning=True)
def google_model():
    """Load and cache the Google Pegasus summarization pipeline.

    Returns:
        A transformers summarization ``pipeline`` backed by the
        'google/pegasus-cnn_dailymail' checkpoint, placed on GPU 0 when
        CUDA is available, otherwise on CPU (device=-1).
    """
    model_name = 'google/pegasus-cnn_dailymail'
    # Fix: the original line read `model=model=model_name`, which is a
    # Python syntax error (duplicated keyword assignment).
    summarizer = pipeline('summarization', model=model_name, tokenizer=model_name,
                          device=0 if torch.cuda.is_available() else -1)
    return summarizer
|
| 347 |
|
| 348 |
@st.experimental_singleton(suppress_st_warning=True)
|
| 349 |
def get_sentence_embedding_model():
|
|
|
|
| 451 |
text="Loading Facebook-Bart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
|
| 452 |
):
|
| 453 |
summarizer_model = facebook_model()
|
| 454 |
+
summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len,clean_up_tokenization_spaces=True,no_repeat_ngram_size=4)
|
| 455 |
+
summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
|
| 456 |
|
| 457 |
elif model_type == "Sshleifer-DistilBart":
|
| 458 |
if url_text:
|
|
|
|
| 464 |
text="Loading Sshleifer-DistilBart Model and Extracting summary. This might take a few seconds depending on the length of your text..."
|
| 465 |
):
|
| 466 |
summarizer_model = schleifer_model()
|
| 467 |
+
summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len,clean_up_tokenization_spaces=True,no_repeat_ngram_size=4)
|
| 468 |
summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
|
| 469 |
|
| 470 |
elif model_type == "Google-Pegasus":
|
| 471 |
if url_text:
|
| 472 |
+
text_to_summarize = cleaned_text[0]
|
| 473 |
+
|
| 474 |
else:
|
| 475 |
+
text_to_summarize = cleaned_text[0]
|
| 476 |
|
| 477 |
with st.spinner(
|
| 478 |
text="Loading Google-Pegasus Model and Extracting summary. This might take a few seconds depending on the length of your text..."
|
| 479 |
):
|
| 480 |
summarizer_model = google_model()
|
| 481 |
+
summarized_text = summarizer_model(text_to_summarize, max_length=max_len, min_length=min_len,clean_up_tokenization_spaces=True,no_repeat_ngram_size=4)
|
| 482 |
summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
|
| 483 |
|
| 484 |
with st.spinner("Calculating and matching entities, this takes a few seconds..."):
|
| 485 |
|
| 486 |
+
entity_match_html = highlight_entities(' '.join(text_to_summarize),summarized_text)
|
| 487 |
st.subheader("Summarized text with matched entities in Green and mismatched entities in Red relative to the original text")
|
| 488 |
st.markdown("####")
|
| 489 |
|