Spaces:

nickmuchi
/

article-text-summarizer

Running

nickmuchi committed on Apr 20, 2022

Commit

173ed2d

1 Parent(s): 9c8ceb7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,11 +16,10 @@ import docx2txt
 from io import StringIO
 from PyPDF2 import PdfFileReader
 import warnings
-import nltk
-nltk.download()
-from nltk import sent_tokenize
 warnings.filterwarnings("ignore")
@@ -71,7 +70,7 @@ def article_text_extractor(url: str):
 def chunk_clean_text(text):
-    sentences = sent_tokenize(text)
     current_chunk = 0
     chunks = []

 from io import StringIO
 from PyPDF2 import PdfFileReader
 import warnings
+import nltk.data
+tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
 warnings.filterwarnings("ignore")
 def chunk_clean_text(text):
+    sentences = tokenizer(text)
     current_chunk = 0
     chunks = []