hellopahe committed
Commit: 94692cf
1 parent: d777f98
remove redundancy
Files changed:
- lex_rank.py +5 -7
- lex_rank_L12.py +4 -7
- lex_rank_text2vec_v1.py +4 -7
- lex_rank_util.py +14 -6
- test.py +8 -0
lex_rank.py
CHANGED
@@ -5,7 +5,7 @@ nltk.download('punkt')
 
 
 from harvesttext import HarvestText
-from lex_rank_util import degree_centrality_scores, find_siblings
+from lex_rank_util import degree_centrality_scores, find_siblings_by_index
 from sentence_transformers import SentenceTransformer, util
 
 
@@ -30,15 +30,13 @@ class LexRank(object):
         # We argsort so that the first element is the sentence with the highest score
         most_central_sentence_indices = numpy.argsort(-centrality_scores)
 
-
+        central_and_siblings = find_siblings_by_index(sentences, most_central_sentence_indices, siblings, num)
         res = []
-        for index in most_central_sentence_indices:
-            if num < 0:
-                break
-            res.append(find_siblings(sentences, index, siblings)[1])
-            num -= 1
+        for index in central_and_siblings:
+            res.append(sentences[index])
         return res
 
+
     def contains_chinese(self, content: str):
         for _char in content:
             if '\u4e00' <= _char <= '\u9fa5':
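The hunk above replaces the old per-index find_siblings loop with one batch call. For orientation, a minimal standalone sketch of the new selection step using the names from the diff; summarize_sketch and the siblings/num defaults are illustrative and not part of the commit:

import numpy

from lex_rank_util import find_siblings_by_index


def summarize_sketch(sentences, centrality_scores, siblings=1, num=3):
    # centrality_scores is expected to be a numpy array; negating it makes
    # argsort put the highest-scoring sentence index first.
    most_central_sentence_indices = numpy.argsort(-centrality_scores)
    # One batch call collects the central indices plus their neighbouring sentences.
    central_and_siblings = find_siblings_by_index(
        sentences, most_central_sentence_indices, siblings, num)
    # Map the selected indices back onto the original sentence list.
    return [sentences[index] for index in central_and_siblings]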
lex_rank_L12.py
CHANGED
@@ -3,7 +3,7 @@ nltk.download('punkt')
 
 
 from harvesttext import HarvestText
-from lex_rank_util import degree_centrality_scores, find_siblings
+from lex_rank_util import degree_centrality_scores, find_siblings_by_index
 from sentence_transformers import SentenceTransformer, util
 
 
@@ -28,13 +28,10 @@ class LexRankL12(object):
         # We argsort so that the first element is the sentence with the highest score
         most_central_sentence_indices = numpy.argsort(-centrality_scores)
 
-
+        central_and_siblings = find_siblings_by_index(sentences, most_central_sentence_indices, siblings, num)
         res = []
-        for index in most_central_sentence_indices:
-            if num < 0:
-                break
-            res.append(find_siblings(sentences, index, siblings)[1])
-            num -= 1
+        for index in central_and_siblings:
+            res.append(sentences[index])
         return res
 
     def contains_chinese(self, content: str):
lex_rank_text2vec_v1.py
CHANGED
@@ -3,7 +3,7 @@ nltk.download('punkt')
 
 
 from harvesttext import HarvestText
-from lex_rank_util import degree_centrality_scores, find_siblings
+from lex_rank_util import degree_centrality_scores, find_siblings_by_index
 from sentence_transformers import SentenceTransformer, util
 
 
@@ -28,13 +28,10 @@ class LexRankText2VecV1(object):
         # We argsort so that the first element is the sentence with the highest score
         most_central_sentence_indices = numpy.argsort(-centrality_scores)
 
-
+        central_and_siblings = find_siblings_by_index(sentences, most_central_sentence_indices, siblings, num)
        res = []
-        for index in most_central_sentence_indices:
-            if num < 0:
-                break
-            res.append(find_siblings(sentences, index, siblings)[1])
-            num -= 1
+        for index in central_and_siblings:
+            res.append(sentences[index])
         return res
 
     def contains_chinese(self, content: str):
lex_rank_util.py
CHANGED
@@ -124,9 +124,17 @@ def stationary_distribution(
     return distribution
 
 
-def find_siblings(sentences, index, siblings):
-
-
-
-
-
+def find_siblings_by_index(sentences: [str], central_indices: [int], siblings: int, num: int):
+    ret = []
+    for idx in central_indices:
+        if num < 0:
+            break
+        head = max(idx - siblings, 0)
+        tail = min(idx + siblings + 1, len(sentences))
+        for i in range(head, tail):
+            if i not in ret:
+                ret.append(i)
+                num -= 1
+
+    print(ret)
+    return ret
test.py
ADDED
@@ -0,0 +1,8 @@
+s = set()
+for i in range
+
+arr = [i for i in s]
+
+print(type(arr))
+arr.sort(reverse=True)
+print(arr)
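test.py looks like a scratch check that a set of indices survives the round trip into a list and sorts in descending order. Its loop is cut off in the diff view above; a runnable variant, with an arbitrary bound of 10 standing in for the missing value:

s = set()
for i in range(10):  # the committed bound is cut off above; 10 is arbitrary
    s.add(i)

arr = [i for i in s]

print(type(arr))        # <class 'list'>
arr.sort(reverse=True)
print(arr)              # [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]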