Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,55 @@ from usearch.index import Index
|
|
| 4 |
from sentence_transformers import SentenceTransformer
|
| 5 |
from datasets import load_dataset
|
| 6 |
from sentencex import segment
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
model = SentenceTransformer("Corran/SciGenAllMiniLM")
|
| 9 |
|
|
@@ -11,31 +60,42 @@ rf = load_dataset("Corran/RhetoricFunctionsList")['train']['rhetoric_function']
|
|
| 11 |
|
| 12 |
rf = list(rf)
|
| 13 |
rf_emb = model.encode(rf)
|
| 14 |
-
index = Index(ndim=rf_emb[0].size)
|
| 15 |
-
index.add(range(len(rf)), rf_emb)
|
| 16 |
|
| 17 |
-
|
|
|
|
| 18 |
global index, model, rf
|
| 19 |
-
|
| 20 |
-
matches = index.search(emb,4)
|
| 21 |
-
if type(input)==list and len(input)>1:
|
| 22 |
-
matches = [m[0] for m in matches]
|
| 23 |
-
else:
|
| 24 |
-
matches = [m for m in matches]
|
| 25 |
-
return [(rf[m.key],m.distance) for m in matches]
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def return_rf_scores(paragraph):
|
| 30 |
|
| 31 |
sentences = list(segment("en", paragraph))
|
| 32 |
matches = get_matches(sentences)
|
| 33 |
-
|
| 34 |
-
output =
|
|
|
|
| 35 |
for s,m in zip(sentences,matches):
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
return output
|
| 39 |
|
| 40 |
-
|
|
|
|
| 41 |
demo.launch()
|
|
|
|
| 4 |
from sentence_transformers import SentenceTransformer
|
| 5 |
from datasets import load_dataset
|
| 6 |
from sentencex import segment
|
| 7 |
+
from usearch.index import search, MetricKind, Matches, BatchMatches
|
| 8 |
+
|
| 9 |
+
# Opening HTML and stylesheet for the tooltip markup built in
# return_rf_scores(); that function appends one <div class="tooltip"> per
# sentence and then closes <body> and <html>.
#
# NOTE: a former `width: "100%";` declaration on .tooltiptext was dropped.
# A quoted value is invalid CSS and browsers ignored it, so rendering is
# unchanged by its removal.
HTML_Output = """<html><head><style>/* Tooltip container */
.tooltip {
position: relative;
width: 600px;
display: inline-block;
border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
}

/* Tooltip text */
.tooltip .tooltiptext {
visibility: hidden;
background-color: #555;
color: #34e1eb;
text-align: center;
padding: 5px 0;
border-radius: 6px;

/* Position the tooltip text */
position: absolute;
z-index: 1;
top: 125%;
left: 50%;
margin-left: -60px;

/* Fade in tooltip */
opacity: 0;
transition: opacity 0.3s;
}

/* Tooltip arrow: sits on the top edge of the tooltip and points up at the text */
.tooltip .tooltiptext::before {
content: "";
position: absolute;
bottom: 100%;
left: 50%;
margin-left: -5px;
border-width: 5px;
border-style: solid;
border-color: transparent transparent #555 transparent;
}

/* Show the tooltip text when you mouse over the tooltip container */
.tooltip:hover .tooltiptext {
visibility: visible;
opacity: 1;
}</style></head><body>"""
|
| 56 |
|
| 57 |
model = SentenceTransformer("Corran/SciGenAllMiniLM")
|
| 58 |
|
|
|
|
| 60 |
|
| 61 |
rf = list(rf)
|
| 62 |
rf_emb = model.encode(rf)
|
|
|
|
|
|
|
| 63 |
|
| 64 |
+
|
| 65 |
+
def get_matches(inputs):
    """Find the nearest rhetoric functions for each input sentence.

    Args:
        inputs: Iterable of sentence strings.

    Returns:
        A list with one entry per sentence, in input order. Each entry is a
        list of ``(rhetoric_function, distance)`` tuples for the matches
        returned by a 3-nearest search against ``rf_emb``; ``distance`` is
        the squared-L2 distance rounded to two decimals and rendered as a
        string.
    """
    sentences = list(inputs)
    if not sentences:
        return []

    # Encode every sentence in one call so batch_size actually batches work;
    # encoding one string at a time never forms a batch.
    embeddings = model.encode(sentences, batch_size=128)

    paragraph_matches = []
    for embedding in embeddings:
        # Search one vector at a time so the result is always a single-query
        # match set, regardless of how many sentences were passed in.
        matches = search(rf_emb, embedding, 3, MetricKind.L2sq, exact=True)
        paragraph_matches.append(
            [(rf[match.key], str(round(match.distance, 2))) for match in matches]
        )

    return paragraph_matches
|
| 79 |
|
| 80 |
|
| 81 |
def return_rf_scores(paragraph):
    """Render a paragraph as HTML with a hover tooltip per sentence.

    The paragraph is split into English sentences with ``segment``; each
    sentence becomes a ``<div class="tooltip">`` whose tooltip lists the
    ``label : distance`` pairs returned by ``get_matches`` for it.

    Args:
        paragraph: Free text entered by the user.

    Returns:
        A complete HTML document string: ``HTML_Output``, one tooltip block
        per sentence, then the closing ``</body></html>``.
    """
    # Local import keeps this block self-contained; the module does not
    # otherwise import the stdlib ``html`` package.
    import html

    sentences = list(segment("en", paragraph))
    matches = get_matches(sentences)

    parts = [HTML_Output]
    for sentence, sentence_matches in zip(sentences, matches):
        # Escape everything interpolated into markup: the sentence is raw
        # user input and would otherwise be interpreted as HTML.
        tooltip = "\n".join(
            f"{html.escape(str(label))} : {html.escape(str(score))}<br>"
            for label, score in sentence_matches
        )
        parts.append(
            f"""<div class="tooltip">{html.escape(sentence)}
<span class="tooltiptext">{tooltip}</span>
</div><br>"""
        )

    parts.append("</body></html>")
    return "".join(parts)
|
| 98 |
|
| 99 |
+
|
| 100 |
+
demo = gr.Interface(fn=return_rf_scores, inputs="text", outputs="html")
|
| 101 |
demo.launch()
|