Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,38 +12,29 @@ import requests
|
|
| 12 |
def mark_tokens_bold(string, tokens):
|
| 13 |
for token in tokens:
|
| 14 |
pattern = re.escape(token) #r"\b" + re.escape(token) + r"\b"
|
| 15 |
-
string = re.sub(pattern, "<span style='color:
|
| 16 |
return string
|
| 17 |
|
| 18 |
|
| 19 |
def process_results(results, highlight_terms):
|
| 20 |
if len(results) == 0:
|
| 21 |
-
return """<br><p
|
| 22 |
-
No results retrieved.</p><br><hr>"""
|
| 23 |
|
| 24 |
results_html = ""
|
| 25 |
for result in results:
|
| 26 |
text_html = result["text"]
|
| 27 |
text_html = mark_tokens_bold(text_html, highlight_terms)
|
| 28 |
-
|
| 29 |
-
"""
|
| 30 |
-
<p class='underline-on-hover' style='font-size:12px; font-family: Arial; color:#585858; text-align: left;'>
|
| 31 |
-
<a href='{}' target='_blank'>{}</a></p>""".format(
|
| 32 |
-
result["meta"]["url"], result["meta"]["url"]
|
| 33 |
-
)
|
| 34 |
-
if "meta" in result and result["meta"] is not None and "url" in result["meta"]
|
| 35 |
-
else ""
|
| 36 |
-
)
|
| 37 |
docid_html = str(result["docid"])
|
| 38 |
|
| 39 |
licenses = " | ".join(result["repo_license"])
|
| 40 |
repo_name = result["repo_name"]
|
| 41 |
repo_path = result["repo_path"]
|
| 42 |
|
| 43 |
-
results_html += """
|
| 44 |
-
<p style='font-size:16px; font-family: Arial; text-align: left;'>Repository name: <span style='color: #
|
| 45 |
-
<p style='font-size:16px; font-family: Arial; text-align: left;'>Repository path: <span style='color: #
|
| 46 |
-
<p style='font-size:16px; font-family: Arial; text-align: left;'>Repository licenses: <span style='color: #
|
| 47 |
<pre style='height: 600px; overflow: scroll;'><code>{}</code></pre>
|
| 48 |
<br>
|
| 49 |
""".format(
|
|
@@ -74,18 +65,14 @@ def scisearch(query, language, num_results=10):
|
|
| 74 |
return process_results(results, highlight_terms)
|
| 75 |
|
| 76 |
|
| 77 |
-
description = """# <p style="text-align: center;">
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
you to search through the ROOTS corpus. We serve a BM25 index for each language or group of languages included in
|
| 81 |
-
ROOTS. You can read more about the details of the tool design
|
| 82 |
-
[here](https://huggingface.co/spaces/bigscience-data/scisearch/blob/main/roots_search_tool_specs.pdf). For more
|
| 83 |
-
information and instructions on how to access the full corpus check [this form](https://forms.gle/qyYswbEL5kA23Wu99)."""
|
| 84 |
|
| 85 |
|
| 86 |
if __name__ == "__main__":
|
| 87 |
demo = gr.Blocks(
|
| 88 |
-
css=".
|
| 89 |
)
|
| 90 |
|
| 91 |
with demo:
|
|
|
|
| 12 |
def mark_tokens_bold(string, tokens):
    """Wrap every occurrence of each token in ``string`` in highlight markup.

    Each match is replaced by
    ``<span style='color: #ff75b3;'><b>TOKEN</b></span>``.

    Args:
        string: Text to search. It is not HTML-escaped here.
        tokens: Iterable of literal substrings to highlight. Empty tokens
            are ignored and duplicates are collapsed.

    Returns:
        ``string`` with all matches wrapped; unchanged when there is nothing
        to highlight.
    """
    # Drop empty tokens (an empty pattern matches at every position) and
    # duplicates, then try longer tokens first so that a token which is a
    # prefix of another does not shadow the longer match.
    candidates = sorted(
        dict.fromkeys(token for token in tokens if token),
        key=len,
        reverse=True,
    )
    if not candidates:
        return string

    # One alternation applied in a single pass: markup inserted for one token
    # is never rescanned, so tokens such as "span" or "b" cannot corrupt the
    # tags that were added for an earlier token.
    pattern = re.compile("|".join(re.escape(token) for token in candidates))

    def _wrap(match):
        # A callable replacement is inserted verbatim, so backslashes in the
        # matched text are not interpreted as template escapes by re.sub.
        return "<span style='color: #ff75b3;'><b>" + match.group(0) + "</b></span>"

    return pattern.sub(_wrap, string)
|
| 17 |
|
| 18 |
|
| 19 |
def process_results(results, highlight_terms):
|
| 20 |
if len(results) == 0:
|
| 21 |
+
return """<br><p>No results retrieved.</p><br><hr>"""
|
|
|
|
| 22 |
|
| 23 |
results_html = ""
|
| 24 |
for result in results:
|
| 25 |
text_html = result["text"]
|
| 26 |
text_html = mark_tokens_bold(text_html, highlight_terms)
|
| 27 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
docid_html = str(result["docid"])
|
| 29 |
|
| 30 |
licenses = " | ".join(result["repo_license"])
|
| 31 |
repo_name = result["repo_name"]
|
| 32 |
repo_path = result["repo_path"]
|
| 33 |
|
| 34 |
+
results_html += """\
|
| 35 |
+
<p style='font-size:16px; font-family: Arial; text-align: left;'>Repository name: <span style='color: #ff75b3;'>{}</span></p>
|
| 36 |
+
<p style='font-size:16px; font-family: Arial; text-align: left;'>Repository path: <span style='color: #ff75b3;'>{}</span></p>
|
| 37 |
+
<p style='font-size:16px; font-family: Arial; text-align: left;'>Repository licenses: <span style='color: #ff75b3;'>{}</span></p>
|
| 38 |
<pre style='height: 600px; overflow: scroll;'><code>{}</code></pre>
|
| 39 |
<br>
|
| 40 |
""".format(
|
|
|
|
| 65 |
return process_results(results, highlight_terms)
|
| 66 |
|
| 67 |
|
| 68 |
+
description = """# <p style="text-align: center;"> π IceCoder Dataset Search π </p>
|
| 69 |
+
When you use [IceCoder]() to generate code it might produce exact copies of code in the pretraining dataset. In that case the code requires
|
| 70 |
+
and with this search tool we aim to provide help to finding out where the code came from."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
|
| 73 |
if __name__ == "__main__":
|
| 74 |
demo = gr.Blocks(
|
| 75 |
+
css=".gradio-container {background-color: #20233fff; color:white}"
|
| 76 |
)
|
| 77 |
|
| 78 |
with demo:
|