Spaces:
Sleeping
Sleeping
File size: 1,183 Bytes
6f0237b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import gradio as gr
# Single identifier shared by the tokenizer and the model so the two cannot
# drift apart.
MODEL_ID = "dslim/distilbert-NER"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)

# Token-classification pipeline used by the demo. With
# aggregation_strategy="simple" the results carry an "entity_group" key
# (see the transformers pipeline documentation for the grouping rules).
ner_pipeline = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
# Directory that holds the example inputs offered in the demo UI.
samples_dir = Path("samples")

# Sample file names; the order here is the order of the examples and labels.
samples = [
    "basic.txt",
    "single-names-and-initials.txt",
    "false-positive.txt",
    "uncased-names.txt",
]

# Decode explicitly as UTF-8 so the examples load the same way regardless of
# the platform's default locale encoding.
examples = [
    (samples_dir / sample).read_text(encoding="utf-8").strip()
    for sample in samples
]

# Human-readable labels derived from the file names, e.g.
# "single-names-and-initials.txt" -> "Single Names And Initials".
# Path.stem drops only the final suffix, unlike a blanket ".txt" replace.
example_labels = [
    Path(sample).stem.replace("-", " ").title() for sample in samples
]
def ner(text):
    """Run the NER pipeline on *text* and keep only confident person spans.

    An entity is kept when its ``entity_group`` is ``"PER"`` and its
    ``score`` is strictly greater than 0.90. Kept entities are copied with
    ``entity_group`` relabelled to ``"PERSON"``; the pipeline's own dicts are
    not modified.

    Returns a dict with the original ``text`` and the filtered ``entities``
    list.
    """
    people = []
    for span in ner_pipeline(text):
        # Check the group first so the score is only read for "PER" spans.
        if not (span["entity_group"] == "PER" and span["score"] > 0.90):
            continue
        # Copy with the label overridden; key order of the span is preserved.
        people.append(dict(span, entity_group="PERSON"))
    return {"text": text, "entities": people}
# Input: a single text box. Output: the input text with the entities returned
# by `ner` highlighted.
text_input = gr.Textbox(placeholder="Enter sentence here...")
highlighted_output = gr.HighlightedText(combine_adjacent=True, show_legend=True)

# Wire the `ner` function to the components and attach the sample texts,
# each paired with its label.
demo = gr.Interface(
    fn=ner,
    inputs=text_input,
    outputs=highlighted_output,
    examples=examples,
    example_labels=example_labels,
)

# Start the app with Gradio's debug mode enabled.
demo.launch(debug=True)
|