"""Gradio demo that highlights person names found by a token-classification model."""

from pathlib import Path

import gradio as gr
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

MODEL_NAME = "dslim/distilbert-NER"

# Entities scoring at or below this value are dropped to limit false positives.
MIN_PERSON_SCORE = 0.90

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)
ner_pipeline = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    # With an aggregation strategy the pipeline reports "entity_group" spans,
    # which is the key `ner` filters on below.
    aggregation_strategy="simple",
)

samples_dir = Path("samples")
samples = [
    "basic.txt",
    "single-names-and-initials.txt",
    "false-positive.txt",
    "uncased-names.txt",
]

# Decode explicitly as UTF-8 so the examples do not depend on the platform's
# default locale encoding.
examples = [
    (samples_dir / sample).read_text(encoding="utf-8").strip() for sample in samples
]

# "single-names-and-initials.txt" -> "Single Names And Initials".
# `stem` strips only the trailing extension, never a ".txt" inside the name.
example_labels = [Path(sample).stem.replace("-", " ").title() for sample in samples]


def ner(text: str) -> dict:
    """Return the high-confidence person entities found in *text*.

    Args:
        text: The raw input text to analyse.

    Returns:
        A dict with the original ``"text"`` and an ``"entities"`` list. Each
        entity is the pipeline's own result dict for a ``"PER"`` group whose
        score exceeds ``MIN_PERSON_SCORE``, with ``"entity_group"`` relabelled
        to ``"PERSON"`` for display.
    """
    entities = [
        {**entity, "entity_group": "PERSON"}
        for entity in ner_pipeline(text)
        if entity["entity_group"] == "PER" and entity["score"] > MIN_PERSON_SCORE
    ]
    return {"text": text, "entities": entities}


demo = gr.Interface(
    ner,
    gr.Textbox(placeholder="Enter sentence here..."),
    gr.HighlightedText(combine_adjacent=True, show_legend=True),
    examples=examples,
    example_labels=example_labels,
)

# Launched at import time, as in the original script.
demo.launch(debug=True)