Spaces:

sudormrfbin
/

basic-name-recognition

Sleeping

Gokul Soumya

Initial commit

6f0237b 3 months ago

1.18 kB

	from pathlib import Path
	from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
	import gradio as gr

	# One Hugging Face Hub checkpoint id is shared by the tokenizer and the model.
	checkpoint = "dslim/distilbert-NER"
	tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	model = AutoModelForTokenClassification.from_pretrained(checkpoint)

	# Token-classification pipeline used by the rest of the app. The entries it
	# returns are read downstream via their "entity_group" and "score" keys.
	ner_pipeline = pipeline(
	    task="ner",
	    model=model,
	    tokenizer=tokenizer,
	    aggregation_strategy="simple",
	)

	# Example inputs shipped with the app, one text file per scenario.
	samples_dir = Path("samples")
	samples = [
	    "basic.txt",
	    "single-names-and-initials.txt",
	    "false-positive.txt",
	    "uncased-names.txt",
	]

	# Decode explicitly as UTF-8 so the examples load identically regardless of
	# the platform's default locale encoding.
	examples = [
	    (samples_dir / sample).read_text(encoding="utf-8").strip()
	    for sample in samples
	]

	# Human-readable label per example, derived from the file name:
	# "single-names-and-initials.txt" -> "Single Names And Initials".
	example_labels = [
	    Path(sample).stem.replace("-", " ").title() for sample in samples
	]


	def ner(text):
	    """Find person names in *text* and return them for highlighting.

	    Runs the module-level ``ner_pipeline`` on the input, keeps only the
	    entities whose ``entity_group`` is ``"PER"`` and whose ``score`` is
	    greater than 0.90, and relabels those entities as ``"PERSON"``.

	    Args:
	        text: The text to scan. ``None``, empty, or whitespace-only input
	            short-circuits and the model is not invoked.

	    Returns:
	        A dict with two keys: ``"text"`` (the input text, or ``""`` when the
	        input was ``None``) and ``"entities"`` (the list of retained entity
	        dicts, each with ``entity_group`` set to ``"PERSON"``).
	    """
	    # Blank input cannot contain a name; avoid handing None/"" to the model.
	    if not text or not text.strip():
	        return {"text": text or "", "entities": []}

	    # Filter and relabel in one pass; the remaining keys of each entity dict
	    # are copied through untouched.
	    people = [
	        {**entity, "entity_group": "PERSON"}
	        for entity in ner_pipeline(text)
	        if entity["entity_group"] == "PER" and entity["score"] > 0.90
	    ]
	    return {"text": text, "entities": people}


	# Web UI: a single text box feeds `ner`, and its result is rendered as
	# highlighted spans. The sample texts are offered as labelled examples.
	demo = gr.Interface(
	    fn=ner,
	    inputs=gr.Textbox(placeholder="Enter sentence here..."),
	    outputs=gr.HighlightedText(combine_adjacent=True, show_legend=True),
	    examples=examples,
	    example_labels=example_labels,
	)

	# Start the app with Gradio's debug mode switched on.
	demo.launch(debug=True)