Spaces:

boun-tabi-LMG
/

TURNA-GPU

Paused

App Files Files Community

TURNA-GPU / app.py

onurgu

Added a radio box to choose `turna_model_version`

95a7d48 over 1 year ago

raw

history blame contribute delete

20.2 kB

	import gradio as gr
	import spaces
	from transformers import pipeline
	import torch

	# Markdown body of the landing "TURNA" tab (rendered via gr.Markdown(DESCRIPTION)).
	# The text is user-facing: it lists the model's features, the fine-tuned tasks,
	# and the usage caveats shown to visitors.
	DESCRIPTION="""

	### a Turkish encoder-decoder language model

	Welcome to our Huggingface space, where you can explore the capabilities of TURNA.

	Key Features of TURNA:

	- Powerful Architecture: TURNA contains 1.1B parameters, and was pre-trained with an encoder-decoder architecture following the UL2 framework on 43B tokens from various domains.
	- Diverse Training Data: Our model is trained on a varied dataset of 43 billion tokens, covering a wide array of domains.
	- Broad Applications: TURNA is fine-tuned for a variety of generation and understanding tasks, including:
	- Summarization
	- Paraphrasing
	- News title generation
	- Sentiment classification
	- Text categorization
	- Named entity recognition
	- Part-of-speech tagging
	- Semantic textual similarity
	- Natural language inference

	Note: First inference might take time as the models are downloaded on-the-go.

	TURNA can generate toxic content or provide erroneous information. Double-check before usage.

	"""

	# Markdown footer rendered below the tabs (gr.Markdown(CITATION)):
	# a link to the paper followed by its BibTeX entry in a fenced code block.
	CITATION = """
	Refer to our [paper](https://arxiv.org/abs/2401.14373) for more details.

	### Citation
	```bibtex
	@misc{uludogan2024turna,
	title={TURNA: A Turkish Encoder-Decoder Language Model for Enhanced Understanding and Generation},
	author={Gökçe Uludoğan and Zeynep Yirmibeşoğlu Balal and Furkan Akkurt and Melikşah Türker and Onur Güngör and Susan Üsküdarlı},
	year={2024},
	eprint={2401.14373},
	archivePrefix={arXiv},
	primaryClass={cs.CL}
	}
	```
	"""


	# Example rows offered through gr.Examples in the tabs below.
	# Each outer list is a table of examples; each inner list is one row whose
	# columns line up with the leading input components of the tab.

	# One-column row used by the Sentiment Analysis tab.
	sentiment_example = [["Bu üründen çok memnun kaldım."]]
	# Two longer passages shared by the Paraphrase, Summarization and Title Generation tabs.
	long_text = [["Eyfel Kulesi (Fransızca: La tour Eiffel [la tuʀ ɛˈfɛl]), Paris'teki demir kule. Kule, aynı zamanda tüm dünyada Fransa'nın sembolü halini almıştır. İsmini, inşa ettiren Fransız inşaat mühendisi Gustave Eiffel'den alır.[1] En büyük turizm cazibelerinden biri olan Eyfel Kulesi, yılda 6 milyon turist çeker. 2002 yılında toplam ziyaretçi sayısı 200 milyona ulaşmıştır."], ["Kalp krizi geçirenlerin yaklaşık üçte birinin kısa bir süre önce grip atlattığı düşünülüyor. Peki grip virüsü ne yapıyor da kalp krizine yol açıyor? Karpuz şöyle açıkladı: Grip virüsü kanın yapışkanlığını veya pıhtılaşmasını artırıyor."]]
	# Short sentence used by both the POS and the NER tabs.
	ner_example = [["Benim adım Turna."]]
	# NOTE(review): not referenced anywhere in the visible code — confirm whether it is still needed.
	t2t_example = [["Paraphrase: Bu üründen çok memnun kaldım."]]
	# Sentence pair (first sentence, second sentence) for the NLI & STS tab.
	nli_example = [["Bunu çok beğendim.", "Bunu çok sevdim."]]
	# Pre-tokenised, lower-cased news snippet for the Text Categorization tab.
	text_category_example = [[" anadolu_efes e 18 lik star ! beko_basketbol_ligi nde iddialı bir kadroyla sezona giren anadolu_efes transfer harekatına devam ediyor"]]



	@spaces.GPU
	def nli(first_input, second_input, model_choice="turna_nli_nli_tr"):
	    """Score a sentence pair with the NLI or the semantic-similarity model.

	    Args:
	        first_input: First sentence typed by the user.
	        second_input: Second sentence typed by the user.
	        model_choice: ``"turna_nli_nli_tr"`` selects the NLI checkpoint;
	            every other value selects the STS-B similarity checkpoint.

	    Returns:
	        The ``generated_text`` field of the first pipeline result.
	    """
	    if model_choice != "turna_nli_nli_tr":
	        # Fallback branch: anything that is not the NLI model is treated as STS.
	        prompt = f"ilk cümle: {first_input} ikinci cümle: {second_input}"
	        checkpoint = "boun-tabi-LMG/turna_semantic_similarity_stsb_tr"
	    else:
	        prompt = f"hipotez: {first_input} önerme: {second_input}"
	        checkpoint = "boun-tabi-LMG/turna_nli_nli_tr"

	    # The pipeline is built per call and placed on device 0.
	    generator = pipeline(model=checkpoint, device=0)
	    outputs = generator(prompt)
	    return outputs[0]["generated_text"]


	@spaces.GPU
	def sentiment_analysis(input, model_choice="turna_classification_17bintweet_sentiment"):
	    """Classify the sentiment of ``input`` with the selected TURNA checkpoint.

	    Args:
	        input: Text to classify.
	        model_choice: Repository name under ``boun-tabi-LMG/`` to load.

	    Returns:
	        The ``generated_text`` field of the first pipeline result
	        (generation is capped at 4 new tokens).
	    """
	    checkpoint = f"boun-tabi-LMG/{model_choice}"
	    classifier = pipeline(model=checkpoint, device=0)
	    predictions = classifier(input, max_new_tokens=4)
	    return predictions[0]["generated_text"]

	@spaces.GPU
	def pos(input, model_choice, max_new_tokens, length_penalty, no_repeat_ngram_size):
	    """Tag parts of speech in ``input`` with one of the two POS checkpoints.

	    Args:
	        input: Sentence to tag.
	        model_choice: ``"turna_pos_imst"`` selects the IMST checkpoint; any
	            other value selects the BOUN checkpoint.
	        max_new_tokens: Upper bound on generated tokens.
	        length_penalty: Length penalty forwarded to generation.
	        no_repeat_ngram_size: N-gram size that may not repeat in the output.

	    Returns:
	        The ``generated_text`` field of the first pipeline result.
	    """
	    # Both branches differ only in the checkpoint, so pick it once.
	    checkpoint = (
	        "boun-tabi-LMG/turna_pos_imst"
	        if model_choice == "turna_pos_imst"
	        else "boun-tabi-LMG/turna_pos_boun"
	    )
	    tagger = pipeline(model=checkpoint, device=0)
	    outputs = tagger(
	        input,
	        # Values arrive from UI sliders and may be fractional; these two
	        # settings are counts, so they are truncated to integers.
	        max_new_tokens=int(max_new_tokens),
	        length_penalty=length_penalty,
	        no_repeat_ngram_size=int(no_repeat_ngram_size),
	    )
	    return outputs[0]["generated_text"]

	@spaces.GPU
	def ner(input, model_choice, max_new_tokens, length_penalty, no_repeat_ngram_size):
	    """Extract named entities from ``input`` with one of the two NER checkpoints.

	    Args:
	        input: Sentence to analyse.
	        model_choice: ``"turna_ner_wikiann"`` selects the WikiANN checkpoint;
	            any other value selects the Milliyet checkpoint.
	        max_new_tokens: Upper bound on generated tokens.
	        length_penalty: Length penalty forwarded to generation.
	        no_repeat_ngram_size: N-gram size that may not repeat in the output.

	    Returns:
	        The ``generated_text`` field of the first pipeline result.
	    """
	    # Both branches differ only in the checkpoint, so pick it once.
	    checkpoint = (
	        "boun-tabi-LMG/turna_ner_wikiann"
	        if model_choice == "turna_ner_wikiann"
	        else "boun-tabi-LMG/turna_ner_milliyet"
	    )
	    recognizer = pipeline(model=checkpoint, device=0)
	    outputs = recognizer(
	        input,
	        # Values arrive from UI sliders and may be fractional; these two
	        # settings are counts, so they are truncated to integers.
	        max_new_tokens=int(max_new_tokens),
	        length_penalty=length_penalty,
	        no_repeat_ngram_size=int(no_repeat_ngram_size),
	    )
	    return outputs[0]["generated_text"]


	@spaces.GPU
	def paraphrase(input, model_choice, max_new_tokens):
	    """Paraphrase ``input`` with one of the two paraphrasing checkpoints.

	    Args:
	        input: Text to paraphrase.
	        model_choice: ``"turna_paraphrasing_tatoeba"`` selects the Tatoeba
	            checkpoint; any other value selects the OpenSubtitles checkpoint.
	        max_new_tokens: Upper bound on generated tokens.

	    Returns:
	        The ``generated_text`` field of the first pipeline result.
	    """
	    # Both branches differ only in the checkpoint, so pick it once.
	    checkpoint = (
	        "boun-tabi-LMG/turna_paraphrasing_tatoeba"
	        if model_choice == "turna_paraphrasing_tatoeba"
	        else "boun-tabi-LMG/turna_paraphrasing_opensubtitles"
	    )
	    paraphraser = pipeline(model=checkpoint, device=0)
	    # The value arrives from a UI slider and may be fractional; a token
	    # budget is a count, so it is truncated to an integer.
	    outputs = paraphraser(input, max_new_tokens=int(max_new_tokens))
	    return outputs[0]["generated_text"]

	@spaces.GPU
	def summarize(input, model_choice, max_new_tokens, length_penalty, no_repeat_ngram_size):
	    """Summarise ``input`` with the selected summarization checkpoint.

	    Args:
	        input: Text to summarise.
	        model_choice: ``"turna_summarization_tr_news"`` or
	            ``"turna_summarization_mlsum"``.
	        max_new_tokens: Upper bound on generated tokens.
	        length_penalty: Length penalty forwarded to generation.
	        no_repeat_ngram_size: N-gram size that may not repeat in the output.

	    Returns:
	        The ``generated_text`` field of the first pipeline result.

	    Raises:
	        KeyError: If ``model_choice`` is not one of the two known names.
	    """
	    # Short name -> hub repository; indexing keeps the KeyError for unknown names.
	    checkpoints = {
	        name: f"boun-tabi-LMG/{name}"
	        for name in ("turna_summarization_tr_news", "turna_summarization_mlsum")
	    }
	    summarizer = pipeline(model=checkpoints[model_choice], device=0)
	    outputs = summarizer(
	        input,
	        max_new_tokens=max_new_tokens,
	        length_penalty=length_penalty,
	        no_repeat_ngram_size=no_repeat_ngram_size,
	    )
	    return outputs[0]["generated_text"]

	@spaces.GPU
	def generate_title(input, model_choice, max_new_tokens, length_penalty, no_repeat_ngram_size):
	    """Generate a news title for ``input`` with the selected checkpoint.

	    Args:
	        input: News text to title.
	        model_choice: ``"turna_title_generation_tr_news"`` or
	            ``"turna_title_generation_mlsum"``.
	        max_new_tokens: Upper bound on generated tokens.
	        length_penalty: Length penalty forwarded to generation.
	        no_repeat_ngram_size: N-gram size that may not repeat in the output.

	    Returns:
	        The ``generated_text`` field of the first pipeline result.

	    Raises:
	        KeyError: If ``model_choice`` is not one of the two known names.
	    """
	    model_mapping = {
	        "turna_title_generation_tr_news": "boun-tabi-LMG/turna_title_generation_tr_news",
	        "turna_title_generation_mlsum": "boun-tabi-LMG/turna_title_generation_mlsum",
	    }
	    title_model = pipeline(model=model_mapping[model_choice], device=0)
	    outputs = title_model(
	        input,
	        # Values arrive from UI sliders and may be fractional; these two
	        # settings are counts, so they are truncated to integers.
	        max_new_tokens=int(max_new_tokens),
	        length_penalty=length_penalty,
	        no_repeat_ngram_size=int(no_repeat_ngram_size),
	    )
	    return outputs[0]["generated_text"]


	@spaces.GPU
	def categorize(input):
	    """Assign a category to ``input`` using the TTC4900 classification checkpoint.

	    Args:
	        input: Text to categorise.

	    Returns:
	        The ``generated_text`` field of the first pipeline result
	        (generation is capped at 8 new tokens).
	    """
	    classifier = pipeline(
	        model="boun-tabi-LMG/turna_classification_ttc4900",
	        device=0,
	    )
	    predictions = classifier(input, max_new_tokens=8)
	    return predictions[0]["generated_text"]

	@spaces.GPU
	def turna(input, max_new_tokens, length_penalty,
	          top_k, top_p, temp, num_beams,
	          do_sample, no_repeat_ngram_size, repetition_penalty, turna_model_version):
	    """Continue ``input`` with a pre-trained TURNA checkpoint.

	    Args:
	        input: Prompt text; it is wrapped as ``[S2S] {input}<EOS>`` before generation.
	        max_new_tokens: Upper bound on generated tokens.
	        length_penalty: Length penalty forwarded to generation.
	        top_k: Top-k sampling cutoff.
	        top_p: Nucleus sampling probability mass.
	        temp: Sampling temperature (forwarded as ``temperature``).
	        num_beams: Number of beams.
	        do_sample: Whether to sample instead of decoding greedily / by beams.
	        no_repeat_ngram_size: N-gram size that may not repeat in the output.
	        repetition_penalty: Repetition penalty forwarded to generation.
	        turna_model_version: Repository name under ``boun-tabi-LMG/`` to load.

	    Returns:
	        The ``generated_text`` field of the first pipeline result.
	    """
	    # Named ``generator`` so the function's own name is not shadowed locally.
	    generator = pipeline(model=f"boun-tabi-LMG/{turna_model_version}", device=0)
	    prompt = f"[S2S] {input}<EOS>"

	    outputs = generator(
	        prompt,
	        # Values arrive from UI sliders and may be fractional; the settings
	        # below that are counts are truncated to integers.
	        max_new_tokens=int(max_new_tokens),
	        length_penalty=length_penalty,
	        top_k=int(top_k),
	        top_p=top_p,
	        temperature=temp,
	        num_beams=int(num_beams),
	        do_sample=do_sample,
	        no_repeat_ngram_size=int(no_repeat_ngram_size),
	        repetition_penalty=repetition_penalty,
	    )
	    return outputs[0]["generated_text"]


	# Root of the UI: a Blocks app bound to `demo`, using the "abidlabs/Lime" theme.
	with gr.Blocks(theme="abidlabs/Lime") as demo:

	# Page heading and logo (label, download and share controls are hidden).
	gr.Markdown("# TURNA")
	gr.Image("images/turna-logo.png", width=100, show_label=False, show_download_button=False, show_share_button=False)

	# Landing tab: static model description only, no inputs.
	with gr.Tab("TURNA"):
	gr.Markdown(DESCRIPTION)

	# Sentiment tab: model selector + text box -> sentiment_analysis -> output box.
	with gr.Tab("Sentiment Analysis"):
	gr.Markdown("TURNA fine-tuned on sentiment analysis. Enter text to analyse sentiment and pick the model (tweets or product reviews).")
	with gr.Column():
	with gr.Row():
	with gr.Column():
	# Radio values are the repository names passed to sentiment_analysis as model_choice.
	sentiment_choice = gr.Radio(choices = ["turna_classification_17bintweet_sentiment", "turna_classification_tr_product_reviews"], label ="Model", value="turna_classification_17bintweet_sentiment")
	sentiment_input = gr.Textbox(label="Sentiment Analysis Input")

	sentiment_submit = gr.Button()
	sentiment_output = gr.Textbox(label="Sentiment Analysis Output")
	# Input order must match the positional parameters (input, model_choice).
	sentiment_submit.click(sentiment_analysis, inputs=[sentiment_input, sentiment_choice], outputs=sentiment_output)
	# NOTE(review): the example rows hold only the text column while two input
	# components are listed — confirm the model selector is meant to stay unset by examples.
	sentiment_examples = gr.Examples(examples = sentiment_example, inputs = [sentiment_input, sentiment_choice], outputs=sentiment_output, fn=sentiment_analysis)

	# Text categorization tab: single text box -> categorize -> output box.
	with gr.Tab("Text Categorization"):
	gr.Markdown("TURNA fine-tuned on text categorization. Enter text to categorize text or try the example.")
	with gr.Column():
	with gr.Row():
	with gr.Column():
	text_input = gr.Textbox(label="Text Categorization Input")

	text_submit = gr.Button()
	text_output = gr.Textbox(label="Text Categorization Output")
	# categorize takes only the text; there is no model selector in this tab.
	text_submit.click(categorize, inputs=[text_input], outputs=text_output)
	text_examples = gr.Examples(examples = text_category_example,inputs=[text_input], outputs=text_output, fn=categorize)


	# NLI & STS tab: model selector + two sentences -> nli -> output box.
	with gr.Tab("NLI & STS"):
	gr.Markdown("TURNA fine-tuned on natural language inference or semantic textual similarity. Enter text to infer entailment or measure semantic similarity. ")
	with gr.Column():
	with gr.Row():
	with gr.Column():
	# Selecting the second choice routes the pair to the semantic-similarity model inside nli.
	nli_choice = gr.Radio(choices = ["turna_nli_nli_tr", "turna_semantic_similarity_stsb_tr"], label ="Model", value="turna_nli_nli_tr")
	nli_first_input = gr.Textbox(label="First Sentence")
	nli_second_input = gr.Textbox(label="Second Sentence")

	nli_submit = gr.Button()
	# The same output box (labelled "NLI Output") is used for both model choices.
	nli_output = gr.Textbox(label="NLI Output")
	# Input order must match nli(first_input, second_input, model_choice).
	nli_submit.click(nli, inputs=[nli_first_input, nli_second_input, nli_choice], outputs=nli_output)
	# Example rows provide the two sentences only; no model column is supplied.
	nli_examples = gr.Examples(examples = nli_example, inputs = [nli_first_input, nli_second_input, nli_choice], outputs=nli_output, fn=nli)

	with gr.Tab("POS"):
	gr.Markdown("TURNA fine-tuned on part-of-speech-tagging. Enter text to parse parts of speech and pick the model.")
	with gr.Column():
	with gr.Row():
	with gr.Column():
	pos_choice = gr.Radio(choices = ["turna_pos_imst", "turna_pos_boun"], label ="Model", value="turna_pos_imst")
	with gr.Accordion("Advanced Generation Parameters"):
	max_new_tokens = gr.Slider(label = "Maximum length",
	minimum = 0,
	maximum = 64,
	value = 64)
	length_penalty = gr.Slider(label = "Length penalty",
	minimum = -10,
	maximum = 10,
	value=2.0)
	no_repeat_ngram_size =gr.Slider(label="No Repeat N-Gram Size", minimum=0,value=3,)
	with gr.Column():
	pos_input = gr.Textbox(label="POS Input")
	pos_submit = gr.Button()
	pos_output = gr.Textbox(label="POS Output")
	pos_submit.click(pos, inputs=[pos_input, pos_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=pos_output)
	pos_examples = gr.Examples(examples = ner_example, inputs = [pos_input, pos_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=pos_output, fn=pos)

	with gr.Tab("NER"):
	gr.Markdown("TURNA fine-tuned on named entity recognition. Enter text to parse named entities and pick the model.")
	with gr.Column():
	with gr.Row():
	with gr.Column():
	ner_choice = gr.Radio(choices = ["turna_ner_wikiann", "turna_ner_milliyet"], label ="Model", value="turna_ner_wikiann")
	with gr.Accordion("Advanced Generation Parameters"):
	max_new_tokens = gr.Slider(label = "Maximum length",
	minimum = 0,
	maximum = 64,
	value = 64)
	length_penalty = gr.Slider(label = "Length penalty",
	minimum = -10,
	maximum = 10,
	value=2.0)
	no_repeat_ngram_size =gr.Slider(label="No Repeat N-Gram Size", minimum=0,value=3,)
	with gr.Column():
	ner_input = gr.Textbox(label="NER Input")
	ner_submit = gr.Button()
	ner_output = gr.Textbox(label="NER Output")

	ner_submit.click(ner, inputs=[ner_input, ner_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=ner_output)
	ner_examples = gr.Examples(examples = ner_example, inputs = [ner_input, ner_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=ner_output, fn=ner)
	with gr.Tab("Paraphrase"):
	gr.Markdown("TURNA fine-tuned on paraphrasing. Enter text to paraphrase and pick the model.")
	with gr.Column():
	with gr.Row():
	with gr.Column():
	paraphrasing_choice = gr.Radio(choices = ["turna_paraphrasing_tatoeba", "turna_paraphrasing_opensubtitles"], label ="Model", value="turna_paraphrasing_tatoeba")
	with gr.Accordion("Advanced Generation Parameters"):
	max_new_tokens = gr.Slider(label = "Maximum length",
	minimum = 0,
	maximum = 20,
	value = 20)
	with gr.Column():
	paraphrasing_input = gr.Textbox(label = "Paraphrasing Input")
	paraphrasing_submit = gr.Button()
	paraphrasing_output = gr.Text(label="Paraphrasing Output")

	paraphrasing_submit.click(paraphrase, inputs=[paraphrasing_input, paraphrasing_choice, max_new_tokens], outputs=paraphrasing_output)
	paraphrase_examples = gr.Examples(examples = long_text, inputs = [paraphrasing_input, paraphrasing_choice, max_new_tokens], outputs=paraphrasing_output, fn=paraphrase)
	with gr.Tab("Summarization"):
	gr.Markdown("TURNA fine-tuned on summarization. Enter text to summarize and pick the model.")
	with gr.Column():
	with gr.Row():
	with gr.Column():
	sum_choice = gr.Radio(choices = ["turna_summarization_mlsum", "turna_summarization_tr_news"], label ="Model", value="turna_summarization_mlsum")
	with gr.Accordion("Advanced Generation Parameters"):
	max_new_tokens = gr.Slider(label = "Maximum length",
	minimum = 0,
	maximum = 512,
	value = 128)
	length_penalty = gr.Slider(label = "Length penalty",
	minimum = -10,
	maximum = 10,
	value=2.0)
	no_repeat_ngram_size =gr.Slider(label="No Repeat N-Gram Size", minimum=0,value=3,)
	with gr.Column():
	sum_input = gr.Textbox(label = "Summarization Input")
	sum_submit = gr.Button()
	sum_output = gr.Textbox(label = "Summarization Output")

	sum_submit.click(summarize, inputs=[sum_input, sum_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=sum_output)
	sum_examples = gr.Examples(examples = long_text, inputs = [sum_input, sum_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=sum_output, fn=summarize)

	with gr.Tab("Title Generation"):
	gr.Markdown("TURNA fine-tuned on news title generation. Enter news text to generate a title.")
	with gr.Column():
	with gr.Row():
	with gr.Column():
	title_choice = gr.Radio(choices = ["turna_title_generation_tr_news", "turna_title_generation_mlsum"], label ="Model", value="turna_title_generation_tr_news")
	with gr.Accordion("Advanced Generation Parameters"):
	max_new_tokens = gr.Slider(label = "Maximum length",
	minimum = 0,
	maximum = 64,
	value = 64)
	length_penalty = gr.Slider(label = "Length penalty",
	minimum = -10,
	maximum = 10,
	value=2.0)
	no_repeat_ngram_size =gr.Slider(label="No Repeat N-Gram Size", minimum=0,value=3,)
	with gr.Column():
	title_input = gr.Textbox(label = "News Title Generation Input")
	title_submit = gr.Button()
	title_output = gr.Textbox(label = "News Title Generation Output")

	title_submit.click(generate_title, inputs=[title_input, title_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=title_output)
	title_examples = gr.Examples(examples = long_text, inputs = [title_input, title_choice, max_new_tokens, length_penalty, no_repeat_ngram_size], outputs=title_output, fn=generate_title)

	with gr.Tab("Text Generation"):
	gr.Markdown("Pre-trained TURNA. Enter text to start generating.")
	with gr.Column():
	with gr.Row():
	with gr.Column():
	with gr.Accordion("Advanced Generation Parameters"):
	max_new_tokens = gr.Slider(label = "Maximum length",
	minimum = 0,
	maximum = 512,
	value = 128)
	length_penalty = gr.Slider(label = "Length penalty",
	value=1.0)
	top_k = gr.Slider(label = "Top-k", value=10)
	top_p = gr.Slider(label = "Top-p", value=0.95)
	temp = gr.Slider(label = "Temperature", value=1.0, minimum=0.1, maximum=100.0)
	no_repeat_ngram_size =gr.Slider(label="No Repeat N-Gram Size", minimum=0,value=3,)
	repetition_penalty = gr.Slider(label = "Repetition Penalty", minimum=0.0, value=3.1, step=0.1)
	num_beams = gr.Slider(label = "Number of beams", minimum=1,
	maximum=10, value=3)
	do_sample = gr.Radio(choices = [True, False], value = True, label = "Sampling")
	turna_model_version = gr.Radio(choices = ["TURNA", "TURNA-2850K", "TURNA-4350K"], value = "TURNA", label = "Choose TURNA model version")
	with gr.Column():
	text_gen_input = gr.Textbox(label="Text Generation Input")

	text_gen_submit = gr.Button()
	text_gen_output = gr.Textbox(label="Text Generation Output")
	text_gen_submit.click(turna, inputs=[text_gen_input, max_new_tokens, length_penalty,
	top_k, top_p, temp, num_beams,
	do_sample, no_repeat_ngram_size, repetition_penalty, turna_model_version], outputs=text_gen_output)
	text_gen_example = [["Bir varmış, bir yokmuş, evvel zaman içinde, kalbur saman içinde, uzak diyarların birinde bir turna"]]
	text_gen_examples = gr.Examples(examples = text_gen_example, inputs = [text_gen_input, max_new_tokens, length_penalty,
	top_k, top_p, temp, num_beams, do_sample, no_repeat_ngram_size, repetition_penalty, turna_model_version], outputs=text_gen_output, fn=turna)

	# Footer: paper link and BibTeX entry.
	gr.Markdown(CITATION)

	# Start serving the app; this runs at import time (there is no __main__ guard).
	demo.launch()