```python
import gradio as gr
from huggingface_hub import InferenceClient
import os

# read the access token and (optionally) a custom endpoint URL from the environment
token = os.getenv("TOKEN")
endpoint = os.getenv(
    "ENDPOINT",
    "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3.1-8B-Instruct",
)

# initialize InferenceClient against the TGI endpoint
client = InferenceClient(model=endpoint, token=token)

# query the client in streaming mode, yielding the partial response as it grows
def inference(message, history):
    partial_message = ""
    for new_token in client.text_generation(message, max_new_tokens=100, stream=True):
        partial_message += new_token
        yield partial_message

gr.ChatInterface(
    inference,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Chat with me!", container=False, scale=7),
    title="Gradio 🤝 TGI",
    description="This is a demo of a Gradio UI consuming a TGI endpoint serving Llama 3.1 8B Instruct.",
    theme="abidlabs/Lime",
    examples=["Are tomatoes vegetables?"],
    cache_examples=True,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
).queue().launch()
```
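As written, `inference` discards the `history` argument, so every turn is answered without conversational context. If the endpoint exposes the OpenAI-compatible chat completion API, the handler can be made history-aware. The sketch below is one way to do that with `InferenceClient.chat_completion`; the `chat_inference` name is ours, and chat-completion support on the endpoint is an assumption, not something the original demo relies on.

```python
# Hedged sketch: stream a history-aware reply through the chat completion API.
# Assumes the endpoint supports chat completion; `history` arrives from
# gr.ChatInterface as a list of (user, assistant) message pairs.
def chat_inference(message, history):
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    partial_message = ""
    for chunk in client.chat_completion(messages, max_tokens=100, stream=True):
        # each streamed chunk carries an incremental delta of the reply
        partial_message += chunk.choices[0].delta.content or ""
        yield partial_message
```

Passing `chat_inference` to `gr.ChatInterface` in place of `inference` leaves the rest of the app unchanged.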