"""Gradio chat UI backed by a MythoMax-L2-13B text-generation pipeline."""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Hub repository holding GGUF checkpoints of MythoMax-L2-13B.
MODEL = "TheBloke/MythoMax-L2-13B-GGUF"
# A GGUF repo ships several quantization levels and no regular tokenizer or
# weight files, so the exact checkpoint file has to be named explicitly.
# NOTE(review): file name follows the publisher's usual naming scheme --
# confirm it against the repository's file list.
# NOTE: transformers de-quantizes GGUF weights while loading, so the
# in-memory footprint is that of the full-precision model, not the
# quantized file. Loading GGUF also requires the `gguf` package.
GGUF_FILE = "mythomax-l2-13b.Q4_K_M.gguf"

# Speaker tags used by the plain-text prompt format.
USER_TAG = "User:"
ASSISTANT_TAG = "Assistant:"

tokenizer = AutoTokenizer.from_pretrained(MODEL, gguf_file=GGUF_FILE)
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    gguf_file=GGUF_FILE,
    device_map="auto",
)

pipeline_chat = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    # Without do_sample=True generation is greedy and temperature/top_p
    # are ignored.
    do_sample=True,
    temperature=0.9,
    top_p=0.95,
)


def _build_prompt(message, history):
    """Render the past turns plus the new message as a User/Assistant script.

    Accepts both history shapes Gradio can pass: a list of ``(user, bot)``
    pairs, or a list of ``{"role": ..., "content": ...}`` dicts.
    """
    lines = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if content is None:
                continue
            if role == "user":
                lines.append(f"{USER_TAG} {content}")
            elif role == "assistant":
                lines.append(f"{ASSISTANT_TAG} {content}")
            # Other roles have no slot in this prompt format; skip them.
            continue
        user, bot = turn
        # Either half of a pair may be None (e.g. a bot-only greeting).
        if user is not None:
            lines.append(f"{USER_TAG} {user}")
        if bot is not None:
            lines.append(f"{ASSISTANT_TAG} {bot}")
    lines.append(f"{USER_TAG} {message}")
    # Trailing open tag cues the model to answer as the assistant.
    lines.append(ASSISTANT_TAG)
    return "\n".join(lines)


def _extract_reply(completion):
    """Return only the assistant's first answer from a raw completion."""
    # The model may keep going and invent the next user turn; cut it off.
    answer, _, _ = completion.partition("\n" + USER_TAG)
    return answer.strip()


def chat_fn(message, history):
    """Generate the assistant's reply for a Gradio chat turn.

    Args:
        message: The user's newest message.
        history: Earlier conversation turns as supplied by
            ``gr.ChatInterface`` (pair-style or role/content dicts).

    Returns:
        The assistant's reply text with surrounding whitespace removed.
    """
    prompt = _build_prompt(message, history)
    # return_full_text=False yields only the continuation, so the reply does
    # not have to be dug out of the echoed prompt by string splitting.
    outputs = pipeline_chat(prompt, return_full_text=False)
    return _extract_reply(outputs[0]["generated_text"])


demo = gr.ChatInterface(chat_fn, title="💖 MythoMax Virtual GF")

if __name__ == "__main__":
    demo.launch()