# Scraped page header: last commit 38840d9 ("Update app.py") by fahadkhan93 (verified).
import gradio as gr
from datasets import load_dataset
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# Load the Q&A dataset (training split only).
ds = load_dataset("STEM-AI-mtl/Electrical-engineering", split="train")

# Each retrieval document is the input (question) joined to the output
# (answer) by a single space, so a query can match terms from either side.
docs = [" ".join(pair) for pair in zip(ds["input"], ds["output"])]

# Learn the TF-IDF vocabulary and build the document-term matrix together.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(docs)
# Retrieval function
def retrieve_answer(user_q, top_k=1):
    """Return the dataset Q&A pairs most similar to ``user_q`` as Markdown.

    The question is transformed with the module-level ``vectorizer`` and
    compared by cosine similarity against every row of ``tfidf_matrix``.
    The best-scoring rows of ``ds`` are rendered as ``**Q:** ...`` /
    ``**A:** ...`` blocks separated by blank lines.

    Args:
        user_q: Question text. ``None`` or whitespace-only text counts as
            "no question".
        top_k: Maximum number of Q&A pairs to return.

    Returns:
        A Markdown string. An empty string when ``top_k`` is less than 1,
        and a short notice (instead of an arbitrary dataset entry) when the
        question is blank or has zero similarity to every document.
    """
    query = (user_q or "").strip()
    if not query:
        return "Please enter a question."
    if top_k < 1:
        # A negative slice bound would drop entries from the tail rather
        # than limit the result count, so treat it like "no results".
        return ""

    vec = vectorizer.transform([query])
    sims = cosine_similarity(vec, tfidf_matrix).flatten()

    # Stable ordering keeps ties in dataset order, making results repeatable.
    ranked = np.argsort(-sims, kind="stable")[:top_k]

    # A zero score means the document shares no weighted term with the
    # query; returning it would present an unrelated entry as the answer.
    hits = [int(i) for i in ranked if sims[i] > 0]
    if not hits:
        return "No matching answer found. Try rephrasing your question."

    # Fetch each column once rather than once per hit.
    questions = ds["input"]
    answers = ds["output"]
    return "\n\n".join(
        f"**Q:** {questions[i]}\n**A:** {answers[i]}" for i in hits
    )
# Gradio app
# NOTE(review): the original indentation was lost; this layout assumes only
# the question textbox sits inside the Row - confirm against the intended UI.
with gr.Blocks() as demo:
    # Page heading followed by a short usage blurb.
    gr.Markdown("# 🤖 Electronics Engineering Q&A Chatbot")
    gr.Markdown(
        "Ask any electronics-related question and get an AI-assisted answer based on a curated dataset."
    )

    with gr.Row():
        user_q = gr.Textbox(
            label="Your Question",
            lines=2,
            placeholder="e.g. What is the purpose of a Zener diode?",
        )

    answer_box = gr.Markdown(label="Answer")
    submit_btn = gr.Button("Get Answer")

    # Clicking the button passes the textbox contents to retrieve_answer and
    # sends its return value to the answer component.
    submit_btn.click(retrieve_answer, user_q, answer_box)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()