Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import pandas as pd | |
| import re | |
| import os | |
| import fitz | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
# Hugging Face checkpoint identifier shared by the tokenizer and the model, kept
# in one place so the two can never drift apart.
MODEL_CHECKPOINT = "potsawee/t5-large-generation-squad-QuestionAnswer"

# Both objects are built once at module import and then used as module-level
# globals by generate_question_answer_pairs.
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_CHECKPOINT)
def extract_text_from_pdf(pdf_file_path):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_file_path: Path of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages joined in page order, with nothing inserted
        between pages.
    """
    # Opening the document in a ``with`` block closes it (and releases the
    # underlying file handle) even if extraction fails part-way through.
    with fitz.open(pdf_file_path) as doc:
        return "".join(page.get_text() for page in doc)
def _split_question_answer(generated):
    """Split a generated "question? answer" string at its first "?".

    Returns a ``(question, answer)`` tuple, or ``None`` when the text holds
    no "?" and therefore cannot be separated into the two parts.
    """
    question, mark, answer = generated.partition("?")
    if not mark:
        return None
    # partition() keeps everything after the first "?", so an answer that
    # itself contains a question mark is not cut short.
    return question + "?", answer.strip()


def generate_question_answer_pairs(pdf_file):
    """Generate question/answer pairs for each sentence of an uploaded PDF.

    The PDF text is split after every ".", "!" or "?"; each fragment is fed
    to the module-level ``tokenizer``/``model`` and the decoded output is
    split into a question and an answer at its first "?". Outputs without a
    "?" are discarded. The pairs are written to ``QAPairs.csv`` in the
    current working directory.

    Args:
        pdf_file: The uploaded file as delivered by the Gradio file input:
            either a path string or an object exposing the path as
            ``.name``. ``None`` means nothing was uploaded.

    Returns:
        A ``(text, csv_path)`` tuple matching the two outputs of the Gradio
        interface. ``text`` lists every pair as
        ``"Question: ...\\nAnswer: ...\\n\\n"``; ``csv_path`` is
        ``"QAPairs.csv"``. When no file was uploaded, ``text`` is a prompt
        to upload one and ``csv_path`` is ``None``.
    """
    if pdf_file is None:
        # Two outputs are declared on the interface, so two values must be
        # returned even on this early exit.
        return "Please upload a PDF file", None

    # Depending on how the file component is configured, Gradio hands over
    # either a plain path string or a file wrapper carrying it in ``.name``.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    pdf_text = extract_text_from_pdf(pdf_path)

    fragments = (piece.strip() for piece in re.split(r'(?<=[.!?])', pdf_text))
    # The split leaves empty and punctuation-only fragments (trailing text,
    # "..." runs); they cannot produce a question, so skip the model call.
    sentences = [
        piece for piece in fragments
        if any(ch.isalnum() for ch in piece)
    ]

    rows = []
    for sentence in sentences:
        # Truncate over-long fragments (e.g. text with no sentence
        # punctuation) so a single input cannot grow without bound.
        input_ids = tokenizer.encode(
            sentence,
            return_tensors="pt",
            truncation=True,
            max_length=512,
        )
        outputs = model.generate(input_ids, max_length=100, num_return_sequences=1)
        generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
        pair = _split_question_answer(generated)
        if pair is not None:
            rows.append(pair)

    # Build the frame once instead of concatenating inside the loop.
    df = pd.DataFrame(rows, columns=['Question', 'Answer'])
    df.to_csv("QAPairs.csv")

    result = "".join(
        f"Question: {question_part}\nAnswer: {answer_part}\n\n"
        for question_part, answer_part in rows
    )
    return result, "QAPairs.csv"
# Heading passed to the Gradio interface as its title.
title = "Question-Answer Pairs Generation"

# UI components: one PDF upload as input; a CSV download and a text box as
# outputs.
input_file = gr.File(label="Upload a PDF file")
output_file = gr.File(label="Download as csv")
output_text = gr.Textbox()

# The function's two return values map, in order, to the text box and then
# the downloadable file.
interface = gr.Interface(
    generate_question_answer_pairs,
    input_file,
    [output_text, output_file],
    title=title,
)

interface.launch()