Spaces:
Runtime error
Runtime error
| from pdfminer.high_level import extract_text | |
| from docx import Document | |
| import pytesseract | |
| from PIL import Image | |
def extract_text_from_image(file_path):
    """Run OCR on an image file and return the recognized text.

    Args:
        file_path: Location of the image, passed straight to
            ``PIL.Image.open``.

    Returns:
        Whatever ``pytesseract.image_to_string`` produces for the image
        (with default arguments).
    """
    # Image.open keeps the underlying file handle open; the context manager
    # guarantees it is released once OCR has finished, even on error.
    with Image.open(file_path) as image:
        return pytesseract.image_to_string(image)
def extract_text_from_docx(file_path):
    """Return the text of a .docx file, one paragraph per line.

    The file is opened with ``docx.Document`` and the ``text`` of every
    entry in ``paragraphs`` is joined with newline characters, in
    document order.
    """
    document = Document(file_path)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
def extract_text_from_pdf(file_path):
    """Return the text of a PDF file.

    Thin wrapper that delegates to ``pdfminer.high_level.extract_text``
    with its default settings and returns the result unchanged.
    """
    return extract_text(file_path)