Spaces:
Sleeping
Sleeping
| from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader | |
| from langchain_community.document_loaders import UnstructuredPowerPointLoader | |
| from langchain_cohere.llms import Cohere | |
| from langchain.chains.summarize import load_summarize_chain | |
| from pathlib import Path | |
def summarize_file(method, files):
    """Summarize the first file in *files* with a Cohere-backed LangChain chain.

    Only ``files[0]`` is processed; any further entries are ignored.

    Args:
        method: Passed straight through as ``chain_type`` to
            ``load_summarize_chain`` (LangChain documents values such as
            ``"stuff"``, ``"map_reduce"`` and ``"refine"``).
        files: Non-empty sequence of file paths. The suffix of the first
            path selects the loader: ``.pdf``, ``.docx`` or ``.pptx``
            (case-insensitive).

    Returns:
        The value produced by the summarization chain's ``run`` call
        (presumably the summary text -- confirm against the LangChain
        version in use).

    Raises:
        ValueError: If *files* is empty/``None`` or the first file has an
            unsupported extension.
    """
    # Fail with a clear message instead of an opaque IndexError/TypeError
    # when nothing was uploaded.
    if not files:
        raise ValueError("No file provided to summarize")
    file = files[0]

    # Validate the extension and pick the loader *before* building the LLM,
    # so unsupported input is rejected without needing API credentials.
    ext = Path(file).suffix.lower()
    if ext == '.pdf':
        loader = PyPDFLoader(file)
    elif ext == '.docx':
        loader = Docx2txtLoader(file)
    elif ext == '.pptx':
        loader = UnstructuredPowerPointLoader(file)
    else:
        raise ValueError(f"Unsupported file extension: {ext}")
    docs = loader.load_and_split()

    # temperature=0 is the setting the original code used for the Cohere LLM.
    llm = Cohere(temperature=0)

    # Build a summarization chain with the requested method and run it.
    # NOTE(review): ``Chain.run`` is deprecated in recent LangChain releases
    # in favour of ``invoke``; kept here to preserve the current return value.
    summarization_chain = load_summarize_chain(llm=llm, chain_type=method)
    return summarization_chain.run(docs)
| # def summarize_files(method, files): | |
| # # Initialize the LLM | |
| # llm = Cohere(temperature=0) | |
| # summaries = [] | |
| # # Load and read each file | |
| # for file in files: | |
| # ext = Path(file).suffix.lower() | |
| # if ext == '.pdf': | |
| # loader = PyPDFLoader(file) | |
| # elif ext == '.docx': | |
| # loader = Docx2txtLoader(file) | |
| # elif ext == '.pptx': | |
| # loader = UnstructuredPowerPointLoader(file) | |
| # else: | |
| # raise ValueError(f"Unsupported file extension: {ext}") | |
| # docs = loader.load_and_split() | |
| # # Initialize a summarization chain with the specified method | |
| # summarization_chain = load_summarize_chain(llm=llm, chain_type=method) | |
| # summary = summarization_chain.run(docs) | |
| # summaries.append(summary) | |
| # return summaries | |