# ---- MUST be first lines in app.py ----
import os
# Stop PyTorch Dynamo/Inductor from probing/compiling C++ kernels on CPU Spaces
os.environ.setdefault("TORCHINDUCTOR_DISABLE", "1")
os.environ.setdefault("TORCH_COMPILE_DISABLE", "1")
# (Optional) tell build tools where compilers live if present
os.environ.setdefault("CXX", "/usr/bin/g++")
os.environ.setdefault("CC", "/usr/bin/gcc")
# ---------------------------------------

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, pipeline
from huggingface_hub import InferenceClient
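
# App flow: pick a model, paste text or upload a .txt file, then click "Summarize".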

# Cache model loading to avoid re-downloading/reloading the model on every rerun
@st.cache_resource
def load_model(model_name):
| """ | |
| Load the specified model and tokenizer. Returns a transformers pipeline for summarization or text generation. | |
| """ | |
    if model_name == "microsoft/bitnet-b1.58-2B-4T":
        # Load BitNet model (causal LM) and tokenizer
        dtype = torch.float32  # use float32 on CPU
        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
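        # Note: device_map="auto" requires the `accelerate` package; on a CPU-only Space it simply places the model on CPU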
        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=dtype, device_map="auto")
        # Create a text-generation pipeline for BitNet
        gen_pipeline = pipeline(
            "text-generation",
            model=model, tokenizer=tokenizer,
            max_new_tokens=256,   # default max summary length
            temperature=0.2,      # a low temperature for more focused output
            pad_token_id=tokenizer.eos_token_id
        )
        return gen_pipeline, tokenizer  # return tokenizer as well for prompt preparation
    else:
        # For seq2seq models like T5 or BART, use the summarization pipeline
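        # device=-1 runs the pipeline on CPU; pass a GPU index (e.g. 0) when CUDA is available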
        summarizer = pipeline("summarization", model=model_name, tokenizer=model_name, device=-1)
        return summarizer, None

# Set page configuration
st.set_page_config(page_title="Text Summarizer", page_icon="🤗")
st.title("📝 Text Summarizer")

# Model selection: local models and an option for the Hugging Face Inference API
model_options = [
    "t5-small",
    "facebook/bart-large-cnn",
    "microsoft/bitnet-b1.58-2B-4T",
    "Use Hugging Face Inference API (bart-large-cnn)"
]
model_choice = st.selectbox(
    "Choose a summarization model:",
    model_options,
    help="Select a model to use for generating the summary. 'Inference API' will call a hosted model via Hugging Face."
)

# Input methods: text area and file uploader
text_input = st.text_area("Enter text to summarize (English only):", height=200)
uploaded_file = st.file_uploader("...or upload a text file", type=["txt"])
if uploaded_file is not None:
    # If a file is uploaded, read its bytes once and decode (UTF-8 with a Latin-1 fallback)
    raw_bytes = uploaded_file.read()
    try:
        file_content = raw_bytes.decode("utf-8")
    except UnicodeDecodeError:
        file_content = raw_bytes.decode("latin-1")  # fallback decoding
    text_to_summarize = file_content
else:
    text_to_summarize = text_input

# Button to generate summary
if st.button("Summarize"):
    if not text_to_summarize or text_to_summarize.strip() == "":
        st.warning("Please provide some text (or upload a file) to summarize.")
    else:
        st.write("Generating summary...")
        # Local model inference
        if model_choice != "Use Hugging Face Inference API (bart-large-cnn)":
            summarizer_pipeline, tok = load_model(model_choice)
            if model_choice == "microsoft/bitnet-b1.58-2B-4T":
                # Prepare BitNet prompt for summarization
                prompt = (
                    "Summarize the text below in 2-3 concise sentences focusing on key facts and implications.\n"
                    f"Text:\n{text_to_summarize}\nSummary:"
                )
                # Use the text-generation pipeline to complete the prompt
                result = summarizer_pipeline(prompt, max_new_tokens=200, do_sample=False)[0]["generated_text"]
                # The pipeline returns the full prompt + completion; keep only the text after 'Summary:'
                summary = result.split("Summary:")[-1].strip()
            else:
                # For T5 or BART, use the summarization pipeline directly
                summary = summarizer_pipeline(text_to_summarize, max_length=150, min_length=30, do_sample=False)[0]["summary_text"]
            st.subheader("Summary")
            st.write(summary)
        else:
            # Use the Hugging Face Inference API with a hosted model (bart-large-cnn)
            hf_token = os.getenv("HF_TOKEN") or (st.secrets["HF_TOKEN"] if "HF_TOKEN" in st.secrets else None)
            client = InferenceClient(model="facebook/bart-large-cnn", token=hf_token)
            # Call the summarization API
            try:
                result = client.summarization(text_to_summarize)
                # The result is an object with a `summary_text` attribute (or a dict with 'summary_text')
                summary_text = result.summary_text if hasattr(result, "summary_text") else result["summary_text"]
            except Exception as e:
                st.error(f"Error using Inference API: {e}")
                summary_text = None
            if summary_text:
                st.subheader("Summary")
                st.write(summary_text)