# ---- MUST be first lines in app.py ----
import os
# Stop PyTorch Dynamo/Inductor from probing/compiling C++ kernels on CPU Spaces
os.environ.setdefault("TORCHINDUCTOR_DISABLE", "1")
os.environ.setdefault("TORCH_COMPILE_DISABLE", "1")
# (Optional) tell build tools where compilers live if present
os.environ.setdefault("CXX", "/usr/bin/g++")
os.environ.setdefault("CC", "/usr/bin/gcc")
# ---------------------------------------
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, pipeline
from huggingface_hub import InferenceClient
# Cache model loading to avoid re-download on every run
@st.cache_resource
def load_model(model_name):
"""
Load the specified model and tokenizer. Returns a transformers pipeline for summarization or text generation.
"""
if model_name == "microsoft/bitnet-b1.58-2B-4T":
# Load BitNet model (causal LM) and tokenizer
dtype = torch.float32 # use float32 on CPU
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=dtype, device_map="auto")
# Create a text-generation pipeline for BitNet
gen_pipeline = pipeline(
"text-generation",
model=model, tokenizer=tokenizer,
max_new_tokens=256, # default max summary length
temperature=0.2, # a low temperature for more focused output
pad_token_id=tokenizer.eos_token_id
)
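        # Note: temperature only matters when sampling is enabled; the summarize call
        # further below passes do_sample=False, so decoding is greedy and deterministic.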
        return gen_pipeline, tokenizer  # return the tokenizer as well for prompt preparation
    else:
        # For seq2seq models like T5 or BART, use a summarization pipeline
        summarizer = pipeline("summarization", model=model_name, tokenizer=model_name, device=-1)
        return summarizer, None
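# Note: st.cache_resource keeps each loaded pipeline in memory for the whole server
# process, so repeated calls such as load_model("t5-small") reuse the cached object
# across Streamlit reruns instead of reloading the weights.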
# Set page configuration
st.set_page_config(page_title="Text Summarizer", page_icon="🤗")
st.title("📝 Text Summarizer")
# Model selection: local models and an option for Hugging Face API
model_options = [
"t5-small",
"facebook/bart-large-cnn",
"microsoft/bitnet-b1.58-2B-4T",
"Use Hugging Face Inference API (bart-large-cnn)"
]
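# t5-small and bart-large-cnn are seq2seq summarizers run locally; BitNet is a causal
# LM that is prompted to summarize; the last option calls a hosted model via the
# Hugging Face Inference API instead of loading weights in this Space.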
model_choice = st.selectbox(
    "Choose a summarization model:", model_options,
    help="Select a model to use for generating the summary. 'Inference API' will call a hosted model via Hugging Face."
)
# Input methods: Text area and File uploader
text_input = st.text_area("Enter text to summarize (English only):", height=200)
uploaded_file = st.file_uploader("...or upload a text file", type=["txt"])
if uploaded_file is not None:
    # If a file is uploaded, read it once and decode it (assuming a UTF-8 text file)
    raw_bytes = uploaded_file.read()
    try:
        file_content = raw_bytes.decode("utf-8")
    except UnicodeDecodeError:
        file_content = raw_bytes.decode("latin-1")  # fallback decoding
    text_to_summarize = file_content
else:
    text_to_summarize = text_input
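# An uploaded file, when present, takes precedence over text typed in the box above.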
# Button to generate summary
if st.button("Summarize"):
    if not text_to_summarize or text_to_summarize.strip() == "":
        st.warning("Please provide some text (or upload a file) to summarize.")
    else:
        st.write("Generating summary...")
        # Local model inference
        if model_choice != "Use Hugging Face Inference API (bart-large-cnn)":
            summarizer_pipeline, tok = load_model(model_choice)
            if model_choice == "microsoft/bitnet-b1.58-2B-4T":
                # Prepare the BitNet prompt for summarization
                prompt = (
                    "Summarize the text below in 2-3 concise sentences focusing on key facts and implications.\n"
                    f"Text:\n{text_to_summarize}\nSummary:"
                )
                # Use the text-generation pipeline to complete the prompt
                result = summarizer_pipeline(prompt, max_new_tokens=200, do_sample=False)[0]["generated_text"]
                # The pipeline returns the full prompt + completion; keep only the part after 'Summary:'
                summary = result.split("Summary:")[-1].strip()
            else:
                # For T5 or BART, use the summarization pipeline directly
                summary = summarizer_pipeline(text_to_summarize, max_length=150, min_length=30, do_sample=False)[0]["summary_text"]
            st.subheader("Summary")
            st.write(summary)
        else:
            # Use the Hugging Face Inference API with a hosted model (bart-large-cnn)
            hf_token = os.getenv("HF_TOKEN") or (st.secrets["HF_TOKEN"] if "HF_TOKEN" in st.secrets else None)
            client = InferenceClient(model="facebook/bart-large-cnn", token=hf_token)
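            # A token is optional here, but unauthenticated Inference API calls are
            # rate-limited more aggressively, so setting HF_TOKEN is recommended.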
            # Call the summarization API
            try:
                result = client.summarization(text_to_summarize)
                # The result is an object with a `summary_text` attribute (or a dict with 'summary_text')
                summary_text = result.summary_text if hasattr(result, "summary_text") else result["summary_text"]
            except Exception as e:
                st.error(f"Error using Inference API: {e}")
                summary_text = None
            if summary_text:
                st.subheader("Summary")
                st.write(summary_text)