albertchristopher committed
Commit 6b9b532 · verified · 1 Parent(s): 0ed32d5

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +97 -78
src/streamlit_app.py CHANGED
@@ -1,78 +1,97 @@
- # streamlit_app.py
-         return out
-     except Exception as e:
-         st.error(f"HF Inference API error: {e}")
-         return None
-
-
- if run:
-     if not text.strip():
-         st.warning("Please paste some text to summarize.")
-         st.stop()
-
-     if engine.startswith("HF Inference API"):
-         if not hf_token.strip():
-             st.error("Please provide an HF_TOKEN to use the Inference API fallback.")
-             st.stop()
-         with st.spinner("Calling HF Inference API…"):
-             summary = summarize_via_hf_api(text, hf_token)
-         if summary:
-             st.success("Done!")
-             st.markdown("### Summary")
-             st.write(summary)
-             st.stop()
-
-     info_box = st.empty()
-     info_box.info("Loading BitNet model. On CPU this can take several minutes on first run; subsequent runs are cached.")
-
-     @st.cache_resource(show_spinner=False)
-     def _load():
-         return load_bitnet_model()
-
-     tok, model = _load()
-     info_box.empty()
-
-     with st.spinner("Summarizing with BitNet (map-reduce)…"):
-         summary = map_reduce_summarize(
-             text=text,
-             tokenizer=tok,
-             model=model,
-             max_chunk_tokens=chunk_tokens,
-             overlap=chunk_overlap,
-             chunk_max_new_tokens=chunk_max_new,
-             final_max_new_tokens=final_max_new,
-             temperature=temperature,
-             top_p=top_p,
-         )
-
-     st.success("Done!")
-     st.markdown("### Summary")
-     st.write(summary)
-
-     with st.expander("Debug / details"):
-         st.markdown(
-             "- **Engine:** BitNet (local)\n"
-             f"- **chunk size:** {chunk_tokens} tokens, **overlap:** {chunk_overlap} tokens\n"
-             f"- **temperature:** {temperature}, **top_p:** {top_p}\n"
-             f"- **chunk max_new_tokens:** {chunk_max_new}, **final max_new_tokens:** {final_max_new}"
-         )
-
- st.markdown("---")
- st.caption(
-     "Built with Docker + Streamlit + Transformers + Hugging Face Hub. Model: microsoft/bitnet-b1.58-2B-4T.\n"
-     "Tip: Select a GPU in Space settings for faster startup."
- )
+ import os
+ import streamlit as st
+ import torch
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, pipeline
+ from huggingface_hub import InferenceClient
+
+ # Cache model loading to avoid re-downloading on every rerun
+ @st.cache_resource
+ def load_model(model_name):
+     """
+     Load the specified model and tokenizer. Returns a transformers pipeline for summarization or text generation.
+     """
+     if model_name == "microsoft/bitnet-b1.58-2B-4T":
+         # Load the BitNet model (causal LM) and its tokenizer
+         dtype = torch.float32  # use float32 on CPU
+         tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+         model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=dtype, device_map="auto")
+         # Create a text-generation pipeline for BitNet
+         gen_pipeline = pipeline(
+             "text-generation",
+             model=model, tokenizer=tokenizer,
+             max_new_tokens=256,   # default max summary length
+             temperature=0.2,      # low temperature for more focused output
+             pad_token_id=tokenizer.eos_token_id
+         )
+         return gen_pipeline, tokenizer  # return the tokenizer as well for prompt preparation
+     else:
+         # For seq2seq models like T5 or BART, use the summarization pipeline
+         summarizer = pipeline("summarization", model=model_name, tokenizer=model_name, device=-1)
+         return summarizer, None
+
+ # Set page configuration
+ st.set_page_config(page_title="Text Summarizer", page_icon="🤖")
+ st.title("📃 Text Summarizer")
+
+ # Model selection: local models and an option for the Hugging Face Inference API
+ model_options = [
+     "t5-small",
+     "facebook/bart-large-cnn",
+     "microsoft/bitnet-b1.58-2B-4T",
+     "Use Hugging Face Inference API (bart-large-cnn)"
+ ]
+ model_choice = st.selectbox("Choose a summarization model:", model_options,
+                             help="Select a model to use for generating the summary. 'Inference API' will call a hosted model via Hugging Face.")
+
+ # Input methods: text area and file uploader
+ text_input = st.text_area("Enter text to summarize (English only):", height=200)
+ uploaded_file = st.file_uploader("...or upload a text file", type=["txt"])
+ if uploaded_file is not None:
+     # If a file is uploaded, read it once and decode it (assuming a UTF-8 text file)
+     raw_bytes = uploaded_file.read()
+     try:
+         file_content = raw_bytes.decode("utf-8")
+     except Exception:
+         file_content = raw_bytes.decode("latin-1")  # fallback decoding
+     text_to_summarize = file_content
+ else:
+     text_to_summarize = text_input
+
+ # Button to generate the summary
+ if st.button("Summarize"):
+     if not text_to_summarize or text_to_summarize.strip() == "":
+         st.warning("Please provide some text (or upload a file) to summarize.")
+     else:
+         st.write("Generating summary...")
+         # Local model inference
+         if model_choice != "Use Hugging Face Inference API (bart-large-cnn)":
+             summarizer_pipeline, tok = load_model(model_choice)
+             if model_choice == "microsoft/bitnet-b1.58-2B-4T":
+                 # Prepare the BitNet prompt for summarization
+                 prompt = (
+                     "Summarize the text below in 2-3 concise sentences focusing on key facts and implications.\n"
+                     f"Text:\n{text_to_summarize}\nSummary:"
+                 )
+                 # Use the text-generation pipeline to complete the prompt
+                 result = summarizer_pipeline(prompt, max_new_tokens=200, do_sample=False)[0]["generated_text"]
+                 # The pipeline returns the full prompt + completion; keep only the part after 'Summary:'
+                 summary = result.split("Summary:")[-1].strip()
+             else:
+                 # For T5 or BART, use the summarization pipeline directly
+                 summary = summarizer_pipeline(text_to_summarize, max_length=150, min_length=30, do_sample=False)[0]["summary_text"]
+             st.subheader("Summary")
+             st.write(summary)
+         else:
+             # Use the Hugging Face Inference API with a hosted model (bart-large-cnn)
+             hf_token = os.getenv("HF_TOKEN") or (st.secrets["HF_TOKEN"] if "HF_TOKEN" in st.secrets else None)
+             client = InferenceClient(model="facebook/bart-large-cnn", token=hf_token)
+             # Call the summarization API
+             try:
+                 result = client.summarization(text_to_summarize)
+                 # The result is an object with a `summary_text` attribute (or a dict with 'summary_text')
+                 summary_text = result.summary_text if hasattr(result, "summary_text") else result["summary_text"]
+             except Exception as e:
+                 st.error(f"Error using Inference API: {e}")
+                 summary_text = None
+             if summary_text:
+                 st.subheader("Summary")
+                 st.write(summary_text)
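
For reference, the hosted-model branch added in this commit can be sanity-checked outside Streamlit with a minimal sketch like the one below. This is not part of the commit; it assumes `huggingface_hub` is installed, an `HF_TOKEN` environment variable is set, and the sample text passed to the client is purely illustrative.

```python
# Standalone sketch of the Inference API path used in src/streamlit_app.py (not part of this commit).
import os
from huggingface_hub import InferenceClient

client = InferenceClient(model="facebook/bart-large-cnn", token=os.getenv("HF_TOKEN"))
result = client.summarization(
    "Streamlit lets you build data apps in pure Python. This app adds a summarizer "
    "UI that can run local models or call a hosted bart-large-cnn endpoint."
)
# Depending on the huggingface_hub version, the result is a dataclass or a plain dict.
print(result.summary_text if hasattr(result, "summary_text") else result["summary_text"])
```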