albertchristopher committed
Commit 414c30b · verified · 1 Parent(s): e146d8f

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +67 -147
src/streamlit_app.py CHANGED
@@ -1,158 +1,78 @@
- import os
- os.environ.setdefault("STREAMLIT_BROWSER_GATHERUSAGESTATS", "false")
- os.environ.setdefault("XDG_CACHE_HOME", "/data/.cache")
- os.environ.setdefault("HF_HOME", "/data/.cache/huggingface")
- os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/data/.cache/huggingface")
- os.environ.setdefault("TRANSFORMERS_CACHE", "/data/.cache/huggingface/transformers")
- # Optional: be explicit about HOME to prevent '/.streamlit'
- os.environ.setdefault("HOME", "/home/user")
-
- import textwrap
- import streamlit as st
- from typing import Optional
- from utils import (
-     load_bitnet_model,
-     map_reduce_summarize,
- )

- # ---------- Page Config ----------
- st.set_page_config(page_title="BitNet Summarizer", page_icon="📝", layout="wide")

- st.title("📝 Text Summarizer — BitNet on Hugging Face Spaces")
- st.caption(
-     "Open-source summarizer powered by **microsoft/bitnet-b1.58-2B-4T** with a map‑reduce strategy for long documents."
- )

- # ---------- Sidebar Controls ----------
- with st.sidebar:
-     st.header("Engine")
-     engine = st.radio(
-         "Choose inference engine:",
-         options=["BitNet (local)", "HF Inference API (fallback)"],
-         index=0,
-         help="Local BitNet loads inside your Space. Fallback uses a hosted summarization model via HF Inference API.",
-     )
-
-     st.header("Generation Settings")
-     temperature = st.slider("temperature", 0.0, 1.5, 0.3, 0.05)
-     top_p = st.slider("top_p", 0.5, 1.0, 0.95, 0.01)
-     chunk_tokens = st.slider("chunk size (tokens)", 400, 1600, 900, 50)
-     chunk_overlap = st.slider("overlap (tokens)", 0, 200, 60, 5)
-     chunk_max_new = st.slider("chunk max_new_tokens", 32, 256, 128, 8)
-     final_max_new = st.slider("final max_new_tokens", 64, 512, 220, 8)
-
-     st.markdown("---")
-     st.subheader("HF Inference API Settings")
-     hf_token = st.text_input(
-         "HF_TOKEN (optional)",
-         type="password",
-         help="Personal access token with Inference API scope if you want to use the fallback engine.",
-         value=os.environ.get("HF_TOKEN", ""),
-     )
-
- # ---------- Input Area ----------
- DEFAULT_TEXT = (
-     "The Hugging Face Spaces platform makes it simple to build and share machine learning apps. "
-     "This example demonstrates a map‑reduce summarization approach using an efficient BitNet model. "
-     "For longer documents, we split text into token chunks, summarize each piece, and merge the summaries "
-     "into a coherent final summary."
  )

- text = st.text_area(
-     "Paste your text here:",
-     value=DEFAULT_TEXT,
-     height=260,
-     help="Works with long documents via chunking. You can also try the sample text to see the pipeline.",
  )

- colA, colB = st.columns([1, 2])
- with colA:
-     run = st.button("Summarize", type="primary")
- with colB:
-     st.write("")
-
- # ---------- Inference API Fallback ----------
- # Lightweight helper using huggingface_hub's InferenceClient
- from huggingface_hub import InferenceClient
-
- def summarize_via_hf_api(text: str, token: str) -> Optional[str]:
-     try:
-         client = InferenceClient(token=token)
-         # A small, instruction‑tuned summarizer works well as fallback
-         # DistilBART CNN is common; switch to any hosted summarization model you prefer
-         model = "sshleifer/distilbart-cnn-12-6"
-         out = client.text_generation(
-             model=model,
-             prompt=(
-                 "Summarize the following text in 3-6 concise sentences, preserving key facts and avoiding hallucinations.\n\n" + text
-             ),
-             max_new_tokens=220,
-             temperature=0.3,
-             top_p=0.95,
-         )
-         return out
-     except Exception as e:
-         st.error(f"HF Inference API error: {e}")
-         return None
-
- # ---------- Main Action ----------
- if run:
-     if not text.strip():
-         st.warning("Please paste some text to summarize.")
-         st.stop()
-
-     if engine.startswith("HF Inference API"):
-         if not hf_token.strip():
-             st.error("Please provide an HF_TOKEN to use the Inference API fallback.")
-             st.stop()
-         with st.spinner("Calling HF Inference API…"):
-             summary = summarize_via_hf_api(text, hf_token)
-         if summary:
-             st.success("Done!")
-             st.markdown("### Summary")
-             st.write(summary)
-         st.stop()
-
-     # Local BitNet path
-     info_box = st.empty()
-     info_box.info(
-         "Loading BitNet model. On CPU this can take several minutes on first run; subsequent runs are cached."
-     )
-
-     @st.cache_resource(show_spinner=False)
-     def _load():
-         return load_bitnet_model()
-
-     tok, model = _load()
-     info_box.empty()
-
-     with st.spinner("Summarizing with BitNet (map‑reduce)…"):
-         summary = map_reduce_summarize(
-             text=text,
-             tokenizer=tok,
-             model=model,
-             max_chunk_tokens=chunk_tokens,
-             overlap=chunk_overlap,
-             chunk_max_new_tokens=chunk_max_new,
-             final_max_new_tokens=final_max_new,
-             temperature=temperature,
-             top_p=top_p,
-         )
-
-     st.success("Done!")
-     st.markdown("### Summary")
-     st.write(summary)
-
-     with st.expander("Debug / details"):
-         st.markdown(
-             "- **Engine:** BitNet (local) \n"
-             f"- **chunk size:** {chunk_tokens} tokens, **overlap:** {chunk_overlap} tokens \n"
-             f"- **temperature:** {temperature}, **top_p:** {top_p} \n"
-             f"- **chunk max_new_tokens:** {chunk_max_new}, **final max_new_tokens:** {final_max_new}"
-         )

  st.markdown("---")
  st.caption(
-     "Built with Streamlit + Transformers + Hugging Face Hub. Model: microsoft/bitnet-b1.58-2B-4T.\n"
-     "Tip: Select a GPU in Space settings for faster startup."
  )
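A note on the fallback helper removed above: `summarize_via_hf_api` routes a plain-text prompt through `client.text_generation` against `sshleifer/distilbart-cnn-12-6`, but DistilBART is an encoder-decoder summarization model and the hosted Inference API dispatches by task, so a text-generation request against it will typically fail. If this helper is ever restored, a task-matched call is safer. A minimal sketch, assuming `huggingface_hub`'s `InferenceClient.summarization` method; this is an editorial suggestion, not code from the commit:

```python
# Sketch only: not part of this commit.
from typing import Optional

import streamlit as st
from huggingface_hub import InferenceClient


def summarize_via_hf_api(text: str, token: str) -> Optional[str]:
    try:
        client = InferenceClient(token=token)
        # Call the summarization task directly; DistilBART is a seq2seq
        # summarization model, so text_generation is the wrong endpoint.
        out = client.summarization(text, model="sshleifer/distilbart-cnn-12-6")
        # Recent huggingface_hub versions return a SummarizationOutput object;
        # older ones return the summary string itself.
        return getattr(out, "summary_text", out)
    except Exception as e:
        st.error(f"HF Inference API error: {e}")
        return None
```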
 
+ # streamlit_app.py
+         return out
+     except Exception as e:
+         st.error(f"HF Inference API error: {e}")
+         return None



+
+ if run:
+     if not text.strip():
+         st.warning("Please paste some text to summarize.")
+         st.stop()
+
+
+     if engine.startswith("HF Inference API"):
+         if not hf_token.strip():
+             st.error("Please provide an HF_TOKEN to use the Inference API fallback.")
+             st.stop()
+         with st.spinner("Calling HF Inference API…"):
+             summary = summarize_via_hf_api(text, hf_token)
+         if summary:
+             st.success("Done!")
+             st.markdown("### Summary")
+             st.write(summary)
+         st.stop()
+
+
+     info_box = st.empty()
+     info_box.info("Loading BitNet model. On CPU this can take several minutes on first run; subsequent runs are cached.")
+
+
+     @st.cache_resource(show_spinner=False)
+     def _load():
+         return load_bitnet_model()
+
+
+     tok, model = _load()
+     info_box.empty()
+
+
+     with st.spinner("Summarizing with BitNet (map‑reduce)…"):
+         summary = map_reduce_summarize(
+             text=text,
+             tokenizer=tok,
+             model=model,
+             max_chunk_tokens=chunk_tokens,
+             overlap=chunk_overlap,
+             chunk_max_new_tokens=chunk_max_new,
+             final_max_new_tokens=final_max_new,
+             temperature=temperature,
+             top_p=top_p,
          )

+
+     st.success("Done!")
+     st.markdown("### Summary")
+     st.write(summary)
+
+
+     with st.expander("Debug / details"):
+         st.markdown(
+             "- **Engine:** BitNet (local) \n"
+             f"- **chunk size:** {chunk_tokens} tokens, **overlap:** {chunk_overlap} tokens \n"
+             f"- **temperature:** {temperature}, **top_p:** {top_p} \n"
+             f"- **chunk max_new_tokens:** {chunk_max_new}, **final max_new_tokens:** {final_max_new}"
          )


  st.markdown("---")
  st.caption(
+     "Built with Docker + Streamlit + Transformers + Hugging Face Hub. Model: microsoft/bitnet-b1.58-2B-4T.\n"
+     "Tip: Select a GPU in Space settings for faster startup."
  )
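Both versions of the app lean on two helpers imported from `utils`, which this commit does not touch: `load_bitnet_model` and `map_reduce_summarize`. For readers reconstructing the pipeline, here is a minimal sketch of what such helpers might look like, assuming a standard Transformers causal-LM setup; the prompts, dtype, and chunking details are illustrative assumptions, not the repository's actual implementation:

```python
# Sketch only: utils.py is not part of this diff; all details are assumptions.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "microsoft/bitnet-b1.58-2B-4T"


def load_bitnet_model():
    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
    model.eval()
    return tok, model


def _generate(tokenizer, model, prompt, max_new_tokens, temperature, top_p):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=temperature > 0,
            temperature=max(temperature, 1e-5),
            top_p=top_p,
        )
    # Strip the prompt tokens and decode only the newly generated text.
    return tokenizer.decode(
        out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    ).strip()


def map_reduce_summarize(text, tokenizer, model, max_chunk_tokens=900, overlap=60,
                         chunk_max_new_tokens=128, final_max_new_tokens=220,
                         temperature=0.3, top_p=0.95):
    # Map: split the token stream into overlapping windows and summarize each.
    ids = tokenizer(text, add_special_tokens=False)["input_ids"]
    step = max(max_chunk_tokens - overlap, 1)
    chunks = [tokenizer.decode(ids[i:i + max_chunk_tokens])
              for i in range(0, len(ids), step)]
    partials = [
        _generate(tokenizer, model,
                  f"Summarize the following text concisely:\n\n{chunk}\n\nSummary:",
                  chunk_max_new_tokens, temperature, top_p)
        for chunk in chunks
    ]
    if len(partials) == 1:
        return partials[0]
    # Reduce: merge the partial summaries into one coherent final summary.
    joined = "\n".join(partials)
    return _generate(tokenizer, model,
                     f"Combine these partial summaries into one coherent summary:\n\n{joined}\n\nSummary:",
                     final_max_new_tokens, temperature, top_p)
```

The overlapping windows keep sentences that straddle a chunk boundary from being lost, and the single reduce pass matches the sidebar's `final max_new_tokens` budget; a real implementation might reduce hierarchically if the joined partial summaries exceed the model's context window.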