albertchristopher committed
Commit 414c30b · verified · 1 Parent(s): e146d8f

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +67 -147
src/streamlit_app.py CHANGED
@@ -1,158 +1,78 @@
- import os
- os.environ.setdefault("STREAMLIT_BROWSER_GATHERUSAGESTATS", "false")
- os.environ.setdefault("XDG_CACHE_HOME", "/data/.cache")
- os.environ.setdefault("HF_HOME", "/data/.cache/huggingface")
- os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/data/.cache/huggingface")
- os.environ.setdefault("TRANSFORMERS_CACHE", "/data/.cache/huggingface/transformers")
- # Optional: be explicit about HOME to prevent '/.streamlit'
- os.environ.setdefault("HOME", "/home/user")
-
- import textwrap
- import streamlit as st
- from typing import Optional
- from utils import (
-     load_bitnet_model,
-     map_reduce_summarize,
- )

- # ---------- Page Config ----------
- st.set_page_config(page_title="BitNet Summarizer", page_icon="📝", layout="wide")

- st.title("📝 Text Summarizer — BitNet on Hugging Face Spaces")
- st.caption(
-     "Open-source summarizer powered by **microsoft/bitnet-b1.58-2B-4T** with a map‑reduce strategy for long documents."
- )

- # ---------- Sidebar Controls ----------
- with st.sidebar:
-     st.header("Engine")
-     engine = st.radio(
-         "Choose inference engine:",
-         options=["BitNet (local)", "HF Inference API (fallback)"],
-         index=0,
-         help="Local BitNet loads inside your Space. Fallback uses a hosted summarization model via HF Inference API.",
-     )
-
-     st.header("Generation Settings")
-     temperature = st.slider("temperature", 0.0, 1.5, 0.3, 0.05)
-     top_p = st.slider("top_p", 0.5, 1.0, 0.95, 0.01)
-     chunk_tokens = st.slider("chunk size (tokens)", 400, 1600, 900, 50)
-     chunk_overlap = st.slider("overlap (tokens)", 0, 200, 60, 5)
-     chunk_max_new = st.slider("chunk max_new_tokens", 32, 256, 128, 8)
-     final_max_new = st.slider("final max_new_tokens", 64, 512, 220, 8)
-
-     st.markdown("---")
-     st.subheader("HF Inference API Settings")
-     hf_token = st.text_input(
-         "HF_TOKEN (optional)",
-         type="password",
-         help="Personal access token with Inference API scope if you want to use the fallback engine.",
-         value=os.environ.get("HF_TOKEN", ""),
-     )
-
- # ---------- Input Area ----------
- DEFAULT_TEXT = (
-     "The Hugging Face Spaces platform makes it simple to build and share machine learning apps. "
-     "This example demonstrates a map‑reduce summarization approach using an efficient BitNet model. "
-     "For longer documents, we split text into token chunks, summarize each piece, and merge the summaries "
-     "into a coherent final summary."
  )

- text = st.text_area(
-     "Paste your text here:",
-     value=DEFAULT_TEXT,
-     height=260,
-     help="Works with long documents via chunking. You can also try the sample text to see the pipeline.",
  )

- colA, colB = st.columns([1, 2])
- with colA:
-     run = st.button("Summarize", type="primary")
- with colB:
-     st.write("")
-
- # ---------- Inference API Fallback ----------
- # Lightweight helper using huggingface_hub's InferenceClient
- from huggingface_hub import InferenceClient
-
- def summarize_via_hf_api(text: str, token: str) -> Optional[str]:
-     try:
-         client = InferenceClient(token=token)
-         # A small, instruction‑tuned summarizer works well as fallback
-         # DistilBART CNN is common; switch to any hosted summarization model you prefer
-         model = "sshleifer/distilbart-cnn-12-6"
-         out = client.text_generation(
-             model=model,
-             prompt=(
-                 "Summarize the following text in 3-6 concise sentences, preserving key facts and avoiding hallucinations.\n\n" + text
-             ),
-             max_new_tokens=220,
-             temperature=0.3,
-             top_p=0.95,
-         )
-         return out
-     except Exception as e:
-         st.error(f"HF Inference API error: {e}")
-         return None
-
- # ---------- Main Action ----------
- if run:
-     if not text.strip():
-         st.warning("Please paste some text to summarize.")
-         st.stop()
-
-     if engine.startswith("HF Inference API"):
-         if not hf_token.strip():
-             st.error("Please provide an HF_TOKEN to use the Inference API fallback.")
-             st.stop()
-         with st.spinner("Calling HF Inference API…"):
-             summary = summarize_via_hf_api(text, hf_token)
-         if summary:
-             st.success("Done!")
-             st.markdown("### Summary")
-             st.write(summary)
-         st.stop()
-
-     # Local BitNet path
-     info_box = st.empty()
-     info_box.info(
-         "Loading BitNet model. On CPU this can take several minutes on first run; subsequent runs are cached."
-     )
-
-     @st.cache_resource(show_spinner=False)
-     def _load():
-         return load_bitnet_model()
-
-     tok, model = _load()
-     info_box.empty()
-
-     with st.spinner("Summarizing with BitNet (map‑reduce)…"):
-         summary = map_reduce_summarize(
-             text=text,
-             tokenizer=tok,
-             model=model,
-             max_chunk_tokens=chunk_tokens,
-             overlap=chunk_overlap,
-             chunk_max_new_tokens=chunk_max_new,
-             final_max_new_tokens=final_max_new,
-             temperature=temperature,
-             top_p=top_p,
-         )
-
-     st.success("Done!")
-     st.markdown("### Summary")
-     st.write(summary)
-
-     with st.expander("Debug / details"):
-         st.markdown(
-             "- **Engine:** BitNet (local) \n"
-             f"- **chunk size:** {chunk_tokens} tokens, **overlap:** {chunk_overlap} tokens \n"
-             f"- **temperature:** {temperature}, **top_p:** {top_p} \n"
-             f"- **chunk max_new_tokens:** {chunk_max_new}, **final max_new_tokens:** {final_max_new}"
-         )

  st.markdown("---")
  st.caption(
-     "Built with Streamlit + Transformers + Hugging Face Hub. Model: microsoft/bitnet-b1.58-2B-4T.\n"
-     "Tip: Select a GPU in Space settings for faster startup."
  )
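A note on the fallback helper removed above: `summarize_via_hf_api` routes a plain-text prompt through `client.text_generation` against `sshleifer/distilbart-cnn-12-6`, but DistilBART is an encoder-decoder summarization model and the hosted Inference API dispatches by task, so a text-generation request against it will typically fail. If this helper is ever restored, a task-matched call is safer. A minimal sketch, assuming `huggingface_hub`'s `InferenceClient.summarization` method; this is an editorial suggestion, not code from the commit:

```python
# Sketch only: not part of this commit.
from typing import Optional

import streamlit as st
from huggingface_hub import InferenceClient


def summarize_via_hf_api(text: str, token: str) -> Optional[str]:
    try:
        client = InferenceClient(token=token)
        # Call the summarization task directly; DistilBART is a seq2seq
        # summarization model, so text_generation is the wrong endpoint.
        out = client.summarization(text, model="sshleifer/distilbart-cnn-12-6")
        # Recent huggingface_hub versions return a SummarizationOutput object;
        # older ones return the summary string itself.
        return getattr(out, "summary_text", out)
    except Exception as e:
        st.error(f"HF Inference API error: {e}")
        return None
```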
 
+ # streamlit_app.py
+         return out
+     except Exception as e:
+         st.error(f"HF Inference API error: {e}")
+         return None



+
+ if run:
+     if not text.strip():
+         st.warning("Please paste some text to summarize.")
+         st.stop()
+
+
+     if engine.startswith("HF Inference API"):
+         if not hf_token.strip():
+             st.error("Please provide an HF_TOKEN to use the Inference API fallback.")
+             st.stop()
+         with st.spinner("Calling HF Inference API…"):
+             summary = summarize_via_hf_api(text, hf_token)
+         if summary:
+             st.success("Done!")
+             st.markdown("### Summary")
+             st.write(summary)
+         st.stop()
+
+
+     info_box = st.empty()
+     info_box.info("Loading BitNet model. On CPU this can take several minutes on first run; subsequent runs are cached.")
+
+
+     @st.cache_resource(show_spinner=False)
+     def _load():
+         return load_bitnet_model()
+
+
+     tok, model = _load()
+     info_box.empty()
+
+
+     with st.spinner("Summarizing with BitNet (map‑reduce)…"):
+         summary = map_reduce_summarize(
+             text=text,
+             tokenizer=tok,
+             model=model,
+             max_chunk_tokens=chunk_tokens,
+             overlap=chunk_overlap,
+             chunk_max_new_tokens=chunk_max_new,
+             final_max_new_tokens=final_max_new,
+             temperature=temperature,
+             top_p=top_p,
          )

+
+     st.success("Done!")
+     st.markdown("### Summary")
+     st.write(summary)
+
+
+     with st.expander("Debug / details"):
+         st.markdown(
+             "- **Engine:** BitNet (local) \n"
+             f"- **chunk size:** {chunk_tokens} tokens, **overlap:** {chunk_overlap} tokens \n"
+             f"- **temperature:** {temperature}, **top_p:** {top_p} \n"
+             f"- **chunk max_new_tokens:** {chunk_max_new}, **final max_new_tokens:** {final_max_new}"
          )


  st.markdown("---")
  st.caption(
+     "Built with Docker + Streamlit + Transformers + Hugging Face Hub. Model: microsoft/bitnet-b1.58-2B-4T.\n"
+     "Tip: Select a GPU in Space settings for faster startup."
  )
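Both versions of the app lean on two helpers imported from `utils`, which this commit does not touch: `load_bitnet_model` and `map_reduce_summarize`. For readers reconstructing the pipeline, here is a minimal sketch of what such helpers might look like, assuming a standard Transformers causal-LM setup; the prompts, dtype, and chunking details are illustrative assumptions, not the repository's actual implementation:

```python
# Sketch only: utils.py is not part of this diff; all details are assumptions.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "microsoft/bitnet-b1.58-2B-4T"


def load_bitnet_model():
    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
    model.eval()
    return tok, model


def _generate(tokenizer, model, prompt, max_new_tokens, temperature, top_p):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=temperature > 0,
            temperature=max(temperature, 1e-5),
            top_p=top_p,
        )
    # Strip the prompt tokens and decode only the newly generated text.
    return tokenizer.decode(
        out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    ).strip()


def map_reduce_summarize(text, tokenizer, model, max_chunk_tokens=900, overlap=60,
                         chunk_max_new_tokens=128, final_max_new_tokens=220,
                         temperature=0.3, top_p=0.95):
    # Map: split the token stream into overlapping windows and summarize each.
    ids = tokenizer(text, add_special_tokens=False)["input_ids"]
    step = max(max_chunk_tokens - overlap, 1)
    chunks = [tokenizer.decode(ids[i:i + max_chunk_tokens])
              for i in range(0, len(ids), step)]
    partials = [
        _generate(tokenizer, model,
                  f"Summarize the following text concisely:\n\n{chunk}\n\nSummary:",
                  chunk_max_new_tokens, temperature, top_p)
        for chunk in chunks
    ]
    if len(partials) == 1:
        return partials[0]
    # Reduce: merge the partial summaries into one coherent final summary.
    joined = "\n".join(partials)
    return _generate(tokenizer, model,
                     f"Combine these partial summaries into one coherent summary:\n\n{joined}\n\nSummary:",
                     final_max_new_tokens, temperature, top_p)
```

The overlapping windows keep sentences that straddle a chunk boundary from being lost, and the single reduce pass matches the sidebar's `final max_new_tokens` budget; a real implementation might reduce hierarchically if the joined partial summaries exceed the model's context window.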