albertchristopher committed on
Commit e146d8f · verified · 1 Parent(s): d51faa2

Update src/utils.py

Files changed (1)
  1. src/utils.py +65 -132
src/utils.py CHANGED
@@ -1,148 +1,81 @@
  # utils.py
- from typing import List, Optional
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM
-
- DEFAULT_MODEL_ID = "microsoft/bitnet-b1.58-2B-4T"
-
- SYSTEM_PROMPT = (
-     "You are an expert writing assistant. Summarize the user's text clearly and faithfully. "
-     "Write 2-4 concise sentences capturing the main points, avoiding speculation and numbers not found in the text."
- )
-
- CHUNK_PROMPT = (
-     "Summarize the following passage in 1-3 sentences, preserving key facts and names.\n\n"
-     "PASSAGE:\n{chunk}\n\nSUMMARY:"
- )
-
- REDUCE_PROMPT = (
-     "You are merging partial summaries of a longer document. Combine them into one cohesive summary "
-     "of 3-6 sentences covering the overall thrust of the original text, with no contradictions or hallucinations.\n\n"
-     "PARTIAL SUMMARIES:\n{partials}\n\nFINAL SUMMARY:"
  )

- def device_and_dtype():
-     """Select an appropriate device and dtype based on availability."""
-     if torch.cuda.is_available():
-         return "auto", torch.bfloat16
-     # CPU fallback
-     return None, torch.float32
-
-
- def load_bitnet_model(model_id: str = DEFAULT_MODEL_ID):
-     """Load tokenizer and model with reasonable defaults for BitNet."""
-     device_map, torch_dtype = device_and_dtype()
-     tok = AutoTokenizer.from_pretrained(model_id, use_fast=True)

-     # Ensure pad token exists
-     if tok.pad_token is None:
-         tok.pad_token = tok.eos_token

-     model = AutoModelForCausalLM.from_pretrained(
-         model_id,
-         torch_dtype=torch_dtype,
-         device_map=device_map,
-     )
-     return tok, model
-
-
- def chunk_by_tokens(text: str, tokenizer: AutoTokenizer, max_tokens: int = 900, overlap: int = 60) -> List[str]:
-     """Greedy token chunking with overlap to preserve context for long docs."""
-     ids = tokenizer.encode(text, add_special_tokens=False)
-     chunks = []
-     i = 0
-     while i < len(ids):
-         j = min(i + max_tokens, len(ids))
-         chunk_ids = ids[i:j]
-         chunks.append(tokenizer.decode(chunk_ids))
-         if j == len(ids):
-             break
-         i = j - overlap  # step with overlap
-         if i < 0:
-             i = 0
-     return chunks

- def generate_summary(
-     tokenizer,
-     model,
-     prompt: str,
-     max_new_tokens: int = 192,
-     temperature: float = 0.3,
-     top_p: float = 0.95,
-     repetition_penalty: float = 1.05,
- ) -> str:
-     """Generic text generation helper for causal LMs."""
-     inputs = tokenizer(
-         prompt,
-         return_tensors="pt",
-         padding=True,
-         truncation=True,
-     )

-     if torch.cuda.is_available():
-         inputs = {k: v.to(model.device) for k, v in inputs.items()}

-     gen_ids = model.generate(
-         **inputs,
-         do_sample=(temperature > 0.0),
-         temperature=temperature,
-         top_p=top_p,
-         max_new_tokens=max_new_tokens,
-         repetition_penalty=repetition_penalty,
-         eos_token_id=tokenizer.eos_token_id,
-         pad_token_id=tokenizer.pad_token_id,
-     )
-     out = tokenizer.decode(gen_ids[0], skip_special_tokens=True)

-     # Return only the completion after the prompt if possible
-     if out.startswith(prompt):
-         out = out[len(prompt):]
-     return out.strip()

- def map_reduce_summarize(
-     text: str,
-     tokenizer,
-     model,
-     max_chunk_tokens: int = 900,
-     overlap: int = 60,
-     chunk_max_new_tokens: int = 128,
-     final_max_new_tokens: int = 220,
-     temperature: float = 0.2,
-     top_p: float = 0.9,
- ) -> str:
-     """Summarize long text by chunking -> summarizing -> reducing."""
-     chunks = chunk_by_tokens(text, tokenizer, max_tokens=max_chunk_tokens, overlap=overlap)

-     # Short texts: single pass
-     if len(chunks) == 1:
-         prompt = f"{SYSTEM_PROMPT}\n\n{CHUNK_PROMPT.format(chunk=chunks[0])}"
-         return generate_summary(tokenizer, model, prompt, max_new_tokens=final_max_new_tokens,
-                                 temperature=temperature, top_p=top_p)

-     partials: List[str] = []
-     for ck in chunks:
-         p = f"{SYSTEM_PROMPT}\n\n{CHUNK_PROMPT.format(chunk=ck)}"
-         s = generate_summary(
-             tokenizer,
-             model,
-             p,
-             max_new_tokens=chunk_max_new_tokens,
-             temperature=temperature,
-             top_p=top_p,
-         )
-         partials.append(s)

-     merged = "\n- ".join(partials)
-     reduce_prompt = f"{SYSTEM_PROMPT}\n\n{REDUCE_PROMPT.format(partials='- ' + merged)}"
-     final = generate_summary(
-         tokenizer,
-         model,
-         reduce_prompt,
-         max_new_tokens=final_max_new_tokens,
-         temperature=max(0.1, temperature - 0.1),
-         top_p=top_p,
-     )
-     return final.strip()
 
  # utils.py
+     prompt: str,
+     max_new_tokens: int = 192,
+     temperature: float = 0.3,
+     top_p: float = 0.95,
+     repetition_penalty: float = 1.05,
+ ) -> str:
+     inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
+     if torch.cuda.is_available():
+         inputs = {k: v.to(model.device) for k, v in inputs.items()}
+     gen_ids = model.generate(
+         **inputs,
+         do_sample=(temperature > 0.0),
+         temperature=temperature,
+         top_p=top_p,
+         max_new_tokens=max_new_tokens,
+         repetition_penalty=repetition_penalty,
+         eos_token_id=tokenizer.eos_token_id,
+         pad_token_id=tokenizer.pad_token_id,
      )
+     out = tokenizer.decode(gen_ids[0], skip_special_tokens=True)
+     if out.startswith(prompt):
+         out = out[len(prompt):]
+     return out.strip()


+ def map_reduce_summarize(
+     text: str,
+     tokenizer,
+     model,
+     max_chunk_tokens: int = 900,
+     overlap: int = 60,
+     chunk_max_new_tokens: int = 128,
+     final_max_new_tokens: int = 220,
+     temperature: float = 0.2,
+     top_p: float = 0.9,
+ ) -> str:
+     chunks = chunk_by_tokens(text, tokenizer, max_tokens=max_chunk_tokens, overlap=overlap)
+     if len(chunks) == 1:
+         prompt = f"{SYSTEM_PROMPT}\n\n{CHUNK_PROMPT.format(chunk=chunks[0])}"
+         return generate_summary(tokenizer, model, prompt, max_new_tokens=final_max_new_tokens,
+                                 temperature=temperature, top_p=top_p)

+     partials: List[str] = []
+     for ck in chunks:
+         p = f"{SYSTEM_PROMPT}\n\n{CHUNK_PROMPT.format(chunk=ck)}"
+         s = generate_summary(
+             tokenizer,
+             model,
+             p,
+             max_new_tokens=chunk_max_new_tokens,
+             temperature=temperature,
+             top_p=top_p,
+         )
+         partials.append(s)

+     merged = "\n- ".join(partials)
+     reduce_prompt = f"{SYSTEM_PROMPT}\n\n{REDUCE_PROMPT.format(partials='- ' + merged)}"
+     final = generate_summary(
+         tokenizer,
+         model,
+         reduce_prompt,
+         max_new_tokens=final_max_new_tokens,
+         temperature=max(0.1, temperature - 0.1),
+         top_p=top_p,
+     )
+     return final.strip()
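
For reference, a minimal usage sketch of the chunk-then-reduce summarization pipeline as defined on the removed side of this diff (load_bitnet_model plus map_reduce_summarize). The import path and the sample input file are illustrative assumptions, not part of this commit:

# Hypothetical usage example; assumes the pre-commit src/utils.py is importable.
from src.utils import load_bitnet_model, map_reduce_summarize

tok, model = load_bitnet_model()  # defaults to microsoft/bitnet-b1.58-2B-4T

with open("article.txt", encoding="utf-8") as f:  # placeholder path to a long document
    long_text = f.read()

# Chunks the text by tokens, summarizes each chunk, then merges the partial summaries.
summary = map_reduce_summarize(long_text, tok, model)
print(summary)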