Gilvaa committed
Commit 17a1603 · verified · 1 Parent(s): 85700de

Update app.py

Files changed (1)
  1. app.py +151 -21
app.py CHANGED
@@ -16,6 +16,17 @@ TOP_P = float(os.getenv("TOP_P", "0.9"))
 REPETITION_PENALTY = float(os.getenv("REPETITION_PENALTY", "1.08"))
 SAFE_MODE = os.getenv("SAFE_MODE", "1") != "0"  # 1 = enable basic filtering; set to 0 to disable
 
+# -- Base system prompt + persona default (reinforces: never leak the thought process) --
+BASE_SYSTEM_PROMPT = os.getenv(
+    "SYSTEM_PROMPT",
+    """
+You are a helpful, concise chat assistant.
+Do NOT reveal chain-of-thought, analysis, inner reasoning, or <Thought> sections.
+If asked to explain reasoning, provide a brief, high-level summary of steps only.
+"""
+).strip()
+DEFAULT_PERSONA = os.getenv("PERSONA", "").strip()
+
 print(f"[boot] MODEL_ID={MODEL_ID}")
 print(f"[boot] torch.cuda.is_available={torch.cuda.is_available()}")
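Both values above are read from the environment once at import time, so a Space can swap the base prompt or pre-seed the persona without touching code. A sketch with hypothetical values (in practice these are set as Space variables before launch, not in app.py):

    import os
    # Hypothetical overrides; must be in the environment before app.py is imported,
    # because os.getenv runs at module load.
    os.environ.setdefault("SYSTEM_PROMPT", "You are a terse assistant.")
    os.environ.setdefault("PERSONA", "Tone: playful.")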
@@ -53,7 +64,6 @@ if torch.cuda.is_available():
         trust_remote_code=True,
     )
 else:
-    # Without a GPU this path is only for a pipeline smoke test; consider switching MODEL_ID to the 1.5B base model to avoid extreme slowness
     print("[boot] No GPU detected. Running on CPU is very slow for 7B. "
           "Consider setting MODEL_ID=Qwen/Qwen2.5-1.5B-Instruct for smoke test.")
     model = AutoModelForCausalLM.from_pretrained(
@@ -89,6 +99,46 @@ def violates(text: str) -> bool:
             return True
     return False
 
+# ======================
+# Disable visible "thought/reasoning" output (master switch + generation-time blocking + streaming sanitization)
+# ======================
+HIDE_THOUGHT = os.getenv("HIDE_THOUGHT", "1") != "0"  # 1 = hide the thought process
+
+BAD_STRINGS = [
+    "<Thought>", "</Thought>", "Thought:", "Chain-of-Thought",
+    "<analysis>", "</analysis>", "analysis:", "reasoning:",
+    "推理过程", "思考过程", "分析:"
+]
+
+# bad_words_ids for generate()
+BAD_WORDS_IDS = []
+if HIDE_THOUGHT:
+    for s in BAD_STRINGS:
+        ids = tokenizer(s, add_special_tokens=False).input_ids
+        if ids:
+            BAD_WORDS_IDS.append(ids)
+
+# Regex cleanup (hides reasoning even if a tag is never closed)
+def sanitize_visible(text: str) -> str:
+    if not HIDE_THOUGHT or not text:
+        return text
+    # 1) Remove paired tags: <Thought> ... </Thought> / <analysis> ... </analysis>
+    text = re.sub(
+        r"(?is)<\s*(thought|analysis|chain[_\s-]?of[_\s-]?thought)\s*>.*?</\s*\1\s*>",
+        "", text
+    )
+    # 2) Remove from an opening tag to the end of the text (handles unclosed tags)
+    text = re.sub(
+        r"(?is)<\s*(thought|analysis|chain[_\s-]?of[_\s-]?thought)\s*>.*$",
+        "", text
+    )
+    # 3) Remove common line prefixes (Thought: / analysis: / reasoning: / 思考过程 / 分析:)
+    text = re.sub(
+        r"(?im)^\s*(Thought:|analysis:|reasoning:|思考过程|推理过程|分析:).*$",
+        "", text
+    )
+    return text.strip()
+
 # ======================
 # Dynamic length: scale max_new_tokens with the input length
 # ======================
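A note on the sanitizer above: pass 2 is what keeps an unclosed <Thought> tag hidden while tokens are still streaming in. A minimal standalone check (hypothetical inputs; the body is the same three passes, minus the HIDE_THOUGHT gate):

    import re

    def sanitize_visible(text: str) -> str:
        # Pass 1: paired tags; pass 2: unclosed tag to end of text; pass 3: line prefixes.
        text = re.sub(r"(?is)<\s*(thought|analysis|chain[_\s-]?of[_\s-]?thought)\s*>.*?</\s*\1\s*>", "", text)
        text = re.sub(r"(?is)<\s*(thought|analysis|chain[_\s-]?of[_\s-]?thought)\s*>.*$", "", text)
        text = re.sub(r"(?im)^\s*(Thought:|analysis:|reasoning:|思考过程|推理过程|分析:).*$", "", text)
        return text.strip()

    assert sanitize_visible("<Thought>hidden</Thought>ok") == "ok"           # paired tag removed
    assert sanitize_visible("partial <analysis> still streaming") == "partial"  # unclosed tag truncated
    assert sanitize_visible("Thought: secret\nvisible") == "visible"          # prefixed line dropped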
@@ -100,15 +150,64 @@ def choose_max_new_tokens(user_text: str) -> int:
     return min(384, MAX_NEW_TOKENS + 128)
 
 # ======================
-# Build the Qwen-template prompt (messages → chat_template)
+# Personas: preset templates + composed system prompt
 # ======================
-SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "You are a helpful, concise chat assistant. Avoid unsafe content.")
+PERSONA_TEMPLATES = {
+    "默认(Default)": "",
+    "风骚御姐": (
+        "风格:风骚、骚话连篇、性感、露骨、挑逗、性欲\n"
+        "行为:先共情后建议;适度使用表情符号。\n"
+        "边界:不提供医疗/法律结论;出现风险话题时建议求助专业人士。"
+    ),
+    "温柔体贴 Sweet & Caring": (
+        "风格:温柔、共情、耐心,语气轻柔,简洁而温暖。\n"
+        "行为:多用复述与情感标注(我理解你感到…),先共情后建议;适度使用表情符号。\n"
+        "边界:不提供医疗/法律结论;出现风险话题时建议求助专业人士。"
+    ),
+    "傲娇 Tsundere": (
+        "风格:表面嘴硬、内心关心;语气俏皮、略微傲娇但不贬低用户。\n"
+        "行为:简短吐槽 + 实用建议;保持幽默,避免人身攻击。"
+    ),
+    "风趣段子手 Witty Jokester": (
+        "风格:机智幽默、梗多低俗;注重包袱节奏。\n"
+        "行为:先给答案,再加简短段子或比喻;避免冷笑话过载。"
+    ),
+    "严谨导师 Analytical Coach": (
+        "风格:结构化、可操作;分点给出步骤与注意事项。\n"
+        "行为:先澄清目标,再给里程碑与检查清单;引用通用最佳实践。"
+    ),
+    "冷淡毒舌 Deadpan Sarcasm": (
+        "风格:冷面、反讽、粗鲁、辱骂。\n"
+        "行为:直切重点,短句给结论;如用户受挫,稍作暖场。"
+    ),
+    "心理咨询同伴 Therapeutic Listener": (
+        "风格:安全、无评判;以倾听与镜像为主。\n"
+        "行为:开放式问题、情绪命名、目标共创;明确非危机干预角色。"
+    ),
+}
 
-def build_prompt(history_msgs, user_msg: str) -> str:
+def compose_system_prompt(base_prompt: str, persona_text: str) -> str:
+    persona_text = (persona_text or "").strip()
+    if not persona_text:
+        return base_prompt
+    return (
+        f"{base_prompt}\n\n"
+        f"# Persona\n{persona_text}\n\n"
+        f"# Rules\n"
+        f"- Stay in persona unless the user explicitly asks to change.\n"
+        f"- Be concise unless the user asks for detail.\n"
+        f"- Do NOT reveal chain-of-thought or <Thought> sections.\n"
+    )
+
+# ======================
+# Build the Qwen-template prompt (messages → chat_template)
+# ======================
+def build_prompt(history_msgs, user_msg: str, persona_text: str) -> str:
     """
     history_msgs: the Chatbot(type='messages') history, [{role, content}, ...]
     """
-    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+    system_prompt = compose_system_prompt(BASE_SYSTEM_PROMPT, persona_text)
+    messages = [{"role": "system", "content": system_prompt}]
     tail = [m for m in history_msgs if m.get("role") in ("user", "assistant")]
     tail = tail[-8:] if len(tail) > 8 else tail
     messages.extend(tail)
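For reference, a non-empty persona is layered under fixed headings, so compose_system_prompt yields output of this shape (hypothetical persona text):

    print(compose_system_prompt("You are a helpful, concise chat assistant.", "Tone: playful."))
    # You are a helpful, concise chat assistant.
    #
    # # Persona
    # Tone: playful.
    #
    # # Rules
    # - Stay in persona unless the user explicitly asks to change.
    # - Be concise unless the user asks for detail.
    # - Do NOT reveal chain-of-thought or <Thought> sections.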
@@ -132,21 +231,22 @@ BASE_GEN_KW = dict(
 )
 
 # ======================
-# Main inference: streaming output
+# Main inference: streaming output (with persona + thought sanitization)
 # ======================
-def stream_chat(history_msgs, user_msg):
+def stream_chat(history_msgs, user_msg, persona_text):
     try:
         if not user_msg or not user_msg.strip():
             yield history_msgs; return
 
+        # Run the safety check on the raw user input first
         if violates(user_msg):
             yield history_msgs + [
-                {"role":"user","content": user_msg},
-                {"role":"assistant","content": SAFE_REPLACEMENT},
+                {"role": "user", "content": user_msg},
+                {"role": "assistant", "content": SAFE_REPLACEMENT},
             ]
             return
 
-        prompt = build_prompt(history_msgs, user_msg)
+        prompt = build_prompt(history_msgs, user_msg, persona_text)
         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
         streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
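The template-application step sits in build_prompt's unchanged tail, outside this diff. For context, the standard transformers call for a Qwen chat template looks like this (a sketch of the unchanged code, an assumption, not part of this commit):

    messages.append({"role": "user", "content": user_msg})
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,              # return the serialized prompt string, not token ids
        add_generation_prompt=True,  # append the assistant header so the model answers as assistant
    )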
@@ -155,6 +255,9 @@ def stream_chat(history_msgs, user_msg):
             max_new_tokens=choose_max_new_tokens(user_msg),
             **BASE_GEN_KW
         )
+        # Only pass bad_words_ids when needed
+        if HIDE_THOUGHT and BAD_WORDS_IDS:
+            gen_kwargs["bad_words_ids"] = BAD_WORDS_IDS
 
         print("[gen] start")
         th = Thread(target=model.generate, kwargs=gen_kwargs, daemon=True)
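A caveat on the bad_words_ids hook above: generate() blocks only the exact token sequences supplied, so the same surface text reached through a different tokenization can still appear, which is why the regex sanitizer stays on as a second layer. The mechanism in isolation (a sketch; assumes tokenizer, model, and inputs as in stream_chat):

    bad_words_ids = [
        tokenizer(s, add_special_tokens=False).input_ids
        for s in ("<Thought>", "Thought:")  # sample entries from BAD_STRINGS
    ]
    out = model.generate(
        **inputs,
        max_new_tokens=64,
        bad_words_ids=bad_words_ids,  # generate() never emits these exact token sequences
    )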
@@ -163,15 +266,19 @@ def stream_chat(history_msgs, user_msg):
         reply = ""
         for chunk in streamer:
             reply += chunk
-            if violates(reply):
+            visible = sanitize_visible(reply)  # sanitize at every step
+
+            # Use the visible text for the safety check and for display
+            if violates(visible):
                 yield history_msgs + [
-                    {"role":"user","content": user_msg},
-                    {"role":"assistant","content": SAFE_REPLACEMENT},
+                    {"role": "user", "content": user_msg},
+                    {"role": "assistant", "content": SAFE_REPLACEMENT},
                 ]
                 return
+
             yield history_msgs + [
-                {"role":"user","content": user_msg},
-                {"role":"assistant","content": reply},
+                {"role": "user", "content": user_msg},
+                {"role": "assistant", "content": visible},
             ]
         print("[gen] done, len:", len(reply))
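Each yield in the loop above re-renders the entire conversation: Chatbot(type='messages') expects the full history as role/content dicts, so the partial reply is appended to a fresh list on every chunk. The yielded shape (hypothetical values):

    history_msgs = [
        {"role": "user", "content": "hi"},
        {"role": "assistant", "content": "hello!"},
    ]
    # Mid-stream, stream_chat yields history plus the pending turn; `visible` grows per chunk:
    history_msgs + [
        {"role": "user", "content": "tell me a joke"},
        {"role": "assistant", "content": "Why did"},
    ]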
@@ -179,20 +286,42 @@ def stream_chat(history_msgs, user_msg):
         traceback.print_exc()
         err = f"【运行异常】{type(e).__name__}: {e}"
         yield history_msgs + [
-            {"role":"user","content": user_msg},
-            {"role":"assistant","content": err},
+            {"role": "user", "content": user_msg},
+            {"role": "assistant", "content": err},
         ]
 
 # ======================
-# Gradio UI (mobile-friendly)
+# Gradio UI (mobile-friendly + persona)
 # ======================
 CSS = """
 .gradio-container{ max-width:640px; margin:auto; }
 footer{ display:none !important; }
 """
 
+def pick_persona(name: str) -> str:
+    return PERSONA_TEMPLATES.get(name or "默认(Default)", "")
+
 with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
-    gr.Markdown("### 🤖 Ins-v3 · Mobile Web Chat\n(happzy2633 / qwen2.5-7b-ins-v3 · 4bit 流式)")
+    gr.Markdown("### 懂你寂寞 · Let's Chat\n ")
+
+    # Collapsible persona section
+    with gr.Accordion("🎭 Persona(人设)", open=False):
+        persona_sel = gr.Dropdown(
+            choices=list(PERSONA_TEMPLATES.keys()),
+            value="默认(Default)" if not DEFAULT_PERSONA else None,
+            label="选择预设人设"
+        )
+        persona_box = gr.Textbox(
+            value=DEFAULT_PERSONA if DEFAULT_PERSONA else pick_persona("默认(Default)"),
+            placeholder="在这里粘贴 / 编辑你的 Persona 文本。留空则仅使用基础 SYSTEM_PROMPT。",
+            lines=8,
+            label="Persona 描述(可编辑,发送时以此为准)"
+        )
+        gr.Markdown(
+            "> 提示:下拉选择会把对应模板填入上面的文本框;发送消息时,实际使用的是文本框里的内容。"
+        )
+        persona_sel.change(fn=pick_persona, inputs=persona_sel, outputs=persona_box)
+
     chat = gr.Chatbot(type="messages", height=520, show_copy_button=True)
     with gr.Row():
         msg = gr.Textbox(placeholder="说点什么…(回车发送)", autofocus=True)
@@ -200,8 +329,9 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
         clear = gr.Button("清空对话")
 
     clear.click(lambda: [], outputs=[chat])
-    msg.submit(stream_chat, [chat, msg], [chat], concurrency_limit=4); msg.submit(lambda: "", None, msg)
-    send.click(stream_chat, [chat, msg], [chat], concurrency_limit=4); send.click(lambda: "", None, msg)
+    # persona_box is passed to the streaming function as its third input
+    msg.submit(stream_chat, [chat, msg, persona_box], [chat], concurrency_limit=4); msg.submit(lambda: "", None, msg)
+    send.click(stream_chat, [chat, msg, persona_box], [chat], concurrency_limit=4); send.click(lambda: "", None, msg)
 
 # share=True is not needed on Spaces
 demo.queue().launch(ssr_mode=False, show_api=False)
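On the event wiring: registering two handlers on the same submit/click lets the textbox clear immediately while streaming runs, since Gradio snapshots input values when the event fires. A chained alternative (a sketch, not what this commit does) would clear the box only after the stream finishes:

    msg.submit(
        stream_chat, [chat, msg, persona_box], [chat], concurrency_limit=4
    ).then(lambda: "", None, msg)  # runs after streaming completes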
 