Spaces:

lapa-llm
/

quality-estimation

Sleeping

App Files Files Community

iamthewalrus67 committed on Oct 12

Commit

4aa8608

1 Parent(s): c0f3227

Calculate score

Browse files

Files changed (1) hide show

app.py +24 -145

app.py CHANGED Viewed

@@ -11,8 +11,8 @@ import threading
 import spaces
 import gradio as gr
 import torch
-from PIL.Image import Image
-from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer, TextIteratorStreamer
 from kernels import get_kernel
 from typing import Any, Optional, Dict
@@ -31,8 +31,8 @@ login(token=HF_LE_LLM_READ_TOKEN)
 # MODEL_ID = "le-llm/lapa-v0.1-instruct"
 # MODEL_ID = "le-llm/lapa-v0.1-matt-instruction-5e06"
 # MODEL_ID = "le-llm/lapa-v0.1-reprojected"
-MODEL_ID = "le-llm/lapa-v0.1.1-instruct"
-# MODEL_ID = "le-llm/manipulative-score-model"
 MAX_TOKENS = 4096
 TEMPERATURE = 0.7
@@ -56,7 +56,7 @@ def load_model():
     except Exception as err:  # pragma: no cover - informative fallback
         print(f"Warning: AutoProcessor not available ({err}). Falling back to tokenizer.")
-    model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
         dtype=torch.bfloat16,  # if device == "cuda" else torch.float32,
         device_map="auto",  # if device == "cuda" else None,
@@ -70,7 +70,7 @@ def load_model():
 model, tokenizer, processor, device = load_model()
-def user(user_message, image_data: Image, history: list):
     """Format user message with optional image."""
     import io
@@ -80,44 +80,14 @@ def user(user_message, image_data: Image, history: list):
     stripped_message = user_message.strip()
-    # If we have an image, save it to temp file for Gradio display
-    if image_data is not None:
-        image_data.thumbnail((IMAGE_MAX_SIZE, IMAGE_MAX_SIZE))
-        # Save to temp file for Gradio display
-        fd, tmp_path = tempfile.mkstemp(suffix=".jpg")
-        os.close(fd)
-        image_data.save(tmp_path, format="JPEG")
-        # Also encode as base64 for model processing (stored in metadata)
-        buffered = io.BytesIO()
-        image_data.save(buffered, format="JPEG")
-        # TODO do we leave that message?
-        text_content = stripped_message if stripped_message else "Опиши це зображення"
-        # Store both text and image in a single message with base64 in metadata
-        updated_history.append({
-            "role": "user",
-            "content": text_content
-        })
-        updated_history.append({
-            "role": "user",
-            "content": {
-                    "path": tmp_path,
-                    "alt_text": "User uploaded image"
-                },
-        })
-        has_content = True
-    elif stripped_message:
-        updated_history.append({"role": "user", "content": stripped_message})
         has_content = True
     if not has_content:
         # Nothing to submit yet; keep inputs unchanged
-        return user_message, image_data, history
-    return "", None, updated_history
 def append_example_message(x: gr.SelectData, history):
@@ -166,119 +136,29 @@ def _clean_history_for_display(history: list[dict[str, Any]]) -> list[dict[str,
 @spaces.GPU
 def bot(
-    history: list[dict[str, Any]]
 ):
-    """Generate bot response with support for text and images."""
     # Early return if no input
-    if not history:
         return
-    # Extract last user message for logging
-    last_user_msg = next((msg for msg in reversed(history) if msg.get("role") == "user"), None)
-    user_message_text = _extract_text_from_content(last_user_msg.get("content")) if last_user_msg else ""
-    print('User message:', user_message_text)
-    # Check if any message contains images
-    has_images = any(
-        isinstance(msg.get("content"), tuple)
-        for msg in history
-    )
-    model_inputs = None
-    # Use processor if images are present
-    if processor is not None and has_images:
-        try:
-            processor_history = []
-            for msg in history:
-                role = msg.get("role", "user")
-                content = msg.get("content")
-                if isinstance(content, str):
-                    processor_history.append({"role": role, "content": [{"type": "text", "text": content}]})
-                elif isinstance(content, tuple):
-                    formatted_content = []
-                    tmp_path, _ = content
-                    image_input = {
-                        "type": "image",
-                        "url": f"{tmp_path}",
-                    }
-                    if processor_history[-1].get('role') == 'user':
-                        if isinstance(processor_history[-1].get('content'), str):
-                            previous_message = processor_history[-1].get('content')
-                            formatted_content.append({"type": "text", "text": previous_message})
-                            formatted_content.append(image_input)
-                            processor_history[-1]['content'] = formatted_content
-                        elif isinstance(processor_history[-1].get('content'), list):
-                            processor_history[-1]['content'].append(image_input)
-                    else:
-                        formatted_content.append(image_input)
-                        processor_history.append({"role": role, "content": formatted_content})
-            model_inputs = processor.apply_chat_template(
-                processor_history,
-                tokenize=True,
-                return_dict=True,
-                return_tensors="pt",
-                add_generation_prompt=True,
-            ).to(model.device)
-            print("Using processor for vision input")
-        except Exception as exc:
-            print(f"Processor failed: {exc}")
-            model_inputs = None
-    # Fallback to tokenizer for text-only
-    if model_inputs is None:
-        # Convert to text-only format for tokenizer
-        text_history = []
-        for msg in history:
-            role = msg.get("role", "user")
-            content = msg.get("content")
-            text_content = _extract_text_from_content(content)
-            if text_content:
-                text_history.append({"role": role, "content": text_content})
-        if text_history:
-            input_text = tokenizer.apply_chat_template(
-                text_history,
-                tokenize=False,
-                add_generation_prompt=True,
-            )
-            if input_text and tokenizer.bos_token:
-                input_text = input_text.replace(tokenizer.bos_token, "", 1)
-            model_inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
-            print("Using tokenizer for text-only input")
-    if model_inputs is None:
-        return
-    # Streamer setup
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
-    # Run model.generate in background thread
-    generation_kwargs = dict(
-        **model_inputs,
-        max_new_tokens=MAX_TOKENS,
-        temperature=TEMPERATURE,
-        top_p=TOP_P,
-        top_k=64,
-        do_sample=True,
-        streamer=streamer,
-    )
-    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-    history.append({"role": "assistant", "content": ""})
-    # Yield tokens as they come in
-    for new_text in streamer:
-        history[-1]["content"] += new_text
-        yield _clean_history_for_display(history)
-    assistant_message = history[-1]["content"]
-    logger.log_interaction(user=user_message_text, answer=assistant_message)
 # --- drop-in UI compatible with older Gradio versions ---
 import os, tempfile, time
@@ -313,8 +193,7 @@ with gr.Blocks(theme=THEME, css=CSS, fill_height=True) as demo:
     gr.HTML(
         """
         <div id="app-header">
-          <div class="app-title">✨ LAPA</div>
-          <div class="app-subtitle">LLM for Ukrainian Language</div>
         </div>
         """
     )

 import spaces
 import gradio as gr
 import torch
+import torch.nn.functional as F
+from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer, TextIteratorStreamer, AutoModel
 from kernels import get_kernel
 from typing import Any, Optional, Dict
 # MODEL_ID = "le-llm/lapa-v0.1-instruct"
 # MODEL_ID = "le-llm/lapa-v0.1-matt-instruction-5e06"
 # MODEL_ID = "le-llm/lapa-v0.1-reprojected"
+# MODEL_ID = "le-llm/lapa-v0.1.1-instruct"
+MODEL_ID = "le-llm/manipulative-score-model"
 MAX_TOKENS = 4096
 TEMPERATURE = 0.7
     except Exception as err:  # pragma: no cover - informative fallback
         print(f"Warning: AutoProcessor not available ({err}). Falling back to tokenizer.")
+    model = AutoModel.from_pretrained(
         MODEL_ID,
         dtype=torch.bfloat16,  # if device == "cuda" else torch.float32,
         device_map="auto",  # if device == "cuda" else None,
 model, tokenizer, processor, device = load_model()
+def user(user_message, history: list):
     """Format user message with optional image."""
     import io
     stripped_message = user_message.strip()
+    if stripped_message:
         has_content = True
     if not has_content:
         # Nothing to submit yet; keep inputs unchanged
+        return user_message, history
+    return "", updated_history
 def append_example_message(x: gr.SelectData, history):
 @spaces.GPU
 def bot(
+    input: list[dict[str, Any]]
 ):
+    """Embed the submitted text and return similarity scores.
+
+    The input is wrapped in a ``query: `` prefix, tokenized (truncated to
+    512 tokens), run through the module-level ``model``, mean-pooled with
+    ``average_pool`` and L2-normalized before scoring.
+
+    Args:
+        input: Submitted content; it is interpolated into the query string
+            as-is. NOTE(review): the annotation says a list of dicts, but
+            the f-string below suggests a plain string is expected --
+            confirm against the caller.
+
+    Returns:
+        ``None`` when ``input`` is falsy, otherwise the score matrix
+        converted to nested Python lists.
+    """
     # Early return if no input
+    if not input:
         return
+    # Tokenize the prefixed text. Previously this list was built but the raw
+    # ``input`` was tokenized instead, so the "query: " prefix was dropped.
+    clean_input = [f"query: {input}"]
+    batch_dict = tokenizer(
+        clean_input, max_length=512, padding=True, truncation=True, return_tensors='pt'
+    ).to(model.device)  # keep the inputs on the same device as the model
+    # Inference only: skip autograd bookkeeping.
+    with torch.no_grad():
+        outputs = model(**batch_dict)
+    embeddings = average_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
+    embeddings = F.normalize(embeddings, p=2, dim=1)
+    # NOTE(review): rows 0-1 are scored against rows 2 and up, but only one
+    # text is embedded above, so ``embeddings[2:]`` is empty and the result
+    # is ``[[]]``. Confirm what the score should be computed against.
+    scores = (embeddings[:2] @ embeddings[2:].T) * 100
+    return scores.tolist()
+def average_pool(last_hidden_states: torch.Tensor,
+                 attention_mask: torch.Tensor) -> torch.Tensor:
+    """Mean-pool hidden states over the positions kept by ``attention_mask``.
+
+    Args:
+        last_hidden_states: Per-token hidden states; assumed to be
+            (batch, seq, hidden) -- TODO confirm.
+        attention_mask: Mask whose non-zero entries mark tokens to keep;
+            assumed to be (batch, seq) -- TODO confirm.
+
+    Returns:
+        The sum of the kept hidden states along dim 1 divided by the number
+        of kept tokens. A row with no kept tokens yields zeros, not NaN.
+    """
+    # Zero out masked positions so they do not contribute to the sum.
+    last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
+    # Clamp the token count to at least 1 so a fully masked row gives
+    # 0 / 1 = 0 instead of 0 / 0 = NaN.
+    token_counts = attention_mask.sum(dim=1).clamp(min=1)[..., None]
+    return last_hidden.sum(dim=1) / token_counts
 # --- drop-in UI compatible with older Gradio versions ---
 import os, tempfile, time
     gr.HTML(
         """
         <div id="app-header">
+          <div class="app-title">🤔 LAPA Quality Estimation</div>
         </div>
         """
     )