Commit 635f9ff · Add compute_score
Parent(s): b04ad10
app.py CHANGED

@@ -12,7 +12,7 @@ import spaces
 import gradio as gr
 import torch
 import torch.nn.functional as F
-from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer, TextIteratorStreamer, AutoModel
+from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer, TextIteratorStreamer, AutoModel, AutoModelForSequenceClassification
 from kernels import get_kernel
 from typing import Any, Optional, Dict
 
@@ -43,7 +43,7 @@ def load_model(model_id: str):
     print(f"🔹 Loading model: {model_id}")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-    model =
+    model = AutoModelForSequenceClassification.from_pretrained(model_id, torch_dtype=torch.bfloat16)
     print(f"Detected embedding model: {model_id}")
 
     model.to(DEVICE).eval()
@@ -52,11 +52,22 @@ def load_model(model_id: str):
     return model, tokenizer
 
 
-
-
-
-
-
+def compute_score(text: str, model: torch.nn.Module, tokenizer: AutoTokenizer) -> dict:
+    inputs = tokenizer(
+        text,
+        return_tensors="pt",
+        padding="longest",
+        truncation=True,
+    ).to(DEVICE)
+
+    with torch.no_grad():
+        outputs = model(**inputs)
+        logits = outputs.logits.squeeze(-1).float().cpu().numpy()
+
+    res = {}
+    res["score"] = logits.tolist()
+    res["int_score"] = [int(round(max(0, min(score, 5)))) for score in logits]
+    return res
 
 # --- Main scoring logic ---
 @spaces.GPU
@@ -66,21 +77,9 @@ def bot(user_message: str, history: list[dict[str, Any]], model_choice: str):
 
     model, tokenizer = load_model(model_choice)  # returns embedding model
     history = history + [{"role": "user", "content": user_message}]
+    score = compute_score(user_message, model, tokenizer)["score"]
 
-
-    with torch.no_grad():
-        outputs = model(**batch)
-        # outputs.last_hidden_state.shape = [batch_size, seq_len, hidden_dim]
-        # average pool over tokens
-        embedding = average_pool(outputs.last_hidden_state, batch["attention_mask"])
-        score = model.score_head(embedding).squeeze().item()
-    # embedding = F.normalize(embedding, p=2, dim=1)  # optional
-    #
-    # # Compute scalar score from embedding (example: mean of embedding dims)
-    # score = embedding.mean().item()
-
-    response = f"🔹 {model_choice} → score: {score:.4f}"
-    history.append({"role": "assistant", "content": response})
+    history.append({"role": "assistant", "content": f"{model_choice}: {score}"})
     return "", history
 
 # --- UI ---
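
For context, a minimal sketch of how the new compute_score helper behaves when called the way bot now calls it. It assumes compute_score from the diff above is in scope; the model id is a placeholder assumption (any AutoModelForSequenceClassification scorer that emits a single regression logit would fit), not necessarily what this Space actually loads.

# Usage sketch for the compute_score helper added above. Assumes the
# function is already defined in scope; the model id is a placeholder
# single-logit scorer, not necessarily the one this Space loads.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "HuggingFaceFW/fineweb-edu-classifier"  # placeholder assumption

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(
    model_id, torch_dtype=torch.bfloat16
).to(DEVICE).eval()

res = compute_score("The mitochondria is the powerhouse of the cell.", model, tokenizer)
print(res["score"])      # raw regression logit(s) as a list, one per input
print(res["int_score"])  # same value(s) rounded and clamped to the 0-5 scale

Note that "score" keeps the raw logit while "int_score" rounds and clamps it to 0-5, the convention used by FineWeb-Edu-style quality scorers; since bot passes a single string, both lists have exactly one element.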