Upload 6 files

- README.md +21 -14
- api_schemas.py +17 -0
- app.py +85 -0
- clients/client_requests.py +39 -0
- clients/streamlit_client_app.py +24 -0
- requirements.txt +7 -0
README.md
CHANGED
@@ -1,14 +1,21 @@

# Salamandra-Vision 7B · ZeroGPU Space (API + Clients)

Gradio Space (ZeroGPU) that serves a **UI + API** for `BSC-LT/salamandra-7b-vision`.
Included clients: `clients/client_requests.py` (local/CLI) and `clients/streamlit_client_app.py` (Streamlit Space).

## Deploying the Space (ZeroGPU)
1. Create a Space → SDK **Gradio**, Hardware **ZeroGPU**.
2. Upload `app.py`, `requirements.txt`, `README.md`, and the `examples` folder.
3. Optional: set `MODEL_ID=BSC-LT/salamandra-7b-vision`.

## Endpoints
- UI: the Gradio interface.
- **Raw REST**: `POST /api/describe_raw` (multipart: `image`, `text`, `max_new_tokens`, `temperature`).
- Classic Gradio: `POST /api/predict/describe` (recommended via `gradio_client`).

> ZeroGPU is Gradio-only, so the API is exposed from Gradio. External clients (requests/Streamlit) work fine. For heavy load and fair use of the cluster, consider per-user token forwarding (easiest in Gradio apps).

## Local client (no Gradio UI)
```bash
python clients/client_requests.py --space-url "https://<usuario>-<space>.hf.space" \
  --image examples/demo.jpg --text "Descríbela en catalán."
```
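For the classic Gradio endpoint, a minimal sketch using `gradio_client` (installed separately with `pip install gradio_client`; with Gradio 4.x clients the route is addressed as `api_name="/describe"`, matching `api_name="describe"` in `app.py`):

```python
# Minimal sketch: call the Gradio endpoint via gradio_client.
from gradio_client import Client, handle_file

client = Client("https://<usuario>-<space>.hf.space")
result = client.predict(
    handle_file("examples/demo.jpg"),   # image
    "Describe la imagen con detalle.",  # text prompt
    256,                                # max_new_tokens
    0.7,                                # temperature
    api_name="/describe",
)
print(result)
```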
api_schemas.py
ADDED
@@ -0,0 +1,17 @@

# api_schemas.py
DESCRIBE_RAW_MULTIPART = {
    "method": "POST",
    "path": "/api/describe_raw",
    "multipart_fields": [
        {"name": "image", "type": "file"},
        {"name": "text", "type": "text", "default": "Describe la imagen con detalle."},
        {"name": "max_new_tokens", "type": "int", "default": 256},
        {"name": "temperature", "type": "float", "default": 0.7}
    ]
}

PREDICT_JSON = {
    "method": "POST",
    "path": "/api/predict/describe",
    "json_body": {"data": ["<file or url>", "prompt text", 256, 0.7]}
}
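These descriptors are plain data; a short sketch of how a client might derive its default form fields from `DESCRIBE_RAW_MULTIPART` (the `build_defaults` helper is illustrative, not part of the repo):

```python
# Illustrative helper: collect the non-file defaults from a multipart schema.
from api_schemas import DESCRIBE_RAW_MULTIPART

def build_defaults(schema: dict) -> dict:
    return {
        field["name"]: str(field["default"])
        for field in schema["multipart_fields"]
        if field["type"] != "file" and "default" in field
    }

# -> {'text': 'Describe la imagen con detalle.', 'max_new_tokens': '256', 'temperature': '0.7'}
print(build_defaults(DESCRIBE_RAW_MULTIPART))
```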
app.py
ADDED
@@ -0,0 +1,85 @@

# app.py
import os

import gradio as gr
import spaces  # <- enables the ZeroGPU decorators
import torch
from PIL import Image
from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration

MODEL_ID = os.environ.get("MODEL_ID", "BSC-LT/salamandra-7b-vision")
DTYPE = torch.float16  # half precision for H200/A100
DEVICE = "cuda"  # ZeroGPU attaches a GPU per call inside @spaces.GPU

# Lazy loading: only on the first GPU invocation
_model = None
_processor = None

def _lazy_load():
    global _model, _processor
    if _model is None or _processor is None:
        _processor = AutoProcessor.from_pretrained(MODEL_ID)
        _model = LlavaOnevisionForConditionalGeneration.from_pretrained(
            MODEL_ID,
            torch_dtype=DTYPE,
            low_cpu_mem_usage=True,
            trust_remote_code=True,
            device_map=None,  # we move to CUDA explicitly under @spaces.GPU
            use_safetensors=True,
        )
    return _model, _processor

@spaces.GPU  # <- ensures the function runs with a GPU attached
def describe(image: Image.Image, prompt_text: str, max_new_tokens: int, temperature: float) -> str:
    """Return a description from an image plus a text prompt."""
    model, processor = _lazy_load()

    # Chat-template formatting as recommended by the model card
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": prompt_text or "Descriu la imatge amb el màxim detall possible."},
            ],
        }
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Move to the GPU right before inference (ZeroGPU)
    model = model.to(DEVICE)
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(DEVICE, DTYPE)

    with torch.inference_mode():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            do_sample=float(temperature) > 0,  # temperature is ignored unless sampling is enabled
            temperature=float(temperature),
        )

    text = processor.decode(output[0], skip_special_tokens=True)
    return text.strip()

with gr.Blocks(title="Salamandra Vision 7B (ZeroGPU)") as demo:
    gr.Markdown("# Salamandra-Vision 7B · ZeroGPU\nSend an image and a text prompt; get back a description.")

    with gr.Row():
        with gr.Column():
            in_img = gr.Image(label="Image", type="pil")
            in_txt = gr.Textbox(
                label="Text prompt",
                value="Describe la imagen con el mayor detalle posible (en catalán o español)."
            )
            max_new = gr.Slider(16, 1024, value=256, step=16, label="max_new_tokens")
            temp = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="temperature")
            btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            out = gr.Textbox(label="Description", lines=18)

    btn.click(describe, inputs=[in_img, in_txt, max_new, temp], outputs=out, api_name="describe")

# Gradio queue: useful for ZeroGPU and demand spikes.
# (concurrency_count was removed in Gradio 4; use default_concurrency_limit.)
demo.queue(default_concurrency_limit=1, max_size=16).launch()
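Note that `app.py` as written only exposes the Gradio endpoint (`api_name="describe"`); the raw `/api/describe_raw` route that the README and both clients call is not defined in it. A minimal sketch of one way to add it, assuming FastAPI and uvicorn (both ship as Gradio dependencies) and replacing the final `demo.queue(...).launch()` line:

```python
# Sketch only: mount a raw REST route alongside the Gradio app.
# This would replace the final demo.queue(...).launch() line of app.py.
import io

import uvicorn
from fastapi import FastAPI, File, Form, UploadFile

api = FastAPI()

@api.post("/api/describe_raw")
async def describe_raw(
    image: UploadFile = File(...),
    text: str = Form("Describe la imagen con detalle."),
    max_new_tokens: int = Form(256),
    temperature: float = Form(0.7),
):
    # `describe`, `Image`, `gr`, and `demo` come from app.py above.
    pil = Image.open(io.BytesIO(await image.read())).convert("RGB")
    return {"text": describe(pil, text, max_new_tokens, temperature)}

# Serve the Gradio UI at "/" next to the REST route.
app = gr.mount_gradio_app(api, demo.queue(default_concurrency_limit=1, max_size=16), path="/")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
```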
clients/client_requests.py
ADDED
@@ -0,0 +1,39 @@

# clients/client_requests.py
import argparse
import os

import requests

def call_describe_raw(space_base_url: str, image_path: str, text: str, max_new_tokens: int = 256, temperature: float = 0.7):
    """
    Hit the raw REST endpoint /api/describe_raw (multipart/form-data).
    E.g. space_base_url = "https://<usuario>-<space>.hf.space"
    """
    url = space_base_url.rstrip("/") + "/api/describe_raw"
    with open(image_path, "rb") as f:
        files = {"image": (os.path.basename(image_path), f, "application/octet-stream")}
        data = {"text": text, "max_new_tokens": str(max_new_tokens), "temperature": str(temperature)}
        r = requests.post(url, files=files, data=data, timeout=600)
    r.raise_for_status()
    return r.json()["text"]

def call_predict_legacy(space_base_url: str, image_path: str, text: str, max_new_tokens: int = 256, temperature: float = 0.7):
    """
    Alternative: the standard /api/predict/describe endpoint (Gradio format).
    That route is normally driven via gradio_client, which manages uploads;
    with plain requests you would have to POST the file to /upload first and
    reference the returned path, so we prefer /api/describe_raw. Kept as an
    educational placeholder for compatibility.
    """
    raise NotImplementedError("Use call_describe_raw for direct HTTP.")

if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("--space-url", required=True, help="https://<usuario>-<space>.hf.space")
    ap.add_argument("--image", required=True)
    ap.add_argument("--text", default="Describe la imagen con detalle.")
    ap.add_argument("--max-new-tokens", type=int, default=256)
    ap.add_argument("--temperature", type=float, default=0.7)
    args = ap.parse_args()
    out = call_describe_raw(args.space_url, args.image, args.text, args.max_new_tokens, args.temperature)
    print(out)
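Since a ZeroGPU Space may be cold-starting or queueing when the first request arrives, a small retry wrapper can help; `call_with_retry` below is an illustrative addition, not part of the repo:

```python
# Illustrative: retry call_describe_raw with linear backoff for ZeroGPU cold starts.
import time

import requests

from clients.client_requests import call_describe_raw

def call_with_retry(space_url: str, image_path: str, text: str,
                    attempts: int = 3, backoff: float = 10.0, **kw) -> str:
    for i in range(attempts):
        try:
            return call_describe_raw(space_url, image_path, text, **kw)
        except requests.RequestException:
            if i == attempts - 1:
                raise  # out of attempts: surface the last error
            time.sleep(backoff * (i + 1))  # wait longer after each failure
```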
clients/streamlit_client_app.py
ADDED
@@ -0,0 +1,24 @@

# clients/streamlit_client_app.py
import os

import requests
import streamlit as st

SPACE_URL = os.environ.get("UPSTREAM_SPACE_URL", "https://<usuario>-<space>.hf.space")

st.set_page_config(page_title="Salamandra client (Streamlit)", layout="centered")
st.title("Salamandra client · Streamlit")

img = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
prompt = st.text_area("Text prompt", "Describe la imagen con detalle (ES/CA).")
cols = st.columns(2)
max_new = cols[0].slider("max_new_tokens", 16, 1024, 256, 16)
temperature = cols[1].slider("temperature", 0.0, 1.5, 0.7, 0.05)

if st.button("Generate") and img is not None:
    with st.spinner("Calling the ZeroGPU Space..."):
        url = SPACE_URL.rstrip("/") + "/api/describe_raw"
        files = {"image": (img.name, img.getvalue(), "application/octet-stream")}
        data = {"text": prompt, "max_new_tokens": str(max_new), "temperature": str(temperature)}
        r = requests.post(url, files=files, data=data, timeout=600)
    if r.ok:
        st.text_area("Description", r.json().get("text", ""), height=300)
    else:
        st.error(f"Error {r.status_code}: {r.text}")
requirements.txt
ADDED
@@ -0,0 +1,7 @@

gradio>=4.44.0
spaces>=0.25.0
transformers>=4.44.0
torch>=2.2
accelerate>=0.30.0
safetensors>=0.4.2
pillow>=10.3
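Note that this `requirements.txt` covers only the Space itself. The clients additionally need `requests` (plus `streamlit` for the Streamlit app), and the `gradio_client` example above is a separate install; pin those in the client environment rather than here.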