VeuReu committed on
Commit
e2dc4cb
·
verified ·
1 Parent(s): 7599ced

Upload 2 files

Browse files
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -23,7 +23,7 @@ def _lazy_load() -> Tuple[LlavaOnevisionForConditionalGeneration, AutoProcessor]
23
  _processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
24
  _model = LlavaOnevisionForConditionalGeneration.from_pretrained(
25
  MODEL_ID,
26
- torch_dtype=DTYPE,
27
  low_cpu_mem_usage=True,
28
  trust_remote_code=True,
29
  use_safetensors=True,
@@ -58,6 +58,9 @@ def _compose_prompt(user_text: str, context: Optional[Dict] = None) -> List[Dict
58
  @spaces.GPU # en HF Spaces usará GPU cuando haya disponibilidad (ZeroGPU)
59
  def _infer_one(image: Image.Image, text: str, max_new_tokens: int = 256, temperature: float = 0.7,
60
  context: Optional[Dict] = None) -> str:
 
 
 
61
  model, processor = _lazy_load()
62
  prompt = processor.apply_chat_template(_compose_prompt(text, context), add_generation_prompt=True)
63
  inputs = processor(images=image, text=prompt, return_tensors="pt").to(DEVICE, dtype=DTYPE)
 
23
  _processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
24
  _model = LlavaOnevisionForConditionalGeneration.from_pretrained(
25
  MODEL_ID,
26
+ dtype=DTYPE,
27
  low_cpu_mem_usage=True,
28
  trust_remote_code=True,
29
  use_safetensors=True,
 
58
  @spaces.GPU # en HF Spaces usará GPU cuando haya disponibilidad (ZeroGPU)
59
  def _infer_one(image: Image.Image, text: str, max_new_tokens: int = 256, temperature: float = 0.7,
60
  context: Optional[Dict] = None) -> str:
61
+ # Reducir el tamaño de la imagen para ahorrar memoria en la GPU
62
+ image.thumbnail((1024, 1024))
63
+
64
  model, processor = _lazy_load()
65
  prompt = processor.apply_chat_template(_compose_prompt(text, context), add_generation_prompt=True)
66
  inputs = processor(images=image, text=prompt, return_tensors="pt").to(DEVICE, dtype=DTYPE)