focustiki committed
Commit e634942 · 1 Parent(s): ed88d63

Update streamlit_app.py

Files changed (1)
  1. streamlit_app.py +38 -16
streamlit_app.py CHANGED
@@ -77,37 +77,59 @@ def center_crops(img: Image.Image, n=2, frac=0.80) -> List[Image.Image]:
         crops.append(img.crop((x0, y0, x0 + cw, y0 + ch)))
     return crops
 
-# ------------------------ Remote calls with surfaced errors ------------------------
-def remote_trocr(img: Image.Image) -> Tuple[str, str | None]:
+# ------------------------ Remote calls (HF Inference API) ------------------------
+def remote_trocr(img: Image.Image) -> tuple[str, str | None]:
+    """OCR with TrOCR via image_to_text. Works across hub client versions."""
     try:
-        out = hf.image_to_text(image=_to_png_bytes(img), model=TROCR_MODEL, timeout=60)
-        if isinstance(out, list) and out:
-            out = out[0].get("generated_text", "")
-        return (out or "").strip(), None
+        out = hf.image_to_text(image=_to_png_bytes(img), model=TROCR_MODEL)
+        # normalize possible return shapes
+        if isinstance(out, str):
+            text = out.strip()
+        elif isinstance(out, list) and out:
+            # some deployments return [{"generated_text": "..."}]
+            text = (out[0].get("generated_text") or out[0].get("text") or "").strip()
+        elif isinstance(out, dict):
+            text = (out.get("generated_text") or out.get("text") or "").strip()
+        else:
+            text = ""
+        return text, None
     except Exception as e:
         return "", f"TROCR error: {e}"
 
-def remote_vqa(img: Image.Image, question: str) -> Tuple[str, str | None]:
+def remote_vqa(img: Image.Image, question: str) -> tuple[str, str | None]:
+    """BLIP-VQA call without timeout kwarg; normalize result shapes."""
     try:
         out = hf.visual_question_answering(
             image=_to_png_bytes(img),
             question=question,
             model=VQA_MODEL,
-            timeout=60,
         )
-        if isinstance(out, list) and out:
+        if isinstance(out, str):
+            ans = out.strip()
+        elif isinstance(out, list) and out:
+            # typically [{"answer": "...", "score": ...}]
             ans = (out[0].get("answer") or "").strip()
-            return ans, None
-        return (out or "").strip(), None
+        elif isinstance(out, dict):
+            ans = (out.get("answer") or "").strip()
+        else:
+            ans = ""
+        return ans, None
     except Exception as e:
         return "", f"VQA error: {e}"
 
-def remote_caption(img: Image.Image) -> Tuple[str, str | None]:
+def remote_caption(img: Image.Image) -> tuple[str, str | None]:
+    """BLIP caption fallback; normalize return shapes."""
     try:
-        out = hf.image_to_text(image=_to_png_bytes(img), model=CAP_MODEL, timeout=60)
-        if isinstance(out, list) and out:
-            out = out[0].get("generated_text", "")
-        return (out or "").strip(), None
+        out = hf.image_to_text(image=_to_png_bytes(img), model=CAP_MODEL)
+        if isinstance(out, str):
+            cap = out.strip()
+        elif isinstance(out, list) and out:
+            cap = (out[0].get("generated_text") or out[0].get("text") or "").strip()
+        elif isinstance(out, dict):
+            cap = (out.get("generated_text") or out.get("text") or "").strip()
+        else:
+            cap = ""
+        return cap, None
     except Exception as e:
         return "", f"Caption error: {e}"
 
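The helpers in this hunk lean on an `hf` client, three model-ID constants, and `_to_png_bytes`, all defined earlier in streamlit_app.py and not shown in this diff. A minimal sketch of what that setup could look like, assuming `huggingface_hub.InferenceClient` and illustrative model IDs (the Space's actual values are not in this commit):

import io
from huggingface_hub import InferenceClient
from PIL import Image

# Assumed setup: the real file defines these above the hunk shown.
hf = InferenceClient(token="hf_...")                 # token usually read from Space secrets
TROCR_MODEL = "microsoft/trocr-base-printed"         # illustrative OCR model ID
VQA_MODEL = "Salesforce/blip-vqa-base"               # illustrative VQA model ID
CAP_MODEL = "Salesforce/blip-image-captioning-base"  # illustrative caption model ID

def _to_png_bytes(img: Image.Image) -> bytes:
    # Serialize a PIL image to PNG bytes for the Inference API.
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    return buf.getvalue()

# Each helper returns (result, error) so the Streamlit UI can surface
# failures (e.g. st.error(err)) instead of crashing the app.
img = Image.open("sample.png").convert("RGB")
text, err = remote_trocr(img)
print(err or text)

If a per-request deadline is still wanted after this change, passing it at client construction (e.g. InferenceClient(timeout=60)) is the version-stable place for it, which is presumably why the per-call timeout=60 kwarg was dropped here.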