Spaces:

mknolan
/

cursor_slides_internvl2

Paused

App Files Files Community

mknolan committed on Mar 15

Commit

3f1523d

verified ·

1 Parent(s): ce32a95

Upload InternVL2 implementation

Browse files

Files changed (1) hide show

app_internvl2.py +59 -2

app_internvl2.py CHANGED Viewed

@@ -197,9 +197,61 @@ def analyze_image(image, prompt):
         if internvl2_model is not None:
             try:
                 print("Running inference with InternVL2...")
                 response = internvl2_model((prompt, pil_image))
-                result = response.text if hasattr(response, "text") else str(response)
-                return f"[InternVL2] {result}"
             except Exception as e:
                 print(f"Error with InternVL2: {str(e)}")
                 # If InternVL2 fails, fall back to BLIP if available
@@ -212,6 +264,11 @@ def analyze_image(image, prompt):
                 inputs = blip_processor(pil_image, return_tensors="pt").to("cuda")
                 out = blip_model.generate(**inputs, max_new_tokens=100)
                 result = blip_processor.decode(out[0], skip_special_tokens=True)
                 return f"[BLIP] {result} (Note: Custom prompts not supported with BLIP fallback model)"
             except Exception as e:
                 print(f"Error with BLIP: {str(e)}")

         if internvl2_model is not None:
             try:
                 print("Running inference with InternVL2...")
+                print(f"Using prompt: '{prompt}'")
+                # Run the model and capture the raw response
                 response = internvl2_model((prompt, pil_image))
+                # Print debug info about the response
+                print(f"Response type: {type(response)}")
+                print(f"Response attributes: {dir(response) if hasattr(response, '__dir__') else 'No dir available'}")
+                # Try different ways to extract the text
+                if hasattr(response, "text"):
+                    result = response.text
+                    print(f"Found 'text' attribute: '{result}'")
+                elif hasattr(response, "response"):
+                    result = response.response
+                    print(f"Found 'response' attribute: '{result}'")
+                elif hasattr(response, "generated_text"):
+                    result = response.generated_text
+                    print(f"Found 'generated_text' attribute: '{result}'")
+                else:
+                    # If no attribute worked, convert the whole response to string
+                    result = str(response)
+                    print(f"Using string conversion: '{result}'")
+                # Check if we got an empty result
+                if not result or result.strip() == "":
+                    print("WARNING: Received empty response from InternVL2")
+                    # Try an alternative prompt to see if that works
+                    print("Trying alternative prompt...")
+                    alt_prompt = "This is an image. Describe what you see in detail."
+                    response2 = internvl2_model((alt_prompt, pil_image))
+                    if hasattr(response2, "text"):
+                        result = response2.text
+                    elif hasattr(response2, "response"):
+                        result = response2.response
+                    elif hasattr(response2, "generated_text"):
+                        result = response2.generated_text
+                    else:
+                        result = str(response2)
+                    if not result or result.strip() == "":
+                        print("Alternative prompt also gave empty result")
+                        # Fall through to BLIP fallback
+                        raise ValueError("Empty response from InternVL2")
+                    else:
+                        print(f"Alternative prompt worked: '{result}'")
+                # If we got a valid result, return it
+                if result and result.strip() != "":
+                    return f"[InternVL2] {result}"
+                else:
+                    # Try BLIP instead
+                    raise ValueError("Empty response from InternVL2")
             except Exception as e:
                 print(f"Error with InternVL2: {str(e)}")
                 # If InternVL2 fails, fall back to BLIP if available
                 inputs = blip_processor(pil_image, return_tensors="pt").to("cuda")
                 out = blip_model.generate(**inputs, max_new_tokens=100)
                 result = blip_processor.decode(out[0], skip_special_tokens=True)
+                # Check if BLIP result is empty
+                if not result or result.strip() == "":
+                    return "BLIP model returned an empty response. The model may be having issues processing this image."
                 return f"[BLIP] {result} (Note: Custom prompts not supported with BLIP fallback model)"
             except Exception as e:
                 print(f"Error with BLIP: {str(e)}")