Spaces:

mknolan
/

cursor_slides_internvl2

Paused

App Files Files Community

mknolan committed on Mar 15

Commit

fb7360f

verified ·

1 Parent(s): 3f1523d

Upload InternVL2 implementation

Browse files

Files changed (1) hide show

app_internvl2.py +58 -68

app_internvl2.py CHANGED Viewed

@@ -133,46 +133,48 @@ def load_model():
         print("Cannot load models without GPU acceleration.")
         return False
-    # First try to load InternVL2 if lmdeploy is available
     if HAS_LMDEPLOY:
         try:
             print("Attempting to load InternVL2 model...")
-            # Configure for AWQ quantized model
             backend_config = TurbomindEngineConfig(
                 model_format='awq',
-                session_len=2048  # Explicitly set session length
             )
-            # Set to non-streaming mode
             internvl2_model = pipeline(
                 "OpenGVLab/InternVL2-40B-AWQ",
                 backend_config=backend_config,
                 model_name_or_path=None,
                 backend_name="turbomind",
                 stream=False,  # Disable streaming
             )
             print("InternVL2 model loaded successfully!")
-            return True
         except Exception as e:
             print(f"Failed to load InternVL2: {str(e)}")
             internvl2_model = None
-    # If InternVL2 failed or lmdeploy not available, try BLIP
-    if HAS_BLIP:
-        try:
-            print("Falling back to BLIP model...")
-            blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-            blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cuda")
-            print("BLIP model loaded successfully!")
-            return True
-        except Exception as e:
-            print(f"Failed to load BLIP: {str(e)}")
-            blip_processor = None
-            blip_model = None
-    print("Could not load any model")
-    return False
 # Try to load a model at startup
 MODEL_LOADED = load_model()
@@ -192,15 +194,44 @@ def analyze_image(image, prompt):
             pil_image = Image.fromarray(image).convert('RGB')
         else:
             pil_image = image.convert('RGB')
-        # If we have InternVL2 loaded, use it
         if internvl2_model is not None:
             try:
                 print("Running inference with InternVL2...")
                 print(f"Using prompt: '{prompt}'")
-                # Run the model and capture the raw response
-                response = internvl2_model((prompt, pil_image))
                 # Print debug info about the response
                 print(f"Response type: {type(response)}")
@@ -224,54 +255,13 @@ def analyze_image(image, prompt):
                 # Check if we got an empty result
                 if not result or result.strip() == "":
                     print("WARNING: Received empty response from InternVL2")
-                    # Try an alternative prompt to see if that works
-                    print("Trying alternative prompt...")
-                    alt_prompt = "This is an image. Describe what you see in detail."
-                    response2 = internvl2_model((alt_prompt, pil_image))
-                    if hasattr(response2, "text"):
-                        result = response2.text
-                    elif hasattr(response2, "response"):
-                        result = response2.response
-                    elif hasattr(response2, "generated_text"):
-                        result = response2.generated_text
-                    else:
-                        result = str(response2)
-                    if not result or result.strip() == "":
-                        print("Alternative prompt also gave empty result")
-                        # Fall through to BLIP fallback
-                        raise ValueError("Empty response from InternVL2")
-                    else:
-                        print(f"Alternative prompt worked: '{result}'")
-                # If we got a valid result, return it
-                if result and result.strip() != "":
-                    return f"[InternVL2] {result}"
-                else:
-                    # Try BLIP instead
-                    raise ValueError("Empty response from InternVL2")
             except Exception as e:
                 print(f"Error with InternVL2: {str(e)}")
-                # If InternVL2 fails, fall back to BLIP if available
-        # If we have BLIP loaded, use it
-        if blip_model is not None and blip_processor is not None:
-            try:
-                print("Running inference with BLIP...")
-                # BLIP doesn't use prompts the same way, simplify
-                inputs = blip_processor(pil_image, return_tensors="pt").to("cuda")
-                out = blip_model.generate(**inputs, max_new_tokens=100)
-                result = blip_processor.decode(out[0], skip_special_tokens=True)
-                # Check if BLIP result is empty
-                if not result or result.strip() == "":
-                    return "BLIP model returned an empty response. The model may be having issues processing this image."
-                return f"[BLIP] {result} (Note: Custom prompts not supported with BLIP fallback model)"
-            except Exception as e:
-                print(f"Error with BLIP: {str(e)}")
         return "No model was able to analyze the image. See logs for details."

         print("Cannot load models without GPU acceleration.")
         return False
+    # Try to load BLIP first since it's more reliable
+    if HAS_BLIP:
+        try:
+            print("Loading BLIP model...")
+            blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+            blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cuda")
+            print("BLIP model loaded successfully!")
+        except Exception as e:
+            print(f"Failed to load BLIP: {str(e)}")
+            blip_processor = None
+            blip_model = None
+    # Then try InternVL2 if lmdeploy is available
     if HAS_LMDEPLOY:
         try:
             print("Attempting to load InternVL2 model...")
+            # Configure for AWQ quantized model with larger context
             backend_config = TurbomindEngineConfig(
                 model_format='awq',
+                session_len=4096,  # Increased session length
+                max_batch_size=1,  # Limit batch size to reduce memory usage
+                cache_max_entry_count=0.3,  # Adjust cache to optimize for single requests
+                tp=1  # Set tensor parallelism to 1 (use single GPU)
             )
+            # Set to non-streaming mode with explicit token limits
             internvl2_model = pipeline(
                 "OpenGVLab/InternVL2-40B-AWQ",
                 backend_config=backend_config,
                 model_name_or_path=None,
                 backend_name="turbomind",
                 stream=False,  # Disable streaming
+                max_new_tokens=512,  # Explicitly set max new tokens
             )
             print("InternVL2 model loaded successfully!")
         except Exception as e:
             print(f"Failed to load InternVL2: {str(e)}")
             internvl2_model = None
+    # Return True if at least one model is loaded
+    return (blip_model is not None and blip_processor is not None) or (internvl2_model is not None)
 # Try to load a model at startup
 MODEL_LOADED = load_model()
             pil_image = Image.fromarray(image).convert('RGB')
         else:
             pil_image = image.convert('RGB')
+        # Try BLIP first since it's more reliable
+        if blip_model is not None and blip_processor is not None:
+            try:
+                print("Running inference with BLIP...")
+                # BLIP doesn't use prompts the same way, simplify
+                inputs = blip_processor(pil_image, return_tensors="pt").to("cuda")
+                out = blip_model.generate(**inputs, max_length=80, min_length=10, num_beams=5)
+                result = blip_processor.decode(out[0], skip_special_tokens=True)
+                # Check if BLIP result is empty
+                if not result or result.strip() == "":
+                    print("BLIP model returned an empty response")
+                    # Only fall through to InternVL2 if BLIP fails
+                    raise ValueError("Empty response from BLIP")
+                return f"[BLIP] {result}"
+            except Exception as e:
+                print(f"Error with BLIP: {str(e)}")
+                # If BLIP fails, try InternVL2 if available
+        # Try InternVL2 if available
         if internvl2_model is not None:
             try:
                 print("Running inference with InternVL2...")
                 print(f"Using prompt: '{prompt}'")
+                # Create a specifically formatted prompt for InternVL2
+                formatted_prompt = f"<image>\n{prompt}"
+                print(f"Formatted prompt: '{formatted_prompt}'")
+                # Run the model with more explicit parameters
+                response = internvl2_model(
+                    (formatted_prompt, pil_image),
+                    max_new_tokens=512,  # Set higher token limit
+                    temperature=0.7,     # Add temperature for better generation
+                    top_p=0.9            # Add top_p for better generation
+                )
                 # Print debug info about the response
                 print(f"Response type: {type(response)}")
                 # Check if we got an empty result
                 if not result or result.strip() == "":
                     print("WARNING: Received empty response from InternVL2")
+                    return "InternVL2 failed to analyze the image (empty response). This may be due to token limits."
+                return f"[InternVL2] {result}"
             except Exception as e:
                 print(f"Error with InternVL2: {str(e)}")
+                return f"Error with InternVL2: {str(e)}"
         return "No model was able to analyze the image. See logs for details."