Spaces: Running on Zero

update app

app.py CHANGED
@@ -122,37 +122,6 @@ if torch.cuda.is_available():
 
 print("Using device:", device)
 
-# CACHE_PATH = "./model_cache"
-# if not os.path.exists(CACHE_PATH):
-#     os.makedirs(CACHE_PATH)
-#
-# model_path_d_local = snapshot_download(
-#     repo_id='rednote-hilab/dots.ocr',
-#     local_dir=os.path.join(CACHE_PATH, 'dots.ocr'),
-#     max_workers=20,
-#     local_dir_use_symlinks=False
-# )
-#
-# config_file_path = os.path.join(model_path_d_local, "configuration_dots.py")
-#
-# if os.path.exists(config_file_path):
-#     with open(config_file_path, 'r') as f:
-#         input_code = f.read()
-#
-#     lines = input_code.splitlines()
-#     if "class DotsVLProcessor" in input_code and not any("attributes = " in line for line in lines):
-#         output_lines = []
-#         for line in lines:
-#             output_lines.append(line)
-#             if line.strip().startswith("class DotsVLProcessor"):
-#                 output_lines.append("    attributes = [\"image_processor\", \"tokenizer\"]")
-#
-#         with open(config_file_path, 'w') as f:
-#             f.write('\n'.join(output_lines))
-#         print("Patched configuration_dots.py successfully.")
-#
-#sys.path.append(model_path_d_local)
-
 MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 2048
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

@@ -178,7 +147,7 @@ model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
 ).to(device).eval()
 
 # Load Dots.OCR from the local, patched directory
-MODEL_PATH_D = "prithivMLmods/Dots.OCR-Latest-BF16"
+MODEL_PATH_D = "prithivMLmods/Dots.OCR-Latest-BF16"  # -> alt of [rednote-hilab/dots.ocr]
 processor_d = AutoProcessor.from_pretrained(MODEL_PATH_D, trust_remote_code=True)
 model_d = AutoModelForCausalLM.from_pretrained(
     MODEL_PATH_D,
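
For reference, the block removed in the first hunk implemented the original download-and-patch workflow for rednote-hilab/dots.ocr: fetch the repo into a local cache, insert an attributes declaration into DotsVLProcessor inside configuration_dots.py, and put the patched directory on sys.path. A minimal standalone sketch of that workflow, reconstructed from the removed comments (paths, arguments, and the patch string are taken from those comments, not verified against the live repo):

# Standalone sketch of the removed cache-and-patch path.
import os
import sys
from huggingface_hub import snapshot_download

CACHE_PATH = "./model_cache"
os.makedirs(CACHE_PATH, exist_ok=True)

# Download the original repo into a plain local directory (no symlinks).
model_path_d_local = snapshot_download(
    repo_id="rednote-hilab/dots.ocr",
    local_dir=os.path.join(CACHE_PATH, "dots.ocr"),
    max_workers=20,
    local_dir_use_symlinks=False,
)

# Add the missing `attributes` declaration to DotsVLProcessor, if absent.
config_file_path = os.path.join(model_path_d_local, "configuration_dots.py")
if os.path.exists(config_file_path):
    with open(config_file_path, "r") as f:
        input_code = f.read()
    lines = input_code.splitlines()
    if "class DotsVLProcessor" in input_code and not any("attributes = " in line for line in lines):
        output_lines = []
        for line in lines:
            output_lines.append(line)
            if line.strip().startswith("class DotsVLProcessor"):
                output_lines.append('    attributes = ["image_processor", "tokenizer"]')
        with open(config_file_path, "w") as f:
            f.write("\n".join(output_lines))
        print("Patched configuration_dots.py successfully.")

# Make the patched directory importable for trust_remote_code loading.
sys.path.append(model_path_d_local)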
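
The second hunk keeps the same loading calls but points them at a pre-converted Hub checkpoint, so the download-and-patch step above is no longer needed (the kept comment "from the local, patched directory" is now stale). A minimal loading sketch consistent with the surrounding app.py context; everything after the MODEL_PATH_D argument to from_pretrained (dtype, trust_remote_code, device handling) falls outside this diff and is an assumption here:

# Sketch of the new loading path for the Dots.OCR checkpoint.
import torch
from transformers import AutoModelForCausalLM, AutoProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"

MODEL_PATH_D = "prithivMLmods/Dots.OCR-Latest-BF16"  # -> alt of [rednote-hilab/dots.ocr]
processor_d = AutoProcessor.from_pretrained(MODEL_PATH_D, trust_remote_code=True)
model_d = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH_D,
    trust_remote_code=True,      # repo ships custom modeling/processing code (assumption)
    torch_dtype=torch.bfloat16,  # BF16 weights, inferred from the repo name (assumption)
).to(device).eval()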