# Builds a Python 3.10 image that runs app.py on port 7860. PaddleOCR inference
# models (detection, recognition, classification) are unpacked under
# /app/models and a DocLayout-YOLO checkpoint is stored at
# /content/layout-model.pt at build time, so no model download is needed when
# the container starts.
FROM python:3.10-slim

WORKDIR /app

# System packages: shared libraries (presumably required by the OCR / vision
# Python wheels listed in requirements.txt -- confirm against that file) plus
# wget and tar, which the model download steps below rely on.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        tar \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first so the pip layers stay cached until
# requirements.txt itself changes.
COPY requirements.txt .

# Install numpy<2 first to avoid compatibility issues with packages that are
# installed afterwards.
RUN pip install --no-cache-dir "numpy<2.0.0"

# Install the remaining requirements.
RUN pip install --no-cache-dir -r requirements.txt huggingface_hub

# Kept as a separate step (as in the original recipe) so dependency resolution
# order is unchanged; --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir ultralytics dill

# Create an unprivileged account with a fixed UID/GID and give it ownership of
# every directory the application writes to. This replaces a blanket
# `chmod -R 777 /app /content`: the files stay writable for the runtime user
# without being world-writable, and because ownership is set while the
# directories are still almost empty, no later layer has to rewrite the large
# model files just to change their permissions.
# NOTE(review): UID 1000 is chosen on the assumption that the hosting platform
# runs the container as that UID (port 7860 suggests Hugging Face Spaces) --
# confirm. A container started with a different UID will not be able to write
# to /app or /content.
RUN groupadd --gid 1000 app \
    && useradd --uid 1000 --gid app --home-dir /app --no-create-home \
        --shell /usr/sbin/nologin app \
    && mkdir -p \
        /app/models/det/en \
        /app/models/rec/en \
        /app/models/cls/en \
        /app/.config/matplotlib \
        /app/.config/Ultralytics \
        /content \
    && chown -R app:app /app /content

# Everything below runs as the unprivileged user, so downloaded files are
# owned by it from the moment they are created.
USER app

# HOME points at a directory the runtime user owns; the two *_DIR variables
# send the matplotlib and Ultralytics config/cache files to the directories
# created above.
ENV HOME=/app \
    MPLCONFIGDIR=/app/.config/matplotlib \
    YOLO_CONFIG_DIR=/app/.config/Ultralytics

# Each archive is downloaded, extracted and deleted within a single layer so
# the tarball itself is never stored in the image.
# NOTE(review): none of the downloads below is checksum-verified; consider
# pinning a sha256 for each artifact.

# Download and extract detection model
RUN wget -O /tmp/det.tar "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar" \
    && tar -xf /tmp/det.tar -C /app/models/det/en \
    && rm /tmp/det.tar

# Download and extract recognition model
RUN wget -O /tmp/rec.tar "https://paddleocr.bj.bcebos.com/PP-OCRv4/english/en_PP-OCRv4_rec_infer.tar" \
    && tar -xf /tmp/rec.tar -C /app/models/rec/en \
    && rm /tmp/rec.tar

# Download and extract classification model
RUN wget -O /tmp/cls.tar "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar" \
    && tar -xf /tmp/cls.tar -C /app/models/cls/en \
    && rm /tmp/cls.tar

# Download DocLayout-YOLO model
RUN wget -O /content/layout-model.pt \
    "https://huggingface.co/juliozhao/DocLayout-YOLO-DocStructBench-imgsz1280-2501/resolve/main/doclayout_yolo_docstructbench_imgsz1280_2501.pt?download=true"

# Application code goes last: it changes most often, so editing it invalidates
# no dependency or model layer.
COPY --chown=app:app app.py .

# Documentation only; the port still has to be published at `docker run` time.
EXPOSE 7860

# Exec form so the Python process is PID 1 and receives stop signals directly.
CMD ["python", "app.py"]