# Slim Python 3.10 base image. Later steps call apt-get, so the image is
# expected to be Debian-based.
# NOTE(review): the tag is floating (no distro release or digest pinned), so a
# rebuild may silently pick up a different base — consider pinning.
FROM python:3.10-slim

# Relative paths in the instructions that follow resolve against /app.
WORKDIR /app

# OS-level packages: shared libraries (OpenGL, GLib, OpenMP runtime) plus the
# wget and tar tools used by the model-download steps further down.
# The apt package index is removed in the same layer so it never reaches the image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        tar \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest so the install layer below stays cached
# until requirements.txt itself changes.
COPY requirements.txt .

# Install all Python dependencies in a single resolver run.
# "numpy<2.0.0" is passed alongside the requirements so pip enforces the cap
# while resolving. Pre-installing numpy in a separate step does not bind later
# installs, which remain free to upgrade it past 2.0 if a dependency asks for it.
RUN pip install --no-cache-dir \
    "numpy<2.0.0" \
    -r requirements.txt \
    huggingface_hub

# Pre-create the extraction targets for the OCR models (detection,
# recognition, classification) and /content for the layout model.
RUN mkdir -p \
    /content \
    /app/models/cls/en \
    /app/models/det/en \
    /app/models/rec/en

# Download and extract the detection model (en_PP-OCRv3_det_infer).
# --progress=dot:giga stops wget from flooding the build log with progress
# output. The archive is deleted in the same layer so it does not persist in
# the image.
RUN wget --progress=dot:giga -O /tmp/det.tar \
        "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar" \
    && tar -xf /tmp/det.tar -C /app/models/det/en \
    && rm /tmp/det.tar

# Download and extract the recognition model (en_PP-OCRv4_rec_infer).
# --progress=dot:giga stops wget from flooding the build log with progress
# output. The archive is deleted in the same layer so it does not persist in
# the image.
RUN wget --progress=dot:giga -O /tmp/rec.tar \
        "https://paddleocr.bj.bcebos.com/PP-OCRv4/english/en_PP-OCRv4_rec_infer.tar" \
    && tar -xf /tmp/rec.tar -C /app/models/rec/en \
    && rm /tmp/rec.tar

# Download and extract the classification model
# (ch_ppocr_mobile_v2.0_cls_infer), unpacked under models/cls/en.
# --progress=dot:giga stops wget from flooding the build log with progress
# output. The archive is deleted in the same layer so it does not persist in
# the image.
RUN wget --progress=dot:giga -O /tmp/cls.tar \
        "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar" \
    && tar -xf /tmp/cls.tar -C /app/models/cls/en \
    && rm /tmp/cls.tar

# Download the DocLayout-YOLO weights to /content/layout-model.pt.
# --progress=dot:giga stops wget from flooding the build log with progress
# output while the file downloads.
RUN wget --progress=dot:giga -O /content/layout-model.pt \
    "https://huggingface.co/juliozhao/DocLayout-YOLO-DocStructBench-imgsz1280-2501/resolve/main/doclayout_yolo_docstructbench_imgsz1280_2501.pt?download=true"

# Install ultralytics and dill. --no-cache-dir keeps pip's download cache out
# of the layer, and the "numpy<2.0.0" cap stops this install from pulling
# numpy past the limit the rest of the image is built against.
RUN pip install --no-cache-dir "numpy<2.0.0" ultralytics dill
# Add the application entry point to the working directory
COPY app.py ./

# Config/cache directories for matplotlib and Ultralytics, kept under /app
RUN mkdir -p /app/.config/Ultralytics /app/.config/matplotlib

# Point each tool at its directory through the environment
ENV MPLCONFIGDIR=/app/.config/matplotlib \
    YOLO_CONFIG_DIR=/app/.config/Ultralytics

# Open /app and /content to whichever UID the container runs as.
# Symbolic mode a+rwX grants read/write everywhere but adds the execute bit
# only to directories (and to files that are already executable), instead of
# marking every model and data file executable as a blanket 777 does.
# NOTE(review): no USER is set, so the image defaults to root — consider a
# dedicated user with --chown if the target platform allows it.
RUN chmod -R a+rwX /app /content

# Expose port
# EXPOSE only documents the port; it does not publish it by itself.
# NOTE(review): presumably app.py listens on 7860 — confirm against the app.
EXPOSE 7860

# Run the application
# Exec (JSON-array) form: python is started directly, without a shell wrapper.
CMD ["python", "app.py"]