File size: 1,995 Bytes
539a846
 
 
 
 
dac1395
539a846
 
dac1395
539a846
dac1395
 
539a846
 
 
 
 
dac1395
 
486b8bb
dac1395
 
539a846
e6c79c0
 
 
 
 
61cfb19
 
e6c79c0
 
 
61cfb19
 
e6c79c0
 
 
61cfb19
 
e6c79c0
 
 
 
 
 
 
87d8560
81803f2
a52ed1a
 
8dc6be2
e6c79c0
 
 
a52ed1a
 
e6c79c0
 
a52ed1a
 
e6c79c0
6e84025
a52ed1a
 
c0f3292
e6c79c0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Base image: slim Python 3.10 (apt-based, see the package step below)
FROM python:3.10-slim

# Every relative path in later instructions resolves against /app
WORKDIR /app

# System packages, listed alphabetically:
#   libgl1, libglib2.0-0, libgomp1 - shared libraries (libGL, GLib, OpenMP runtime)
#   tar, wget                      - used by later steps to fetch and unpack model archives
# The apt package lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        tar \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest so this layer stays cached until
# requirements.txt itself changes
COPY requirements.txt .

# Install all Python dependencies in ONE resolver pass.
# The "numpy<2.0.0" bound is passed alongside requirements.txt (instead of in a
# separate earlier `pip install`) so pip must honour it while resolving every
# other package: a dependency that demands numpy>=2 now fails the build loudly
# rather than silently replacing the previously installed 1.x wheel.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir \
    "numpy<2.0.0" \
    -r requirements.txt \
    huggingface_hub

# Target directories: one per OCR model role under /app/models, plus /content
# for the layout model
RUN mkdir -p \
    /app/models/det/en \
    /app/models/rec/en \
    /app/models/cls/en \
    /content

# Each OCR model step below fetches an archive into /tmp, unpacks it into its
# target directory and deletes the archive in the same layer, so the tarball
# never persists in the image. `set -e` aborts the step on the first failure.

# Detection model (English PP-OCRv3)
RUN set -e; \
    wget --output-document=/tmp/det.tar "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar"; \
    tar --extract --file=/tmp/det.tar --directory=/app/models/det/en; \
    rm /tmp/det.tar

# Recognition model (English PP-OCRv4)
RUN set -e; \
    wget --output-document=/tmp/rec.tar "https://paddleocr.bj.bcebos.com/PP-OCRv4/english/en_PP-OCRv4_rec_infer.tar"; \
    tar --extract --file=/tmp/rec.tar --directory=/app/models/rec/en; \
    rm /tmp/rec.tar

# Classification model (the archive is the ch_ppocr_mobile_v2.0 classifier,
# stored under the cls/en path)
RUN set -e; \
    wget --output-document=/tmp/cls.tar "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar"; \
    tar --extract --file=/tmp/cls.tar --directory=/app/models/cls/en; \
    rm /tmp/cls.tar

# DocLayout-YOLO weights, saved as a single file at /content/layout-model.pt
RUN wget --output-document=/content/layout-model.pt \
    "https://huggingface.co/juliozhao/DocLayout-YOLO-DocStructBench-imgsz1280-2501/resolve/main/doclayout_yolo_docstructbench_imgsz1280_2501.pt?download=true"

# Install Ultralytics and dill.
# --no-cache-dir keeps pip's download cache out of the layer, and the
# "numpy<2.0.0" bound is repeated so this resolve cannot move numpy to 2.x.
RUN pip install --no-cache-dir "numpy<2.0.0" ultralytics dill
# Point matplotlib and Ultralytics at config/cache directories inside /app
# (created in the next step) instead of the user's home directory
ENV MPLCONFIGDIR=/app/.config/matplotlib \
    YOLO_CONFIG_DIR=/app/.config/Ultralytics

# Create the config directories and a dedicated unprivileged user, then hand
# /app and /content over to that user. This replaces a blanket
# `chmod -R 777`: the runtime user can still write everywhere it could before,
# but nothing is world-writable and regular files are not marked executable.
# A fixed numeric UID lets platforms that require a non-root UID match it.
# NOTE(review): port 7860 suggests a Hugging Face Space, which runs containers
# as UID 1000 - confirm the deployment target before changing the UID.
RUN mkdir -p /app/.config/matplotlib /app/.config/Ultralytics \
    && useradd --create-home --uid 1000 appuser \
    && chown -R appuser:appuser /app /content

# Copy the application last: it is the most frequently edited file, so keeping
# it after the recursive chown means a code change does not re-run that step
COPY --chown=appuser:appuser app.py .

# Drop root for the running container
USER appuser

# Documented listening port (EXPOSE does not publish it)
EXPOSE 7860

# Exec form so python receives signals directly
CMD ["python", "app.py"]