Spaces:

bla
/

layout_paddle

Paused

App Files Files Community

bla committed on Oct 21

Commit

ee69d03

verified ·

1 Parent(s): 81803f2

Update app.py

Browse files

Files changed (1) hide show

app.py +178 -5

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import logging
 import sys
 import tempfile
 import re
 from pathlib import Path
 from typing import Optional
@@ -13,7 +14,7 @@ import numpy as np
 import requests
 import torch
 import torchvision
-from PIL import Image
 from fastapi import FastAPI, File, Form, HTTPException, UploadFile
 from fastapi.responses import HTMLResponse, JSONResponse
 from paddleocr import PaddleOCR
@@ -82,6 +83,7 @@ async def startup_event():
         logger.error(f"Failed to load DocLayout-YOLO model: {e}", exc_info=True)
         raise RuntimeError("Could not load layout model") from e
 # --- Pydantic Request Models ---
 class URLRequest(BaseModel):
     url: HttpUrl
@@ -274,11 +276,24 @@ def process_document(file_path: str, target_width: Optional[int] = None):
                         "confidence": table["confidence"]
                     })
                 results.append({
                     "page_number": page.number + 1,
                     "figures": image_entries,
                     "tables": table_entries,
-                    "image_dimensions": {"width": processed_img.width, "height": processed_img.height}
                 })
                 logger.info(f"Page {page_num + 1} processed: {len(image_entries)} figures, {len(table_entries)} tables")
@@ -295,6 +310,74 @@ def process_document(file_path: str, target_width: Optional[int] = None):
         logger.error(f"Error in process_document: {e}", exc_info=True)
         raise
 # --- API Endpoints ---
 @app.get("/", response_class=HTMLResponse)
 async def read_root():
@@ -390,9 +473,21 @@ async def read_root():
                 <!-- Results -->
                 <div id="results" class="hidden mt-8">
                     <h3 class="text-xl font-bold text-gray-300 mb-4">Analysis Results</h3>
                     <div class="rounded-2xl bg-black/30 p-8 ring-1 ring-white/10 backdrop-blur-sm">
-                        <pre id="resultsContent" class="text-sm text-gray-300 overflow-x-auto"></pre>
                     </div>
                     <button onclick="downloadJSON()" class="mt-4 px-6 py-3 rounded-lg bg-emerald-500/10 text-emerald-400 font-semibold hover:bg-emerald-500/20 transition-all ring-1 ring-emerald-500/30">
                         Download JSON
                     </button>
@@ -493,7 +588,20 @@ async def read_root():
                 }
                 analysisResults = responseData;
-                document.getElementById('resultsContent').textContent = JSON.stringify(analysisResults, null, 2);
                 resultsDiv.classList.remove('hidden');
             } catch (err) {
@@ -506,10 +614,75 @@ async def read_root():
             }
         }
         function downloadJSON() {
             if (!analysisResults) return;
-            const blob = new Blob([JSON.stringify(analysisResults, null, 2)], { type: 'application/json' });
             const url = URL.createObjectURL(blob);
             const a = document.createElement('a');
             a.href = url;

 import sys
 import tempfile
 import re
+import base64
 from pathlib import Path
 from typing import Optional
 import requests
 import torch
 import torchvision
+from PIL import Image, ImageDraw, ImageFont
 from fastapi import FastAPI, File, Form, HTTPException, UploadFile
 from fastapi.responses import HTMLResponse, JSONResponse
 from paddleocr import PaddleOCR
         logger.error(f"Failed to load DocLayout-YOLO model: {e}", exc_info=True)
         raise RuntimeError("Could not load layout model") from e
 # --- Pydantic Request Models ---
 class URLRequest(BaseModel):
     url: HttpUrl
                         "confidence": table["confidence"]
                     })
+                # Create annotated image
+                annotated_img = create_annotated_image(
+                    processed_img,
+                    image_entries,
+                    table_entries
+                )
+                # Convert annotated image to base64
+                buffered = io.BytesIO()
+                annotated_img.save(buffered, format="PNG")
+                img_str = base64.b64encode(buffered.getvalue()).decode()
                 results.append({
                     "page_number": page.number + 1,
                     "figures": image_entries,
                     "tables": table_entries,
+                    "image_dimensions": {"width": processed_img.width, "height": processed_img.height},
+                    "annotated_image": f"data:image/png;base64,{img_str}"
                 })
                 logger.info(f"Page {page_num + 1} processed: {len(image_entries)} figures, {len(table_entries)} tables")
         logger.error(f"Error in process_document: {e}", exc_info=True)
         raise
+def create_annotated_image(img: Image.Image, figures: list, tables: list) -> Image.Image:
+    """Create an annotated image with bounding boxes."""
+    # Create a copy to draw on
+    annotated = img.copy()
+    draw = ImageDraw.Draw(annotated)
+    # Try to load a font
+    try:
+        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 20)
+        small_font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 14)
+    except:
+        font = ImageFont.load_default()
+        small_font = ImageFont.load_default()
+    # Draw tables (green boxes)
+    for table in tables:
+        bbox = table["bbox"]
+        caption_bbox = table.get("caption_bbox")
+        table_num = table.get("table_number", "?")
+        conf = table.get("confidence", 0)
+        # Draw table content box
+        draw.rectangle(bbox, outline="green", width=3)
+        draw.text(
+            (bbox[0] + 5, bbox[1] + 5),
+            f"Table {table_num} ({conf:.2f})",
+            fill="green",
+            font=font
+        )
+        # Draw caption box
+        if caption_bbox:
+            draw.rectangle(caption_bbox, outline="blue", width=2)
+            draw.text(
+                (caption_bbox[0], caption_bbox[1] - 20),
+                "Caption",
+                fill="blue",
+                font=small_font
+            )
+    # Draw figures (red boxes)
+    for figure in figures:
+        bbox = figure["figure_bbox"]
+        caption_bbox = figure.get("caption_bbox")
+        fig_num = figure.get("figure_number", "?")
+        conf = figure.get("confidence", 0)
+        # Draw figure content box
+        draw.rectangle(bbox, outline="red", width=3)
+        draw.text(
+            (bbox[0] + 5, bbox[1] + 5),
+            f"Figure {fig_num} ({conf:.2f})",
+            fill="red",
+            font=font
+        )
+        # Draw caption box
+        if caption_bbox:
+            draw.rectangle(caption_bbox, outline="blue", width=2)
+            draw.text(
+                (caption_bbox[0], caption_bbox[1] - 20),
+                "Caption",
+                fill="blue",
+                font=small_font
+            )
+    return annotated
 # --- API Endpoints ---
 @app.get("/", response_class=HTMLResponse)
 async def read_root():
                 <!-- Results -->
                 <div id="results" class="hidden mt-8">
                     <h3 class="text-xl font-bold text-gray-300 mb-4">Analysis Results</h3>
+                    <!-- Annotated Images -->
+                    <div id="annotatedImages" class="mb-6 space-y-6"></div>
+                    <!-- JSON Results -->
                     <div class="rounded-2xl bg-black/30 p-8 ring-1 ring-white/10 backdrop-blur-sm">
+                        <div class="flex justify-between items-center mb-4">
+                            <h4 class="text-lg font-semibold text-gray-300">JSON Output</h4>
+                            <button onclick="toggleJSON()" class="px-4 py-2 rounded-lg bg-gray-500/10 text-gray-400 text-sm hover:bg-gray-500/20 transition-all">
+                                <span id="toggleText">Show JSON</span>
+                            </button>
+                        </div>
+                        <pre id="resultsContent" class="hidden text-sm text-gray-300 overflow-x-auto max-h-96"></pre>
                     </div>
                     <button onclick="downloadJSON()" class="mt-4 px-6 py-3 rounded-lg bg-emerald-500/10 text-emerald-400 font-semibold hover:bg-emerald-500/20 transition-all ring-1 ring-emerald-500/30">
                         Download JSON
                     </button>
                 }
                 analysisResults = responseData;
+                // Display annotated images
+                displayAnnotatedImages(responseData.results);
+                // Prepare JSON without base64 images for display
+                const jsonForDisplay = {
+                    ...responseData,
+                    results: responseData.results.map(r => {
+                        const {annotated_image, ...rest} = r;
+                        return rest;
+                    })
+                };
+                document.getElementById('resultsContent').textContent = JSON.stringify(jsonForDisplay, null, 2);
                 resultsDiv.classList.remove('hidden');
             } catch (err) {
             }
         }
+        function displayAnnotatedImages(results) {
+            const container = document.getElementById('annotatedImages');
+            container.innerHTML = '';
+            results.forEach((page, idx) => {
+                if (page.annotated_image) {
+                    const pageDiv = document.createElement('div');
+                    pageDiv.className = 'rounded-2xl bg-black/30 p-6 ring-1 ring-white/10 backdrop-blur-sm';
+                    const title = document.createElement('h4');
+                    title.className = 'text-lg font-semibold text-gray-300 mb-4';
+                    title.textContent = `Page ${page.page_number}`;
+                    const stats = document.createElement('div');
+                    stats.className = 'text-sm text-gray-400 mb-4 flex gap-6';
+                    stats.innerHTML = `
+                        <span class="flex items-center gap-2">
+                            <span class="inline-block w-3 h-3 bg-red-500 rounded"></span>
+                            ${page.figures.length} Figure${page.figures.length !== 1 ? 's' : ''}
+                        </span>
+                        <span class="flex items-center gap-2">
+                            <span class="inline-block w-3 h-3 bg-green-500 rounded"></span>
+                            ${page.tables.length} Table${page.tables.length !== 1 ? 's' : ''}
+                        </span>
+                        <span class="flex items-center gap-2">
+                            <span class="inline-block w-3 h-3 bg-blue-500 rounded"></span>
+                            Captions
+                        </span>
+                    `;
+                    const img = document.createElement('img');
+                    img.src = page.annotated_image;
+                    img.className = 'w-full rounded-lg border border-white/10';
+                    img.alt = `Annotated page ${page.page_number}`;
+                    pageDiv.appendChild(title);
+                    pageDiv.appendChild(stats);
+                    pageDiv.appendChild(img);
+                    container.appendChild(pageDiv);
+                }
+            });
+        }
+        function toggleJSON() {
+            const jsonContent = document.getElementById('resultsContent');
+            const toggleText = document.getElementById('toggleText');
+            if (jsonContent.classList.contains('hidden')) {
+                jsonContent.classList.remove('hidden');
+                toggleText.textContent = 'Hide JSON';
+            } else {
+                jsonContent.classList.add('hidden');
+                toggleText.textContent = 'Show JSON';
+            }
+        }
         function downloadJSON() {
             if (!analysisResults) return;
+            // Remove base64 images from download to reduce file size
+            const downloadData = {
+                ...analysisResults,
+                results: analysisResults.results.map(r => {
+                    const {annotated_image, ...rest} = r;
+                    return rest;
+                })
+            };
+            const blob = new Blob([JSON.stringify(downloadData, null, 2)], { type: 'application/json' });
             const url = URL.createObjectURL(blob);
             const a = document.createElement('a');
             a.href = url;