bla committed on
Commit
d97b9d2
·
verified ·
1 Parent(s): 1184151

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +559 -0
app.py ADDED
@@ -0,0 +1,559 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import json
4
+ import tempfile
5
+ import requests
6
+ from pathlib import Path
7
+ from typing import Optional, Union
8
+ import base64
9
+
10
+ import fitz # PyMuPDF
11
+ import torch
12
+ import torchvision
13
+ import numpy as np
14
+ from PIL import Image
15
+ from fastapi import FastAPI, File, UploadFile, Form, HTTPException
16
+ from fastapi.responses import HTMLResponse, JSONResponse
17
+ from fastapi.staticfiles import StaticFiles
18
+ from pydantic import BaseModel, HttpUrl
19
+ from paddleocr import PaddleOCR
20
+ from doclayout_yolo import YOLOv10
21
+
22
# Initialize models once at import time so every request reuses them.
# PaddleOCR: English, angle classifier enabled, library logging silenced.
ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)
# Run layout inference on the GPU when torch can see one, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# The weights location can be overridden with the LAYOUT_MODEL_PATH environment
# variable; the default is the previously hard-coded path, so existing
# deployments behave exactly as before.
layout_model = YOLOv10(os.environ.get('LAYOUT_MODEL_PATH', '/content/layout-model.pt'))
26
+
27
# Label mapping: class index -> label name.
# The position of each name in the tuple is its class index.
id_to_names = dict(enumerate((
    'title',
    'plain text',
    'abandon',
    'figure',
    'figure_caption',
    'table',
    'table_caption',
    'table_footnote',
    'isolate_formula',
    'formula_caption',
)))
40
+
41
# ASGI application object; the route decorators below register handlers on it.
app = FastAPI(
    title="Document Layout Analysis API",
    version="1.0.0",
)
42
+
43
+ # Request models
44
class URLRequest(BaseModel):
    """JSON request body for analyzing a document fetched from a URL."""

    # Location of the document to download.
    url: HttpUrl
    # Optional target width in pixels; omitted/None keeps the rendered size.
    resolution: Union[int, None] = None
47
+
48
+ # Helper functions
49
def extract_number_from_caption(caption_text):
    """Extract the number from a caption like 'Table 3' or 'Figure 2.1'.

    Args:
        caption_text: Caption string; may be None or empty.

    Returns:
        The number that follows the first "Table"/"Figure" keyword
        (case-insensitive) as a string, including dotted sub-numbers such as
        '2.1', or None when the text is empty or contains no such label.
    """
    import re

    if not caption_text:
        return None
    # Dotted numbering ("2.1", "3.4.2") is captured in full. A "." that only
    # terminates the label ("Table 3. Results") is left out because every dot
    # in the group must be followed by a digit.
    match = re.search(
        r"(?:Table|Figure)\s*(\d+(?:\.\d+)*)",
        caption_text,
        re.IGNORECASE,
    )
    return match.group(1) if match else None
57
+
58
def detect_layout_regions(page, target_width=None, conf_threshold=0.25, iou_threshold=0.45):
    """Use DocLayout-YOLO to detect document elements on a single page.

    Args:
        page: Page object exposing ``get_pixmap``; it is rendered at 150 dpi.
        target_width: Optional width in pixels. When truthy, the rendered page
            is resized to this width (aspect ratio preserved) before detection.
        conf_threshold: Confidence threshold passed to the layout model.
        iou_threshold: IoU threshold for the additional NMS pass.

    Returns:
        A ``(detected_regions, img)`` tuple. ``detected_regions`` is a list of
        dicts with ``"bbox"`` (``[x0, y0, x1, y1]`` floats as reported by the
        model for ``img``; nothing is rescaled back to the un-resized page),
        ``"type"`` (label name from ``id_to_names``) and ``"confidence"``.
        ``img`` is the PIL image that was given to the model.
    """
    # Render once; the optional resize is applied to this single render.
    pix = page.get_pixmap(dpi=150)
    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    if target_width:
        # Same arithmetic as extract_text_from_bbox so both produce images of
        # identical size and the boxes line up with the OCR crop.
        aspect_ratio = img.height / img.width
        target_height = int(target_width * aspect_ratio)
        img = img.resize((target_width, target_height), Image.LANCZOS)

    # Run layout detection
    det_res = layout_model.predict(
        img,
        imgsz=1280,
        conf=conf_threshold,
        device=device,
    )[0]

    # torch.as_tensor keeps the data on whatever device the model produced it
    # on, unlike the legacy torch.Tensor(...) constructor.
    result_boxes = det_res.boxes
    boxes = torch.as_tensor(result_boxes.xyxy).float()
    scores = torch.as_tensor(result_boxes.conf).float()
    classes = torch.as_tensor(result_boxes.cls)

    # Extra NMS pass over all boxes together (labels are not considered).
    keep = torchvision.ops.nms(boxes, scores, iou_threshold)

    # Indexing with the 1-D ``keep`` tensor always yields an (n, 4) box tensor
    # and (n,) score/class tensors, including n == 0, so no reshaping is needed.
    detected_regions = [
        {
            "bbox": [float(coord) for coord in box],
            "type": id_to_names[int(cls)],
            "confidence": float(score),
        }
        for box, score, cls in zip(
            boxes[keep].tolist(),
            scores[keep].tolist(),
            classes[keep].tolist(),
        )
    ]

    return detected_regions, img
114
+
115
def extract_text_from_bbox(page, bbox, target_width=None, padding=5):
    """Run OCR on a specific bounding box region to extract text.

    Args:
        page: Page object exposing ``get_pixmap``; it is rendered at 150 dpi.
        bbox: ``(x0, y0, x1, y1)`` in pixel coordinates of the rendered (and,
            when ``target_width`` is given, resized) page image.
        target_width: Optional width in pixels; when truthy the render is
            resized to this width (aspect ratio preserved) before cropping.
        padding: Pixels added on every side of the box, clamped to the image.

    Returns:
        The recognized text lines joined with single spaces, or ``""`` when the
        clamped region is empty or OCR finds nothing.
    """
    pix = page.get_pixmap(dpi=150)
    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    if target_width:
        aspect_ratio = img.height / img.width
        target_height = int(target_width * aspect_ratio)
        img = img.resize((target_width, target_height), Image.LANCZOS)

    x0, y0, x1, y1 = (int(coord) for coord in bbox)

    # Add padding and ensure within bounds
    x0 = max(0, x0 - padding)
    y0 = max(0, y0 - padding)
    x1 = min(img.width, x1 + padding)
    y1 = min(img.height, y1 + padding)

    # A box lying outside the image clamps to an empty/inverted rectangle;
    # there is nothing to recognize, so skip the crop and the OCR call.
    if x1 <= x0 or y1 <= y0:
        return ""

    region = img.crop((x0, y0, x1, y1))

    # The OCR engine is handed the region as encoded PNG bytes.
    buffer = io.BytesIO()
    region.save(buffer, format='PNG')
    ocr_result = ocr.ocr(buffer.getvalue(), cls=True)

    if not ocr_result or not ocr_result[0]:
        return ""

    # Each entry of ocr_result[0] carries its text at index [1][0].
    return " ".join(line[1][0] for line in ocr_result[0])
156
+
157
def _closest_caption(anchor_bbox, captions, caption_above):
    """Return the bbox of the nearest caption on the expected side, or None.

    The vertical gap is measured from the caption's bottom edge to the anchor's
    top edge when ``caption_above`` is true, and from the anchor's bottom edge
    to the caption's top edge otherwise. Captions with a negative gap are
    skipped; on ties the earliest caption wins.
    """
    best_bbox = None
    best_gap = float('inf')
    for caption in captions:
        cap_bbox = caption["bbox"]
        if caption_above:
            gap = anchor_bbox[1] - cap_bbox[3]
        else:
            gap = cap_bbox[1] - anchor_bbox[3]
        if 0 <= gap < best_gap:
            best_gap = gap
            best_bbox = cap_bbox
    return best_bbox


def process_document(file_path, target_width=None):
    """Process a document and extract layout information.

    Args:
        file_path: Path of the document to open with PyMuPDF.
        target_width: Optional pixel width forwarded to layout detection and
            OCR so both work on the same resized page image.

    Returns:
        One dict per page with ``"page_number"`` (1-based), ``"figures"``,
        ``"tables"`` and ``"image_dimensions"`` (size of the analyzed image).
        Figures are paired with the closest caption below them, tables with the
        closest caption above them. An entry's number comes from the caption
        text when it contains one, otherwise from its 1-based position among
        the page's detections of that kind.
    """
    results = []
    doc = fitz.open(file_path)
    try:
        for page in doc:
            detected_regions, processed_img = detect_layout_regions(
                page,
                target_width=target_width,
                conf_threshold=0.25,
                iou_threshold=0.45
            )

            # Only these four region kinds are reported; the rest are ignored.
            grouped = {
                'figure': [],
                'figure_caption': [],
                'table': [],
                'table_caption': [],
            }
            for region in detected_regions:
                bucket = grouped.get(region["type"].lower())
                if bucket is not None:
                    bucket.append(region)

            # Match figures with their captions (caption expected below).
            image_entries = []
            for idx, figure in enumerate(grouped['figure'], start=1):
                figure_bbox = figure["bbox"]
                caption_bbox = _closest_caption(
                    figure_bbox, grouped['figure_caption'], caption_above=False
                )
                # OCR only the winning caption instead of every candidate.
                caption_text = (
                    extract_text_from_bbox(page, caption_bbox, target_width)
                    if caption_bbox is not None
                    else None
                )
                image_entries.append({
                    "figure_number": extract_number_from_caption(caption_text) or str(idx),
                    "figure_bbox": figure_bbox,
                    "caption": caption_text,
                    "caption_bbox": caption_bbox,
                    "confidence": figure["confidence"]
                })

            # Match tables with their captions (caption expected above).
            table_entries = []
            for idx, table in enumerate(grouped['table'], start=1):
                table_bbox = table["bbox"]
                caption_bbox = _closest_caption(
                    table_bbox, grouped['table_caption'], caption_above=True
                )
                caption_text = (
                    extract_text_from_bbox(page, caption_bbox, target_width)
                    if caption_bbox is not None
                    else None
                )
                table_entries.append({
                    "table_number": extract_number_from_caption(caption_text) or str(idx),
                    "bbox": table_bbox,
                    "caption": caption_text,
                    "caption_bbox": caption_bbox,
                    "confidence": table["confidence"]
                })

            results.append({
                "page_number": page.number + 1,
                "figures": image_entries,
                "tables": table_entries,
                "image_dimensions": {
                    "width": processed_img.width,
                    "height": processed_img.height
                }
            })
    finally:
        # Release the document even when detection or OCR raises mid-way.
        doc.close()

    return results
257
+
258
+ # API Endpoints
259
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the frontend UI.

    Returns a single self-contained HTML page: a form taking either a file
    upload or a document URL plus an optional target width, a script that
    posts to ``/analyze`` or ``/analyze-url`` and shows the JSON response
    (with a download button), and short curl examples for both endpoints.
    """
    # NOTE: inside the <pre> blocks leading whitespace is rendered verbatim, so
    # the curl continuation lines are deliberately indented by two spaces only.
    html_content = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Document Layout Analysis API</title>
        <script src="https://cdn.tailwindcss.com"></script>
        <style>
            .card-grainy { filter: url(#grainy); }
        </style>
    </head>
    <body class="bg-[#09090B] min-h-screen">
        <svg class="absolute h-0 w-0">
            <filter id="grainy">
                <feTurbulence type="fractalNoise" baseFrequency="0.7" numOctaves="2" result="noise" />
                <feComponentTransfer>
                    <feFuncA type="table" tableValues="0 0.15 0" />
                </feComponentTransfer>
            </filter>
        </svg>

        <div class="container mx-auto px-4 py-12">
            <!-- Header -->
            <div class="mb-12 text-center">
                <h3 class="text-sm font-semibold tracking-wider text-cyan-400/90 uppercase mb-4">AI-Powered Document Analysis</h3>
                <h1 class="text-5xl font-bold mb-4">
                    <span class="bg-gradient-to-r from-gray-100 to-gray-300 bg-clip-text text-transparent">Document Layout</span>
                    <span class="text-gray-600"> Detection API</span>
                </h1>
                <p class="text-gray-400 text-lg">Extract tables, figures, and captions from PDFs and images with precision</p>
            </div>

            <!-- Main Card -->
            <div class="relative isolate max-w-4xl mx-auto rounded-3xl border border-white/10 bg-gradient-to-br from-[#1A1D29] via-[#151821] to-[#0F1117] p-10">
                <div class="card-grainy absolute top-0 left-0 h-full w-full"></div>
                <div class="pointer-events-none absolute top-0 left-0 h-96 w-96 rounded-full bg-blue-500/5 blur-3xl"></div>

                <div class="relative">
                    <!-- Upload Section -->
                    <div class="mb-8">
                        <label class="block text-sm font-semibold text-gray-300 mb-4">Upload Document</label>
                        <div class="rounded-2xl bg-black/30 p-8 ring-1 ring-white/10 backdrop-blur-sm">
                            <input type="file" id="fileInput" accept=".pdf,.png,.jpg,.jpeg"
                                class="block w-full text-sm text-gray-400 file:mr-4 file:py-3 file:px-6 file:rounded-lg file:border-0 file:text-sm file:font-semibold file:bg-cyan-500/10 file:text-cyan-400 hover:file:bg-cyan-500/20 cursor-pointer">
                        </div>
                    </div>

                    <!-- OR Divider -->
                    <div class="flex items-center my-8">
                        <div class="flex-1 h-px bg-white/10"></div>
                        <span class="px-4 text-gray-500 text-sm font-semibold">OR</span>
                        <div class="flex-1 h-px bg-white/10"></div>
                    </div>

                    <!-- URL Section -->
                    <div class="mb-8">
                        <label class="block text-sm font-semibold text-gray-300 mb-4">Document URL</label>
                        <div class="rounded-2xl bg-black/30 p-8 ring-1 ring-white/10 backdrop-blur-sm">
                            <input type="url" id="urlInput" placeholder="https://example.com/document.pdf"
                                class="w-full bg-white/5 border border-white/10 rounded-lg px-4 py-3 text-gray-300 placeholder-gray-600 focus:outline-none focus:ring-2 focus:ring-cyan-500/50">
                        </div>
                    </div>

                    <!-- Resolution Section -->
                    <div class="mb-8">
                        <label class="block text-sm font-semibold text-gray-300 mb-4">
                            Target Width (Optional)
                            <span class="text-gray-500 text-xs font-normal ml-2">Leave empty for original size</span>
                        </label>
                        <div class="rounded-2xl bg-black/30 p-8 ring-1 ring-white/10 backdrop-blur-sm">
                            <input type="number" id="resolutionInput" placeholder="e.g., 1280" min="256" max="4096"
                                class="w-full bg-white/5 border border-white/10 rounded-lg px-4 py-3 text-gray-300 placeholder-gray-600 focus:outline-none focus:ring-2 focus:ring-cyan-500/50">
                        </div>
                    </div>

                    <!-- Analyze Button -->
                    <button id="analyzeBtn" onclick="analyzeDocument()"
                        class="w-full py-4 rounded-lg bg-gradient-to-r from-cyan-500 to-blue-500 text-white font-semibold text-lg hover:from-cyan-600 hover:to-blue-600 transition-all shadow-lg hover:shadow-cyan-500/25">
                        Analyze Document
                    </button>

                    <!-- Loading -->
                    <div id="loading" class="hidden mt-8 text-center">
                        <div class="inline-block animate-spin rounded-full h-12 w-12 border-4 border-cyan-500 border-t-transparent"></div>
                        <p class="text-gray-400 mt-4">Processing document...</p>
                    </div>

                    <!-- Results -->
                    <div id="results" class="hidden mt-8">
                        <h3 class="text-xl font-bold text-gray-300 mb-4">Analysis Results</h3>
                        <div class="rounded-2xl bg-black/30 p-8 ring-1 ring-white/10 backdrop-blur-sm">
                            <pre id="resultsContent" class="text-sm text-gray-300 overflow-x-auto"></pre>
                        </div>
                        <button onclick="downloadJSON()" class="mt-4 px-6 py-3 rounded-lg bg-emerald-500/10 text-emerald-400 font-semibold hover:bg-emerald-500/20 transition-all ring-1 ring-emerald-500/30">
                            Download JSON
                        </button>
                    </div>

                    <!-- Error -->
                    <div id="error" class="hidden mt-8 rounded-2xl bg-rose-500/10 p-6 ring-1 ring-rose-500/30">
                        <p class="text-rose-400 font-semibold" id="errorMessage"></p>
                    </div>
                </div>
            </div>

            <!-- API Documentation -->
            <div class="mt-16 max-w-4xl mx-auto">
                <h2 class="text-3xl font-bold text-gray-300 mb-8">API Documentation</h2>
                <div class="space-y-6">
                    <!-- Endpoint 1 -->
                    <div class="rounded-2xl border border-white/10 bg-gradient-to-br from-[#1A1D29] via-[#151821] to-[#0F1117] p-8">
                        <div class="flex items-center gap-3 mb-4">
                            <span class="inline-flex items-center rounded-lg bg-emerald-500/10 px-3 py-1.5 text-xs font-bold text-emerald-400 uppercase ring-1 ring-emerald-500/30">POST</span>
                            <code class="text-cyan-400 text-lg font-mono">/analyze</code>
                        </div>
                        <p class="text-gray-400 mb-4">Analyze a document by uploading a file</p>
                        <div class="bg-black/30 rounded-lg p-4 overflow-x-auto">
                            <pre class="text-sm text-gray-300"><code>curl -X POST "http://your-api-url/analyze" \\
  -F "file=@document.pdf" \\
  -F "resolution=1280"</code></pre>
                        </div>
                    </div>

                    <!-- Endpoint 2 -->
                    <div class="rounded-2xl border border-white/10 bg-gradient-to-br from-[#1A1D29] via-[#151821] to-[#0F1117] p-8">
                        <div class="flex items-center gap-3 mb-4">
                            <span class="inline-flex items-center rounded-lg bg-emerald-500/10 px-3 py-1.5 text-xs font-bold text-emerald-400 uppercase ring-1 ring-emerald-500/30">POST</span>
                            <code class="text-cyan-400 text-lg font-mono">/analyze-url</code>
                        </div>
                        <p class="text-gray-400 mb-4">Analyze a document from a URL</p>
                        <div class="bg-black/30 rounded-lg p-4 overflow-x-auto">
                            <pre class="text-sm text-gray-300"><code>curl -X POST "http://your-api-url/analyze-url" \\
  -H "Content-Type: application/json" \\
  -d '{"url": "https://example.com/doc.pdf", "resolution": 1280}'</code></pre>
                        </div>
                    </div>
                </div>
            </div>
        </div>

        <script>
            let analysisResults = null;

            async function analyzeDocument() {
                const fileInput = document.getElementById('fileInput');
                const urlInput = document.getElementById('urlInput');
                const resolutionInput = document.getElementById('resolutionInput');
                const loading = document.getElementById('loading');
                const results = document.getElementById('results');
                const error = document.getElementById('error');

                // Hide previous results
                results.classList.add('hidden');
                error.classList.add('hidden');

                const resolution = resolutionInput.value ? parseInt(resolutionInput.value) : null;

                try {
                    loading.classList.remove('hidden');

                    let response;

                    if (fileInput.files.length > 0) {
                        // File upload
                        const formData = new FormData();
                        formData.append('file', fileInput.files[0]);
                        if (resolution) formData.append('resolution', resolution);

                        response = await fetch('/analyze', {
                            method: 'POST',
                            body: formData
                        });
                    } else if (urlInput.value) {
                        // URL analysis
                        const body = { url: urlInput.value };
                        if (resolution) body.resolution = resolution;

                        response = await fetch('/analyze-url', {
                            method: 'POST',
                            headers: { 'Content-Type': 'application/json' },
                            body: JSON.stringify(body)
                        });
                    } else {
                        throw new Error('Please provide a file or URL');
                    }

                    if (!response.ok) {
                        // Error bodies are not guaranteed to be JSON, and
                        // validation errors carry a non-string `detail`.
                        const errorData = await response.json().catch(() => ({}));
                        const detail = errorData.detail;
                        const message = typeof detail === 'string'
                            ? detail
                            : (detail ? JSON.stringify(detail) : 'Analysis failed');
                        throw new Error(message);
                    }

                    analysisResults = await response.json();
                    document.getElementById('resultsContent').textContent = JSON.stringify(analysisResults, null, 2);
                    results.classList.remove('hidden');

                } catch (err) {
                    document.getElementById('errorMessage').textContent = err.message;
                    error.classList.remove('hidden');
                } finally {
                    loading.classList.add('hidden');
                }
            }

            function downloadJSON() {
                if (!analysisResults) return;

                const blob = new Blob([JSON.stringify(analysisResults, null, 2)], { type: 'application/json' });
                const url = URL.createObjectURL(blob);
                const a = document.createElement('a');
                a.href = url;
                a.download = 'layout_analysis.json';
                a.click();
                URL.revokeObjectURL(url);
            }
        </script>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)
482
+
483
@app.post("/analyze")
async def analyze_file(
    file: UploadFile = File(...),
    resolution: Optional[int] = Form(None)
):
    """Analyze an uploaded document file.

    Args:
        file: The uploaded document (multipart form field ``file``).
        resolution: Optional target width in pixels (form field), forwarded to
            ``process_document`` as ``target_width``.

    Returns:
        JSON with ``status``, ``filename``, ``pages`` (number of page results)
        and ``results`` (the per-page output of ``process_document``).

    Raises:
        HTTPException: 500 with the error text when anything fails.
    """
    tmp_path = None
    try:
        # The upload is spooled to disk because process_document takes a path.
        # The original extension is kept as the temp-file suffix; a missing
        # filename simply yields no suffix.
        suffix = Path(file.filename or "").suffix
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = tmp.name
            tmp.write(await file.read())

        results = process_document(tmp_path, target_width=resolution)

        return JSONResponse(content={
            "status": "success",
            "filename": file.filename,
            "pages": len(results),
            "results": results
        })

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Remove the temp file on success and on failure alike.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
511
+
512
@app.post("/analyze-url")
async def analyze_url(request: URLRequest):
    """Analyze a document from a URL.

    Args:
        request: Body carrying the document ``url`` and an optional
            ``resolution`` (target width in pixels).

    Returns:
        JSON with ``status``, ``url``, ``pages`` (number of page results) and
        ``results`` (the per-page output of ``process_document``).

    Raises:
        HTTPException: 400 when the download fails, 500 for any other error.
    """
    tmp_path = None
    try:
        # NOTE(security): the URL is caller-supplied and fetched by the server
        # with no host allow-list and no cap on the response size. Restrict
        # this before exposing the endpoint to untrusted clients.
        response = requests.get(str(request.url), timeout=30)
        response.raise_for_status()

        # Pick the temp-file suffix from the Content-Type header. Every image
        # type is saved as .png, and unknown types are treated as PDF.
        content_type = response.headers.get('content-type', '')
        if 'pdf' in content_type:
            ext = '.pdf'
        elif 'image' in content_type:
            ext = '.png'
        else:
            ext = '.pdf'  # default

        # process_document takes a path, so the payload goes to a temp file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
            tmp_path = tmp.name
            tmp.write(response.content)

        results = process_document(tmp_path, target_width=request.resolution)

        return JSONResponse(content={
            "status": "success",
            "url": str(request.url),
            "pages": len(results),
            "results": results
        })

    except requests.RequestException as e:
        raise HTTPException(status_code=400, detail=f"Failed to download file: {str(e)}") from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Remove the temp file on success and on failure alike.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
551
+
552
@app.get("/health")
async def health_check():
    """Report liveness along with the module-level ``device`` value."""
    payload = dict(status="healthy", device=device)
    return payload
556
+
557
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces, port 7860.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )