Spaces:
Sleeping
Sleeping
| """ | |
| Document Management API Routes | |
| Flask Blueprint for document management endpoints that integrates | |
| with the app factory pattern and lazy loading architecture. | |
| """ | |
| import logging | |
| from flask import Blueprint, jsonify, request | |
# Create blueprint for the document-management endpoints.
# NOTE(review): no @document_bp.route decorators are visible in this chunk —
# presumably routes are registered elsewhere (e.g. via add_url_rule); verify.
document_bp = Blueprint("document_management", __name__)
def get_document_services():
    """
    Get document management services from Flask app config.

    This follows the same lazy loading pattern as other services in the
    app factory: on first call the three services are constructed, the
    processing worker is started, and the bundle is cached in
    ``current_app.config["DOCUMENT_SERVICES"]``; later calls return the
    cached dict.

    Returns:
        dict: {"document": DocumentService,
               "processing": ProcessingService,
               "upload": UploadService}
    """
    from flask import current_app

    # Check if services are already initialized
    if current_app.config.get("DOCUMENT_SERVICES") is None:
        logging.info("Initializing document management services for the first time...")
        from .document_service import DocumentService
        from .processing_service import ProcessingService
        from .upload_service import UploadService

        # Initialize services (max_workers=1 keeps processing sequential)
        document_service = DocumentService()
        processing_service = ProcessingService(max_workers=1)
        upload_service = UploadService(document_service, processing_service)

        # Start processing service before exposing it to callers
        processing_service.start()

        # Cache services in app config so construction happens once per app.
        # NOTE(review): this check-then-set is not guarded by a lock; under a
        # multi-threaded server two first requests could race to initialize —
        # confirm whether that is acceptable here.
        current_app.config["DOCUMENT_SERVICES"] = {
            "document": document_service,
            "processing": processing_service,
            "upload": upload_service,
        }
        logging.info("Document management services initialized")
    return current_app.config["DOCUMENT_SERVICES"]
def upload_documents():
    """Upload one or more documents for processing.

    Accepts either a JSON body (used verbatim as metadata) or multipart
    form fields. Recognized form fields: category, department, author,
    description, chunk_size (int), overlap (int), auto_process
    ("true"/"false", case-insensitive).

    Returns:
        200 on full success, 207 (multi-status) on partial success,
        400 for client errors (including non-integer chunk_size/overlap),
        500 on unexpected failure.
    """
    try:
        services = get_document_services()
        upload_service = services["upload"]

        # Get metadata from form or JSON
        metadata = {}
        if request.is_json:
            metadata = request.get_json() or {}
        else:
            # Extract metadata from form fields
            for key in ["category", "department", "author", "description"]:
                if key in request.form:
                    metadata[key] = request.form[key]

            # Processing options — malformed numbers are a client error,
            # so reject them with 400 instead of letting ValueError bubble
            # up into the generic 500 handler below.
            try:
                if "chunk_size" in request.form:
                    metadata["chunk_size"] = int(request.form["chunk_size"])
                if "overlap" in request.form:
                    metadata["overlap"] = int(request.form["overlap"])
            except ValueError:
                return (
                    jsonify(
                        {
                            "status": "error",
                            "message": "chunk_size and overlap must be integers",
                        }
                    ),
                    400,
                )
            if "auto_process" in request.form:
                metadata["auto_process"] = request.form["auto_process"].lower() == "true"

        # Handle file upload
        result = upload_service.handle_upload_request(request.files, metadata)

        if result["status"] == "error":
            return jsonify(result), 400
        elif result["status"] == "partial":
            return jsonify(result), 207  # Multi-status
        else:
            return jsonify(result), 200
    except Exception as e:
        logging.error(f"Upload endpoint error: {e}", exc_info=True)
        return jsonify({"status": "error", "message": f"Upload failed: {str(e)}"}), 500
def get_job_status(job_id: str):
    """Return the status of a single processing job.

    Responds 200 with the job payload, 404 when the job id is unknown,
    500 on unexpected failure.
    """
    try:
        processing = get_document_services()["processing"]
        job = processing.get_job_status(job_id)
        if job is None:
            not_found = {"status": "error", "message": f"Job {job_id} not found"}
            return jsonify(not_found), 404
        return jsonify({"status": "success", "job": job}), 200
    except Exception as e:
        logging.error(f"Job status endpoint error: {e}", exc_info=True)
        failure = {"status": "error", "message": f"Failed to get job status: {str(e)}"}
        return jsonify(failure), 500
def get_all_jobs():
    """List processing jobs, optionally filtered via the ?status= query arg.

    Responds 200 with the jobs and their count, 500 on unexpected failure.
    """
    try:
        processing = get_document_services()["processing"]
        wanted = request.args.get("status")
        jobs = processing.get_all_jobs(wanted)
        payload = {"status": "success", "jobs": jobs, "count": len(jobs)}
        return jsonify(payload), 200
    except Exception as e:
        logging.error(f"Jobs list endpoint error: {e}", exc_info=True)
        failure = {"status": "error", "message": f"Failed to get jobs: {str(e)}"}
        return jsonify(failure), 500
def get_queue_status():
    """Report the processing queue state (200 on success, 500 on error)."""
    try:
        processing = get_document_services()["processing"]
        payload = {"status": "success", "queue": processing.get_queue_status()}
        return jsonify(payload), 200
    except Exception as e:
        logging.error(f"Queue status endpoint error: {e}", exc_info=True)
        failure = {"status": "error", "message": f"Failed to get queue status: {str(e)}"}
        return jsonify(failure), 500
def get_document_stats():
    """Return document-management statistics (200 on success, 500 on error)."""
    try:
        uploader = get_document_services()["upload"]
        summary = uploader.get_upload_summary()
        return jsonify({"status": "success", "stats": summary}), 200
    except Exception as e:
        logging.error(f"Stats endpoint error: {e}", exc_info=True)
        failure = {"status": "error", "message": f"Failed to get stats: {str(e)}"}
        return jsonify(failure), 500
def validate_files():
    """Pre-flight validation of an upload batch without storing anything.

    Expects the multipart field "files"; responds 400 when it is absent,
    otherwise 200 with per-batch validation counts, and 500 on failure.
    """
    try:
        uploader = get_document_services()["upload"]
        if "files" not in request.files:
            return jsonify({"status": "error", "message": "No files provided"}), 400
        candidates = request.files.getlist("files")
        valid_files, errors = uploader.validate_batch_upload(candidates)
        report = {
            "total_files": len(candidates),
            "valid_files": len(valid_files),
            "invalid_files": len(candidates) - len(valid_files),
            "errors": errors,
            "can_upload": len(errors) == 0,
        }
        return jsonify({"status": "success", "validation": report}), 200
    except Exception as e:
        logging.error(f"Validation endpoint error: {e}", exc_info=True)
        failure = {"status": "error", "message": f"Validation failed: {str(e)}"}
        return jsonify(failure), 500
def document_management_health():
    """Health probe for the document-management services.

    Reports "healthy" when the processing worker is running and "degraded"
    otherwise (both with HTTP 200); "unhealthy" with 500 on failure.
    """
    try:
        services = get_document_services()
        processing = services["processing"]
        report = {
            "status": "healthy",
            "services": {
                "document_service": "active",
                "processing_service": ("active" if processing.running else "inactive"),
                "upload_service": "active",
            },
            "queue_status": processing.get_queue_status(),
        }
        # Downgrade the overall status if the background worker has stopped.
        if not processing.running:
            report["status"] = "degraded"
        return jsonify(report), 200
    except Exception as e:
        logging.error(f"Document management health check error: {e}", exc_info=True)
        return jsonify({"status": "unhealthy", "error": str(e)}), 500
# Error handlers for the blueprint
def file_too_large(error):
    """413 handler: the uploaded payload exceeded the size limit."""
    body = {
        "status": "error",
        "message": "File too large. Maximum file size exceeded.",
    }
    return jsonify(body), 413
def bad_request(error):
    """400 handler: the client request was malformed."""
    body = {
        "status": "error",
        "message": "Bad request. Please check your request format.",
    }
    return jsonify(body), 400