| """ | |
| Monitoring utilities specifically for Render production environment. | |
| """ | |
| import json | |
| import logging | |
| import os | |
| import time | |
| from datetime import datetime, timezone | |
| from typing import Any, Dict, List, Optional, TypedDict | |
| from .memory_utils import ( | |
| clean_memory, | |
| force_garbage_collection, | |
| get_memory_usage, | |
| log_memory_checkpoint, | |
| memory_summary, | |
| ) | |


class MemorySample(TypedDict):
    """Type definition for memory sample records."""

    timestamp: float
    memory_mb: float
    context: str


class MemoryStatus(TypedDict):
    """Type definition for memory status results."""

    timestamp: str
    memory_mb: float
    peak_memory_mb: float
    context: str
    status: str
    action_taken: Optional[str]
    memory_limit_mb: float


logger = logging.getLogger(__name__)

# Configure these thresholds based on your Render free tier limits
RENDER_MEMORY_LIMIT_MB = 512
RENDER_WARNING_THRESHOLD_MB = 400  # ~78% of the limit
RENDER_CRITICAL_THRESHOLD_MB = 450  # ~88% of the limit
RENDER_EMERGENCY_THRESHOLD_MB = 480  # ~94% of the limit

# Memory metrics tracking
_memory_samples: List[MemorySample] = []
_memory_peak: float = 0.0
_memory_history_limit: int = 1000  # Keep last N samples to avoid unbounded growth
_memory_last_dump_time: float = 0.0


def init_render_monitoring(log_interval: int = 10) -> None:
    """
    Initialize Render-specific monitoring with shorter intervals.

    Args:
        log_interval: Seconds between memory log entries
    """
    # Set environment variables for memory monitoring
    os.environ["MEMORY_DEBUG"] = "1"
    os.environ["MEMORY_LOG_INTERVAL"] = str(log_interval)

    logger.info(
        "Initialized Render monitoring with %ds intervals (memory limit: %dMB)",
        log_interval,
        RENDER_MEMORY_LIMIT_MB,
    )

    # Perform initial memory check
    memory_mb = get_memory_usage()
    logger.info("Initial memory: %.1fMB", memory_mb)

    # Record startup metrics
    _record_memory_sample("startup", memory_mb)
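

# Illustrative usage sketch, not executed at import time. It assumes this
# module is imported once at service startup; the call site shown here is
# hypothetical, not part of the module:
#
#     init_render_monitoring(log_interval=15)
#     # MEMORY_DEBUG / MEMORY_LOG_INTERVAL are now set in the environment;
#     # memory_utils is presumably the consumer of these variables.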


def check_render_memory_thresholds(context: str = "periodic") -> MemoryStatus:
    """
    Check current memory against Render thresholds and take action if needed.

    Args:
        context: Label for the check (e.g., "request", "background")

    Returns:
        Dictionary with memory status details
    """
    global _memory_peak

    memory_mb = get_memory_usage()
    _record_memory_sample(context, memory_mb)

    if memory_mb > _memory_peak:
        _memory_peak = memory_mb
        log_memory_checkpoint(f"new_peak_memory_{context}", force=True)

    status = "normal"
    action_taken: Optional[str] = None

    # Progressive response based on severity
    if memory_mb > RENDER_EMERGENCY_THRESHOLD_MB:
        logger.critical(
            "EMERGENCY: Memory usage at %.1fMB - critically close to %.1fMB limit",
            memory_mb,
            RENDER_MEMORY_LIMIT_MB,
        )
        status = "emergency"
        action_taken = "emergency_cleanup"
        # Take emergency action
        clean_memory("emergency")
        force_garbage_collection()
    elif memory_mb > RENDER_CRITICAL_THRESHOLD_MB:
        logger.warning(
            "CRITICAL: Memory usage at %.1fMB - approaching %.1fMB limit",
            memory_mb,
            RENDER_MEMORY_LIMIT_MB,
        )
        status = "critical"
        action_taken = "aggressive_cleanup"
        clean_memory("critical")
    elif memory_mb > RENDER_WARNING_THRESHOLD_MB:
        logger.warning(
            "WARNING: Memory usage at %.1fMB - monitor closely (limit: %.1fMB)",
            memory_mb,
            RENDER_MEMORY_LIMIT_MB,
        )
        status = "warning"
        action_taken = "light_cleanup"
        clean_memory("warning")

    result: MemoryStatus = {
        "timestamp": datetime.now(timezone.utc).isoformat(),  # Timestamp of the check
        "memory_mb": memory_mb,  # Current memory usage
        "peak_memory_mb": _memory_peak,  # Peak memory usage recorded
        "context": context,  # Context of the memory check
        "status": status,  # Current status based on memory usage
        "action_taken": action_taken,  # Action taken, if any
        "memory_limit_mb": RENDER_MEMORY_LIMIT_MB,  # Configured memory limit
    }

    # Periodically dump memory metrics to a file in /tmp
    _maybe_dump_memory_metrics()

    return result
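

# Illustrative sketch of a periodic check from a background worker. The loop,
# sleep interval, and "background" context label are assumptions made for the
# example, not part of this module:
#
#     import time
#
#     while True:
#         status = check_render_memory_thresholds("background")
#         if status["status"] in ("critical", "emergency"):
#             logger.warning(
#                 "Memory pressure: %.1fMB (action: %s)",
#                 status["memory_mb"],
#                 status["action_taken"],
#             )
#         time.sleep(30)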


def _record_memory_sample(context: str, memory_mb: float) -> None:
    """Record a memory sample with timestamp for trend analysis."""
    global _memory_samples

    sample: MemorySample = {
        "timestamp": time.time(),
        "memory_mb": memory_mb,
        "context": context,
    }
    _memory_samples.append(sample)

    # Prevent unbounded growth by limiting history
    if len(_memory_samples) > _memory_history_limit:
        _memory_samples = _memory_samples[-_memory_history_limit:]


def _maybe_dump_memory_metrics() -> None:
    """Periodically save memory metrics to a file for later analysis."""
    global _memory_last_dump_time

    # Only dump once every 5 minutes
    now = time.time()
    if now - _memory_last_dump_time < 300:  # 5 minutes
        return

    try:
        _memory_last_dump_time = now

        # Create the directory if it doesn't exist
        dump_dir = "/tmp/render_metrics"
        os.makedirs(dump_dir, exist_ok=True)

        # Generate a filename with a timestamp
        timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
        filename = f"{dump_dir}/memory_metrics_{timestamp}.json"

        # Dump the samples to a file
        with open(filename, "w") as f:
            json.dump(
                {
                    "samples": _memory_samples,
                    "peak_memory_mb": _memory_peak,
                    "memory_limit_mb": RENDER_MEMORY_LIMIT_MB,
                    "summary": memory_summary(),
                },
                f,
                indent=2,
            )

        logger.info("Memory metrics dumped to %s", filename)
    except Exception as e:
        logger.error("Failed to dump memory metrics: %s", e)
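

# Illustrative sketch for offline inspection of the dumped metrics files. The
# glob pattern mirrors the filenames written above; everything else is an
# assumption made for the example:
#
#     import glob
#     import json
#
#     for path in sorted(glob.glob("/tmp/render_metrics/memory_metrics_*.json")):
#         with open(path) as f:
#             data = json.load(f)
#         print(path, data["peak_memory_mb"], len(data["samples"]))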


def get_memory_trends() -> Dict[str, Any]:
    """
    Get memory usage trends from collected samples.

    Returns:
        Dictionary with memory trends and statistics
    """
    if not _memory_samples:
        return {"status": "no_data"}

    # Basic statistics
    current = _memory_samples[-1]["memory_mb"]
    trends: Dict[str, Any] = {
        "current_mb": current,
        "peak_mb": _memory_peak,
        "samples_count": len(_memory_samples),
    }

    now = time.time()

    # Calculate the trend over the last 5 minutes
    recent_samples: List[MemorySample] = [
        s for s in _memory_samples if now - s["timestamp"] < 300
    ]
    if len(recent_samples) >= 2:
        start_mb: float = recent_samples[0]["memory_mb"]
        end_mb: float = recent_samples[-1]["memory_mb"]
        trends["trend_5min_mb"] = end_mb - start_mb

    # Calculate the hourly trend if we have enough data
    hour_samples: List[MemorySample] = [
        s for s in _memory_samples if now - s["timestamp"] < 3600
    ]
    if len(hour_samples) >= 2:
        start_mb = hour_samples[0]["memory_mb"]
        end_mb = hour_samples[-1]["memory_mb"]
        trends["trend_1hour_mb"] = end_mb - start_mb

    return trends
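

# Illustrative sketch of exposing the trends on a debug endpoint. The Flask
# blueprint and route are hypothetical and not defined by this module:
#
#     from flask import Blueprint, jsonify
#
#     health_bp = Blueprint("health", __name__)
#
#     @health_bp.route("/debug/memory")
#     def memory_debug():
#         return jsonify(get_memory_trends())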


def add_memory_middleware(app) -> None:
    """
    Add middleware to a Flask app for request-level memory monitoring.

    Args:
        app: Flask application instance
    """
    try:

        def check_memory_before_request():
            """Check memory before processing each request."""
            try:
                from flask import request

                try:
                    memory_status = check_render_memory_thresholds(
                        f"request_{request.endpoint}"
                    )

                    # If we're in an emergency state, reject new requests
                    if memory_status["status"] == "emergency":
                        logger.critical(
                            "Rejecting request due to critical memory usage: %s %.1fMB",
                            request.path,
                            memory_status["memory_mb"],
                        )
                        return {
                            "status": "error",
                            "message": (
                                "Service temporarily unavailable due to "
                                "resource constraints"
                            ),
                            "retry_after": 30,  # Suggest retry after 30 seconds
                        }, 503
                except Exception as e:
                    # Don't let memory monitoring failures affect requests
                    logger.debug(f"Memory status check failed: {e}")
            except Exception as e:
                # Catch all other errors to prevent middleware from breaking the app
                logger.debug(f"Memory middleware error: {e}")

        def log_memory_after_request(response):
            """Log memory usage after request processing."""
            try:
                memory_mb = get_memory_usage()
                logger.debug("Memory after request: %.1fMB", memory_mb)
            except Exception as e:
                logger.debug(f"After request memory logging failed: {e}")
            return response

        # Register the hooks so Flask actually invokes them on each request
        app.before_request(check_memory_before_request)
        app.after_request(log_memory_after_request)
    except Exception as e:
        # If we can't even add the middleware, log it but don't crash
        logger.warning(f"Failed to add memory middleware: {e}")
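

# Illustrative end-to-end wiring sketch. The application factory below is an
# assumption made for the example; only init_render_monitoring and
# add_memory_middleware come from this module:
#
#     from flask import Flask
#
#     def create_app() -> Flask:
#         app = Flask(__name__)
#         init_render_monitoring(log_interval=15)
#         add_memory_middleware(app)
#         return app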