""" Monitoring utilities specifically for Render production environment. """ import json import logging import os import time from datetime import datetime, timezone from typing import Any, Dict, List, Optional, TypedDict from .memory_utils import ( clean_memory, force_garbage_collection, get_memory_usage, log_memory_checkpoint, memory_summary, ) class MemorySample(TypedDict): """Type definition for memory sample records.""" timestamp: float memory_mb: float context: str class MemoryStatus(TypedDict): """Type definition for memory status results.""" timestamp: str memory_mb: float peak_memory_mb: float context: str status: str action_taken: Optional[str] memory_limit_mb: float logger = logging.getLogger(__name__) # Configure these thresholds based on your Render free tier limits RENDER_MEMORY_LIMIT_MB = 512 RENDER_WARNING_THRESHOLD_MB = 400 # 78% of limit RENDER_CRITICAL_THRESHOLD_MB = 450 # 88% of limit RENDER_EMERGENCY_THRESHOLD_MB = 480 # 94% of limit # Memory metrics tracking _memory_samples: List[MemorySample] = [] _memory_peak: float = 0.0 _memory_history_limit: int = 1000 # Keep last N samples to avoid unbounded growth _memory_last_dump_time: float = 0.0 def init_render_monitoring(log_interval: int = 10) -> None: """ Initialize Render-specific monitoring with shorter intervals Args: log_interval: Seconds between memory log entries """ # Set environment variables for memory monitoring os.environ["MEMORY_DEBUG"] = "1" os.environ["MEMORY_LOG_INTERVAL"] = str(log_interval) logger.info( "Initialized Render monitoring with %ds intervals (memory limit: %dMB)", log_interval, RENDER_MEMORY_LIMIT_MB, ) # Perform initial memory check memory_mb = get_memory_usage() logger.info("Initial memory: %.1fMB", memory_mb) # Record startup metrics _record_memory_sample("startup", memory_mb) def check_render_memory_thresholds(context: str = "periodic") -> MemoryStatus: """ Check current memory against Render thresholds and take action if needed. 


def check_render_memory_thresholds(context: str = "periodic") -> MemoryStatus:
    """
    Check current memory against Render thresholds and take action if needed.

    Args:
        context: Label for the check (e.g., "request", "background")

    Returns:
        Dictionary with memory status details
    """
    memory_mb = get_memory_usage()
    _record_memory_sample(context, memory_mb)

    global _memory_peak
    if memory_mb > _memory_peak:
        _memory_peak = memory_mb
        log_memory_checkpoint(f"new_peak_memory_{context}", force=True)

    status = "normal"
    action_taken: Optional[str] = None

    # Progressive response based on severity
    if memory_mb > RENDER_EMERGENCY_THRESHOLD_MB:
        logger.critical(
            "EMERGENCY: Memory usage at %.1fMB - critically close to %.1fMB limit",
            memory_mb,
            RENDER_MEMORY_LIMIT_MB,
        )
        status = "emergency"
        action_taken = "emergency_cleanup"
        # Take emergency action
        clean_memory("emergency")
        force_garbage_collection()
    elif memory_mb > RENDER_CRITICAL_THRESHOLD_MB:
        logger.warning(
            "CRITICAL: Memory usage at %.1fMB - approaching %.1fMB limit",
            memory_mb,
            RENDER_MEMORY_LIMIT_MB,
        )
        status = "critical"
        action_taken = "aggressive_cleanup"
        clean_memory("critical")
    elif memory_mb > RENDER_WARNING_THRESHOLD_MB:
        logger.warning(
            "WARNING: Memory usage at %.1fMB - monitor closely (limit: %.1fMB)",
            memory_mb,
            RENDER_MEMORY_LIMIT_MB,
        )
        status = "warning"
        action_taken = "light_cleanup"
        clean_memory("warning")

    result: MemoryStatus = {
        "timestamp": datetime.now(timezone.utc).isoformat(),  # Timestamp of the check
        "memory_mb": memory_mb,  # Current memory usage
        "peak_memory_mb": _memory_peak,  # Peak memory usage recorded
        "context": context,  # Context of the memory check
        "status": status,  # Current status based on memory usage
        "action_taken": action_taken,  # Action taken, if any
        "memory_limit_mb": RENDER_MEMORY_LIMIT_MB,  # Memory limit defined
    }

    # Periodically dump memory metrics to a file in /tmp
    _maybe_dump_memory_metrics()

    return result


def _record_memory_sample(context: str, memory_mb: float) -> None:
    """Record a memory sample with a timestamp for trend analysis."""
    global _memory_samples

    sample: MemorySample = {
        "timestamp": time.time(),
        "memory_mb": memory_mb,
        "context": context,
    }
    _memory_samples.append(sample)

    # Prevent unbounded growth by limiting history
    if len(_memory_samples) > _memory_history_limit:
        _memory_samples = _memory_samples[-_memory_history_limit:]


def _maybe_dump_memory_metrics() -> None:
    """Periodically save memory metrics to a file for later analysis."""
    global _memory_last_dump_time

    # Only dump once every 5 minutes
    now = time.time()
    if now - _memory_last_dump_time < 300:
        return

    try:
        _memory_last_dump_time = now

        # Create the dump directory if it doesn't exist
        dump_dir = "/tmp/render_metrics"
        os.makedirs(dump_dir, exist_ok=True)

        # Generate a filename with a UTC timestamp
        timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
        filename = f"{dump_dir}/memory_metrics_{timestamp}.json"

        # Dump the samples to the file
        with open(filename, "w") as f:
            json.dump(
                {
                    "samples": _memory_samples,
                    "peak_memory_mb": _memory_peak,
                    "memory_limit_mb": RENDER_MEMORY_LIMIT_MB,
                    "summary": memory_summary(),
                },
                f,
                indent=2,
            )

        logger.info("Memory metrics dumped to %s", filename)
    except Exception as e:
        logger.error("Failed to dump memory metrics: %s", e)
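

# For offline analysis, the JSON files written above can be read back directly.
# A minimal sketch follows; the helper name load_latest_metrics_dump is
# illustrative, not an established part of this module. Filenames sort
# chronologically because of the %Y%m%d_%H%M%S timestamp format used above.
def load_latest_metrics_dump(
    dump_dir: str = "/tmp/render_metrics",
) -> Optional[Dict[str, Any]]:
    """Illustrative helper: load the most recent memory metrics dump, if any."""
    try:
        files = sorted(
            name
            for name in os.listdir(dump_dir)
            if name.startswith("memory_metrics_") and name.endswith(".json")
        )
        if not files:
            return None
        with open(os.path.join(dump_dir, files[-1])) as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError):
        return None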


def get_memory_trends() -> Dict[str, Any]:
    """
    Get memory usage trends from collected samples.

    Returns:
        Dictionary with memory trends and statistics
    """
    if not _memory_samples:
        return {"status": "no_data"}

    # Basic statistics
    current = _memory_samples[-1]["memory_mb"]
    trends: Dict[str, Any] = {
        "current_mb": current,
        "peak_mb": _memory_peak,
        "samples_count": len(_memory_samples),
    }

    # Use one reference time so both windows are measured from the same instant
    now = time.time()

    # Calculate the trend over the last 5 minutes
    recent_samples: List[MemorySample] = [
        s for s in _memory_samples if now - s["timestamp"] < 300
    ]
    if len(recent_samples) >= 2:
        start_mb: float = recent_samples[0]["memory_mb"]
        end_mb: float = recent_samples[-1]["memory_mb"]
        trends["trend_5min_mb"] = end_mb - start_mb

    # Calculate the hourly trend if we have enough data
    hour_samples: List[MemorySample] = [
        s for s in _memory_samples if now - s["timestamp"] < 3600
    ]
    if len(hour_samples) >= 2:
        start_mb = hour_samples[0]["memory_mb"]
        end_mb = hour_samples[-1]["memory_mb"]
        trends["trend_1hour_mb"] = end_mb - start_mb

    return trends


def add_memory_middleware(app) -> None:
    """
    Add middleware to a Flask app for request-level memory monitoring.

    Args:
        app: Flask application instance
    """
    try:

        @app.before_request
        def check_memory_before_request():
            """Check memory before processing each request."""
            try:
                from flask import request

                try:
                    memory_status = check_render_memory_thresholds(
                        f"request_{request.endpoint}"
                    )

                    # If we're in an emergency state, reject new requests
                    if memory_status["status"] == "emergency":
                        logger.critical(
                            "Rejecting request due to emergency memory usage: %s %.1fMB",
                            request.path,
                            memory_status["memory_mb"],
                        )
                        return {
                            "status": "error",
                            "message": (
                                "Service temporarily unavailable due to "
                                "resource constraints"
                            ),
                            "retry_after": 30,  # Suggest retry after 30 seconds
                        }, 503
                except Exception as e:
                    # Don't let memory monitoring failures affect requests
                    logger.debug("Memory status check failed: %s", e)
            except Exception as e:
                # Catch all other errors to prevent the middleware from breaking the app
                logger.debug("Memory middleware error: %s", e)

        @app.after_request
        def log_memory_after_request(response):
            """Log memory usage after request processing."""
            try:
                memory_mb = get_memory_usage()
                logger.debug("Memory after request: %.1fMB", memory_mb)
            except Exception as e:
                logger.debug("After-request memory logging failed: %s", e)
            return response

    except Exception as e:
        # If we can't even add the middleware, log it but don't crash
        logger.warning("Failed to add memory middleware: %s", e)

        # Define an empty placeholder to avoid errors
        @app.before_request
        def memory_middleware_failed():
            pass
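

# Illustrative wiring only (an assumption, not part of the deployed service):
# a minimal smoke test showing how an application might enable this module at
# startup. It requires Flask and must be run as a module (python -m ...)
# because of the relative memory_utils import above.
if __name__ == "__main__":  # pragma: no cover
    from flask import Flask

    demo_app = Flask(__name__)
    init_render_monitoring(log_interval=5)
    add_memory_middleware(demo_app)
    print(json.dumps(check_render_memory_thresholds("smoke_test"), indent=2))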