""" Gunicorn configuration for low-memory environments like Render's free tier. """ import os # Bind to the port Render provides bind = f"0.0.0.0:{os.environ.get('PORT', 10000)}" # Use a single worker process. This is crucial for staying within the 512MB # memory limit, as each worker loads a copy of the application. workers = 1 # Use threads for concurrency within the single worker. This is more # memory-efficient than multiple processes. threads = 2 # Preload the application code before the worker processes are forked. # This allows for memory savings through copy-on-write. preload_app = False # Set the worker class to 'gthread' to enable threads. worker_class = "gthread" # Set a reasonable timeout for workers. timeout = 60 # Keep-alive timeout - important for Render health checks keepalive = 30 # Memory optimization: Restart worker periodically to mitigate leaks. # Increase threshold to reduce churn now that embedding load is stable. max_requests = 200 max_requests_jitter = 20 # Worker lifecycle settings for memory management worker_tmp_dir = "/dev/shm" # Use shared memory for temporary files if available # Additional memory optimizations worker_connections = 10 # Limit concurrent connections per worker backlog = 64 # Queue size for pending connections # Graceful shutdown graceful_timeout = 10 # Faster shutdown for memory recovery # Memory management hooks def when_ready(server): """Called just after the server is started.""" import gc server.log.info("Server is ready. Forcing garbage collection") gc.collect() def worker_init(worker): """Called just after a worker has been forked.""" import gc worker.log.info(f"Worker spawned (pid: {worker.pid})") gc.collect() def worker_exit(server, worker): """Called just after a worker has been exited.""" import gc server.log.info(f"Worker {worker.pid} exited. Cleaning memory") gc.collect()