"""
Gunicorn configuration for low-memory environments like Render's free tier.
"""
import os
# Bind to the port Render provides
bind = f"0.0.0.0:{os.environ.get('PORT', 10000)}"
# Use a single worker process. This is crucial for staying within the 512MB
# memory limit, as each worker loads a copy of the application.
workers = 1
# Use threads for concurrency within the single worker. This is more
# memory-efficient than multiple processes.
threads = 2
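# With 1 worker x 2 threads, at most two requests are served concurrently.
# If more throughput is needed, raising `threads` (rather than `workers`) is
# the cheaper option here, since threads share one copy of the application.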
# Do not preload the application in the master process. Copy-on-write savings
# from preloading only pay off with several workers; with a single worker,
# Python's reference counting quickly un-shares the forked pages, so preloading
# can end up keeping two resident copies of the app (master + worker).
preload_app = False
# Set the worker class to 'gthread' to enable threads.
worker_class = "gthread"
# Set a reasonable timeout for workers.
timeout = 60
# Keep-alive timeout - important for Render health checks
keepalive = 30
# Memory optimization: Restart worker periodically to mitigate leaks.
# Increase threshold to reduce churn now that embedding load is stable.
max_requests = 200
max_requests_jitter = 20
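# Each worker is recycled after max_requests plus a random jitter of up to
# max_requests_jitter requests (i.e. roughly every 200-220 requests here);
# the jitter keeps workers from all restarting at the same moment if more
# workers are ever added.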
# Worker lifecycle settings for memory management
worker_tmp_dir = "/dev/shm"  # Keep the worker heartbeat file in shared memory (tmpfs) instead of on disk
# Additional memory optimizations
worker_connections = 10 # Limit concurrent connections per worker
backlog = 64 # Queue size for pending connections
# Graceful shutdown
graceful_timeout = 10 # Faster shutdown for memory recovery
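# On recycle or shutdown a worker gets graceful_timeout seconds to finish
# in-flight requests before it is force-killed, so its memory is released
# promptly even if a request hangs.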
# Memory management hooks
def when_ready(server):
    """Called just after the server is started."""
    import gc
    server.log.info("Server is ready. Forcing garbage collection")
    gc.collect()


def post_worker_init(worker):
    """Called just after a worker has initialized the application."""
    import gc
    worker.log.info(f"Worker spawned (pid: {worker.pid})")
    gc.collect()


def worker_exit(server, worker):
    """Called just after a worker has exited."""
    import gc
    server.log.info(f"Worker {worker.pid} exited. Cleaning memory")
    gc.collect()
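
# Usage sketch (assuming the WSGI callable is exposed as `app` in `app.py`;
# adjust `app:app` to the project's actual module and attribute):
#   gunicorn --config gunicorn.conf.py app:app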