Spaces:

sethmcknight
/

msse-ai-engineering

Sleeping

File size: 1,935 Bytes

"""
Gunicorn configuration for low-memory environments like Render's free tier.
"""

import os

# Bind to the port Render provides (falls back to 10000 when PORT is unset).
bind = f"0.0.0.0:{os.environ.get('PORT', 10000)}"

# Use a single worker process. This is crucial for staying within the 512MB
# memory limit, as each worker loads a copy of the application.
workers = 1

# Use threads for concurrency within the single worker. This is more
# memory-efficient than multiple processes.
threads = 2

# Application preloading is deliberately disabled: the application is loaded
# inside the worker after it has been forked, not in the master process.
# With a single worker there is no sibling process to share preloaded pages
# with, so preloading would not bring copy-on-write savings here.
preload_app = False

# Set the worker class to 'gthread' to enable threads.
worker_class = "gthread"

# Set a reasonable timeout (in seconds) for workers.
timeout = 60

# Keep-alive timeout (in seconds) - important for Render health checks.
keepalive = 30

# Memory optimization: restart the worker periodically to mitigate leaks.
# Threshold was increased to reduce churn now that embedding load is stable.
# The jitter randomizes the restart point slightly.
max_requests = 200
max_requests_jitter = 20

# Worker lifecycle settings for memory management.
# Keep the worker heartbeat temp file in shared memory when /dev/shm is a
# writable directory; otherwise leave the setting at None so Gunicorn falls
# back to its default temporary directory instead of failing at startup.
_SHM_DIR = "/dev/shm"
worker_tmp_dir = (
    _SHM_DIR if os.path.isdir(_SHM_DIR) and os.access(_SHM_DIR, os.W_OK) else None
)

# Additional memory optimizations
worker_connections = 10  # Limit concurrent connections per worker
backlog = 64  # Queue size for pending connections

# Graceful shutdown
graceful_timeout = 10  # Faster shutdown for memory recovery


# Memory management hooks
def when_ready(server):
    """Gunicorn ``when_ready`` hook: announce readiness, then collect garbage.

    Args:
        server: Object whose ``log.info`` method receives the readiness
            message.
    """
    from gc import collect as run_gc

    announce = server.log.info
    announce("Server is ready. Forcing garbage collection")

    # Run one explicit collection pass once the readiness message is logged.
    run_gc()


def worker_init(worker):
    """Log the new worker's pid and run a garbage-collection pass.

    ``worker_init`` is not one of Gunicorn's server hook names, so Gunicorn
    never calls this function by that name. It is invoked through the
    ``post_fork`` hook defined right below.

    Args:
        worker: Object exposing ``log.info`` and ``pid``.
    """
    import gc

    worker.log.info(f"Worker spawned (pid: {worker.pid})")
    gc.collect()


def post_fork(server, worker):
    """Called just after a worker has been forked.

    Registers the worker start-up routine under a hook name Gunicorn
    recognises; the two-argument signature is what the ``post_fork`` hook
    requires.

    Args:
        server: The object passed by Gunicorn as the server; unused here.
        worker: The freshly forked worker, forwarded to ``worker_init``.
    """
    worker_init(worker)


def worker_exit(server, worker):
    """Gunicorn ``worker_exit`` hook: log the exited worker and collect garbage.

    Args:
        server: Object whose ``log.info`` method receives the exit message.
        worker: Object whose ``pid`` is reported in that message.
    """
    from gc import collect as run_gc

    report = server.log.info
    report(f"Worker {worker.pid} exited. Cleaning memory")

    # Run one explicit collection pass after the exit has been logged.
    run_gc()