File size: 1,935 Bytes
32e4125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f75da29
32e4125
 
 
 
159faf0
 
 
 
32e4125
 
 
 
 
 
 
 
 
f75da29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
"""
Gunicorn configuration for low-memory environments like Render's free tier.
"""

import os

# Listen on all interfaces, on the port Render supplies through $PORT.
# Falls back to 10000 when the variable is unset (e.g. local runs).
bind = f"0.0.0.0:{os.environ.get('PORT', '10000')}"

# Use a single worker process. This is crucial for staying within the 512MB
# memory limit, as each worker loads a copy of the application.
workers = 1

# Use threads for concurrency within the single worker. This is more
# memory-efficient than multiple processes.
threads = 2

# Preloading is deliberately disabled: the application is loaded inside the
# worker rather than in the master before forking. Copy-on-write sharing only
# pays off when several workers are forked from one preloaded master, and
# this configuration runs exactly one worker.
preload_app = False

# Set the worker class to 'gthread' so the `threads` setting takes effect.
worker_class = "gthread"

# Seconds a worker may stay silent before it is killed and restarted.
timeout = 60

# Keep-alive timeout (seconds) - important for Render health checks.
keepalive = 30

# Memory optimization: restart the worker periodically to mitigate leaks.
# Threshold was increased to reduce churn now that embedding load is stable.
# The jitter randomizes the exact restart point.
max_requests = 200
max_requests_jitter = 20

# Worker lifecycle settings for memory management.
# Keep Gunicorn's worker temp file in shared memory when that directory
# exists; otherwise fall back to Gunicorn's default location (None) instead
# of failing at worker start on hosts without /dev/shm.
worker_tmp_dir = "/dev/shm" if os.path.isdir("/dev/shm") else None

# Additional memory optimizations
worker_connections = 10  # Limit concurrent connections per worker
backlog = 64  # Queue size for pending connections

# Graceful shutdown
graceful_timeout = 10  # Faster shutdown for memory recovery


# Memory management hooks
def when_ready(server):
    """Log that the server is up, then run a garbage-collection pass.

    Called just after the server is started. ``server`` only needs to
    expose a ``log`` object with an ``info`` method.
    """
    from gc import collect

    logger = server.log
    logger.info("Server is ready. Forcing garbage collection")
    collect()


def worker_init(worker):
    """Log the worker's pid and run a garbage-collection pass.

    Intended to run in a freshly started worker. ``worker`` must expose
    ``pid`` and a ``log`` object with an ``info`` method.
    """
    import gc

    worker.log.info(f"Worker spawned (pid: {worker.pid})")
    gc.collect()


# Gunicorn ignores config-file names it does not recognise, and "worker_init"
# is not one of its server hooks, so on its own the function above would never
# be called. Register it under the real one-argument hook name; the original
# name is kept for anything that references it directly.
post_worker_init = worker_init


def worker_exit(server, worker):
    """Log which worker exited, then run a garbage-collection pass.

    Called just after a worker has been exited. The message is written
    through ``server.log`` and includes ``worker.pid``.
    """
    from gc import collect

    message = f"Worker {worker.pid} exited. Cleaning memory"
    server.log.info(message)
    collect()