"""
Gunicorn configuration for low-memory environments like Render's free tier.
"""
import os
# Bind to the port Render provides
bind = f"0.0.0.0:{os.environ.get('PORT', 10000)}"
# Use a single worker process. This is crucial for staying within the 512MB
# memory limit, as each worker loads a copy of the application.
workers = 1
# Use threads for concurrency within the single worker. This is more
# memory-efficient than multiple processes.
threads = 2
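# With 1 worker x 2 threads, at most two requests are served concurrently.
# If more throughput is needed, raising `threads` (rather than `workers`) is
# the cheaper option here, since threads share one copy of the application.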
# Do not preload the application in the master process. Copy-on-write savings
# from preloading only pay off with several workers; with a single worker,
# Python's reference counting quickly un-shares the forked pages, so preloading
# can end up keeping two resident copies of the app (master + worker).
preload_app = False
# Set the worker class to 'gthread' to enable threads.
worker_class = "gthread"
# Set a reasonable timeout for workers.
timeout = 60
# Keep-alive timeout - important for Render health checks
keepalive = 30
# Memory optimization: Restart worker periodically to mitigate leaks.
# Increase threshold to reduce churn now that embedding load is stable.
max_requests = 200
max_requests_jitter = 20
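# Each worker is recycled after max_requests plus a random jitter of up to
# max_requests_jitter requests (i.e. roughly every 200-220 requests here);
# the jitter keeps workers from all restarting at the same moment if more
# workers are ever added.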
# Worker lifecycle settings for memory management
worker_tmp_dir = "/dev/shm"  # Keep the worker heartbeat file in shared memory (tmpfs) instead of on disk
# Additional memory optimizations
worker_connections = 10 # Limit concurrent connections per worker
backlog = 64 # Queue size for pending connections
# Graceful shutdown
graceful_timeout = 10 # Faster shutdown for memory recovery
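# On recycle or shutdown a worker gets graceful_timeout seconds to finish
# in-flight requests before it is force-killed, so its memory is released
# promptly even if a request hangs.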
# Memory management hooks
def when_ready(server):
    """Called just after the server is started."""
    import gc
    server.log.info("Server is ready. Forcing garbage collection")
    gc.collect()


def post_worker_init(worker):
    """Called just after a worker has initialized the application."""
    import gc
    worker.log.info(f"Worker spawned (pid: {worker.pid})")
    gc.collect()


def worker_exit(server, worker):
    """Called just after a worker has exited."""
    import gc
    server.log.info(f"Worker {worker.pid} exited. Cleaning memory")
    gc.collect()
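
# Usage sketch (assuming the WSGI callable is exposed as `app` in `app.py`;
# adjust `app:app` to the project's actual module and attribute):
#   gunicorn --config gunicorn.conf.py app:app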