Spaces:
Sleeping
Sleeping
# Strict mode: abort on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Runtime settings. Each one can be overridden through the environment
# variable named inside the expansion; an unset or empty variable selects the
# default.
# Default to 1 worker to prevent OOM on low-memory hosts.
WORKERS_VALUE="${WORKERS:-1}"
# Passed to gunicorn as --timeout.
TIMEOUT_VALUE="${TIMEOUT:-120}"
# Port gunicorn binds to and the local probes connect to.
PORT_VALUE="${PORT:-10000}"
# Initialize the database, retrying to ride out a database cold start.
#   DB_INIT_RETRIES     maximum number of attempts (default 10)
#   DB_INIT_SLEEP_BASE  backoff unit in seconds (default 5); the pause after
#                       failed attempt N is DB_INIT_SLEEP_BASE * N seconds
# Exits 1 once the final attempt has failed.
echo "Initializing database..."
MAX_RETRIES="${DB_INIT_RETRIES:-10}"
SLEEP_BASE="${DB_INIT_SLEEP_BASE:-5}"

# A non-numeric retry limit would make the -ge test below error out (and be
# treated as false), so the loop would retry forever. Reject it up front.
case "$MAX_RETRIES" in
  '' | *[!0-9]*)
    echo "DB_INIT_RETRIES must be a non-negative integer (got '${MAX_RETRIES}'); exiting." >&2
    exit 1
    ;;
esac

i=1
until python scripts/init_pgvector.py; do
  if [ "$i" -ge "$MAX_RETRIES" ]; then
    echo "Database initialization failed after $i attempts; exiting." >&2
    exit 1
  fi
  # Linear backoff: 1x, 2x, 3x ... the base delay.
  delay=$((SLEEP_BASE * i))
  echo "init_pgvector.py failed (attempt $i/$MAX_RETRIES). Sleeping ${delay}s before retry..."
  sleep "$delay"
  i=$((i + 1))
done
echo "Starting gunicorn on port ${PORT_VALUE} with ${WORKERS_VALUE} workers and timeout ${TIMEOUT_VALUE}s"
# Put /app first on the module search path, keeping any PYTHONPATH already set.
export PYTHONPATH="/app${PYTHONPATH:+:$PYTHONPATH}"

# Build the gunicorn command line as an array so every option stays a single
# word without relying on unquoted expansion.
# NOTE(review): arrays need bash; confirm the (not visible) shebang selects it.
GUNICORN_ARGS=(
  --bind "0.0.0.0:${PORT_VALUE}"
  --workers "${WORKERS_VALUE}"
  --timeout "${TIMEOUT_VALUE}"
  --log-level info
  --access-logfile -
  --error-logfile -
  --capture-output
)

# Use the config file only when it exists in the current directory.
if [ -f gunicorn.conf.py ]; then
  GUNICORN_ARGS+=(--config gunicorn.conf.py)
else
  echo "Warning: gunicorn.conf.py not found; starting with inline CLI options only."
fi

# Start gunicorn in background so we can trap signals and collect diagnostics.
gunicorn "${GUNICORN_ARGS[@]}" app:app &
GUNICORN_PID=$!
#######################################
# Signal handler: log diagnostics, ask gunicorn to shut down, wait for it,
# then exit the wrapper with status 0.
# Globals:   GUNICORN_PID (read)
# Arguments: $1 - name of the signal that fired, without the SIG prefix
#                 (defaults to TERM); used only in the log header
# Outputs:   process list sorted by RSS and /proc/meminfo, to stdout
# Returns:   does not return; always exits 0
#######################################
handle_term() {
  local sig="${1:-TERM}"
  echo "===== SIG${sig} received at $(date -u +'%Y-%m-%dT%H:%M:%SZ') ====="
  echo "--- Top processes by RSS ---"
  # Diagnostics are best-effort: a failing ps or cat must not block shutdown.
  ps aux --sort=-rss | head -n 20 || true
  echo "--- /proc/meminfo (if available) ---"
  cat /proc/meminfo || true
  # TERM is forwarded whichever signal was received.
  echo "Forwarding SIGTERM to gunicorn (pid ${GUNICORN_PID})"
  kill -TERM "${GUNICORN_PID}" 2>/dev/null || true
  # Wait for gunicorn to exit; its status is deliberately ignored.
  wait "${GUNICORN_PID}" || true
  echo "Gunicorn exited; wrapper exiting"
  exit 0
}

# One trap per signal so the handler can report which one actually arrived.
trap 'handle_term TERM' TERM
trap 'handle_term INT' INT
# Readiness probe loop: poll the local /health endpoint until it answers
# successfully, gunicorn dies, or the time budget runs out.
#   READY_TIMEOUT        total seconds to wait (default 60)
#   READY_INTERVAL       seconds between checks (default 3)
#   READY_PROBE_TIMEOUT  cap in seconds on a single probe request (default 5)
# Sets READY=1 on success. A timeout is only logged; startup continues.
echo "Waiting for application readiness (health endpoint)..."
READY_TIMEOUT="${READY_TIMEOUT:-60}"
READY_INTERVAL="${READY_INTERVAL:-3}"
READY_PROBE_TIMEOUT="${READY_PROBE_TIMEOUT:-5}"
ELAPSED=0
READY=0
# Measure elapsed wall-clock time with bash's SECONDS counter so time spent
# inside curl counts against the budget, and so a zero interval cannot loop
# forever.
probe_start=${SECONDS}
while [ "$ELAPSED" -lt "$READY_TIMEOUT" ]; do
  # kill -0 sends no signal; it only checks that the process still exists.
  if ! kill -0 "${GUNICORN_PID}" 2>/dev/null; then
    echo "Gunicorn process exited prematurely during startup; aborting." >&2
    exit 1
  fi
  # --max-time keeps a hung connection from stalling the loop indefinitely.
  if curl -fsS --max-time "${READY_PROBE_TIMEOUT}" \
    "http://localhost:${PORT_VALUE}/health" >/dev/null 2>&1; then
    READY=1
    break
  fi
  sleep "$READY_INTERVAL"
  ELAPSED=$((SECONDS - probe_start))
done
if [ "$READY" -ne 1 ]; then
  echo "Health endpoint not ready after ${READY_TIMEOUT}s; continuing but marking as degraded." >&2
fi
# Best-effort pre-warm: POST one small message to the local /chat endpoint.
# This runs whether or not the readiness probe succeeded, and a failed request
# is only logged; it never aborts startup.
echo "Pre-warming application via /chat endpoint..."
if ! curl -sS -X POST "http://localhost:${PORT_VALUE}/chat" \
  -H "Content-Type: application/json" \
  -d '{"message":"pre-warm"}' \
  --max-time 30 --fail >/dev/null 2>&1; then
  echo "Pre-warm request failed but continuing..."
fi
# Explicit embedding warm-up to surface ONNX model issues early.
# The check runs in its own Python process (separate from the gunicorn
# workers): it builds an EmbeddingService, embeds one short string and reports
# the vector length and elapsed time. If it fails, gunicorn is stopped and the
# script exits 1 so the deployment can be retried.
echo "Running embedding warm-up..."
if python - <<'PY'
import time

from src.embedding.embedding_service import EmbeddingService

start = time.time()
try:
    svc = EmbeddingService()
    emb = svc.embed_text("warmup")
    dur = (time.time() - start) * 1000
    print(f"Embedding warm-up successful; dim={len(emb)}; duration_ms={dur:.1f}")
except Exception as e:
    dur = (time.time() - start) * 1000
    print(f"Embedding warm-up FAILED after {dur:.1f}ms: {e}")
    raise SystemExit(1)
PY
then
  echo "Embedding warm-up succeeded."
else
  echo "Embedding warm-up failed; terminating startup to allow redeploy/retry." >&2
  # Stop the already-running server before giving up; both steps are
  # best-effort because gunicorn may have exited on its own.
  kill -TERM "${GUNICORN_PID}" 2>/dev/null || true
  wait "${GUNICORN_PID}" || true
  exit 1
fi
echo "Server is running (PID ${GUNICORN_PID})."
# Block until gunicorn exits, then exit with the same status.
# The status is captured through `||` because, under `set -e`, a bare `wait`
# returning non-zero would abort the script right there and the log line
# below would never be printed.
EXIT_CODE=0
wait "${GUNICORN_PID}" || EXIT_CODE=$?
echo "Gunicorn stopped with exit code ${EXIT_CODE}"
exit "${EXIT_CODE}"