Spaces:
Sleeping
Sleeping
File size: 4,032 Bytes
c4b28eb d3fd68c c4b28eb 4b80514 c4b28eb 6338213 52852e0 6338213 d3fd68c 6338213 52852e0 c4b28eb 13846a7 6338213 45cf08e d3fd68c 6338213 45cf08e 6338213 45cf08e d3fd68c 9988b25 d3fd68c 45cf08e 9988b25 45cf08e 9988b25 45cf08e 9988b25 159faf0 45cf08e 9988b25 d3fd68c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
#!/usr/bin/env bash
# Strict mode: abort on unhandled errors, on unset variables, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Runtime tunables, each overridable through the environment.
PORT_VALUE=${PORT:-10000}
TIMEOUT_VALUE=${TIMEOUT:-120}
# A single worker is the default to prevent OOM on low-memory hosts.
WORKERS_VALUE=${WORKERS:-1}
# Database initialisation with retries (handles a database cold start).
# scripts/init_pgvector.py is run until it succeeds. After a failed attempt
# the wrapper sleeps SLEEP_BASE * <attempt number> seconds (linear backoff),
# and gives up with exit status 1 once MAX_RETRIES attempts have failed.
echo "Initializing database..."
MAX_RETRIES="${DB_INIT_RETRIES:-10}"
SLEEP_BASE="${DB_INIT_SLEEP_BASE:-5}"
attempt=1
while true; do
  if python scripts/init_pgvector.py; then
    break
  fi
  if [ "$attempt" -ge "$MAX_RETRIES" ]; then
    echo "Database initialization failed after $attempt attempts; exiting."
    exit 1
  fi
  delay=$((SLEEP_BASE * attempt))
  echo "init_pgvector.py failed (attempt $attempt/$MAX_RETRIES). Sleeping ${delay}s before retry..."
  sleep "$delay"
  attempt=$((attempt + 1))
done
echo "Starting gunicorn on port ${PORT_VALUE} with ${WORKERS_VALUE} workers and timeout ${TIMEOUT_VALUE}s"
# Put /app first on PYTHONPATH while keeping any entries already present.
export PYTHONPATH="/app${PYTHONPATH:+:$PYTHONPATH}"

# Build the gunicorn argument list as an array so every value stays a single
# word and the optional --config flag does not rely on unquoted word
# splitting of a string.
gunicorn_args=(
  --bind "0.0.0.0:${PORT_VALUE}"
  --workers "${WORKERS_VALUE}"
  --timeout "${TIMEOUT_VALUE}"
  --log-level info
  --access-logfile -
  --error-logfile -
  --capture-output
)

# Pass --config only when gunicorn.conf.py exists in the working directory.
if [ -f gunicorn.conf.py ]; then
  gunicorn_args+=(--config gunicorn.conf.py)
else
  echo "Warning: gunicorn.conf.py not found; starting with inline CLI options only."
fi

# Start gunicorn in the background so the wrapper can trap signals and
# collect diagnostics; its PID is kept in GUNICORN_PID for later steps.
gunicorn "${gunicorn_args[@]}" app:app &
GUNICORN_PID=$!
#######################################
# Signal handler: log memory diagnostics, forward SIGTERM to gunicorn, wait
# for it to exit, then exit the wrapper with status 0.
# Globals:   GUNICORN_PID (read)
# Arguments: $1 - name of the received signal without the SIG prefix
#                 (defaults to TERM), used only in the log banner
# Outputs:   diagnostics on stdout
# Returns:   does not return; exits 0
#######################################
handle_term() {
  local sig="${1:-TERM}"
  echo "===== SIG${sig} received at $(date -u +'%Y-%m-%dT%H:%M:%SZ') ====="
  echo "--- Top processes by RSS ---"
  # Best effort: the listing must never abort the shutdown path.
  ps aux --sort=-rss | head -n 20 || true
  echo "--- /proc/meminfo (if available) ---"
  cat /proc/meminfo || true
  echo "Forwarding SIGTERM to gunicorn (pid ${GUNICORN_PID})"
  # gunicorn may already be gone; ignore a failed kill.
  kill -TERM "${GUNICORN_PID}" 2>/dev/null || true
  # Wait for gunicorn to exit; its status is intentionally discarded.
  wait "${GUNICORN_PID}" || true
  echo "Gunicorn exited; wrapper exiting"
  exit 0
}
# Register one trap per signal so the banner names the signal that actually
# arrived instead of always reporting SIGTERM.
trap 'handle_term TERM' SIGTERM
trap 'handle_term INT' SIGINT
# Readiness probe loop: poll the /health endpoint until it answers
# successfully, gunicorn dies, or READY_TIMEOUT seconds of wall-clock time
# have passed. A timeout is not fatal; startup continues with READY=0.
echo "Waiting for application readiness (health endpoint)..."
READY_TIMEOUT="${READY_TIMEOUT:-60}"            # total seconds to wait
READY_INTERVAL="${READY_INTERVAL:-3}"           # seconds between checks
READY_PROBE_TIMEOUT="${READY_PROBE_TIMEOUT:-5}" # max seconds per probe
READY=0
# Use bash's SECONDS counter so time spent inside curl counts toward the
# deadline, and so a zero interval cannot make the loop run forever.
ready_deadline=$((SECONDS + READY_TIMEOUT))
while ((SECONDS < ready_deadline)); do
  # Abort startup if the background gunicorn process has already exited.
  if ! kill -0 "${GUNICORN_PID}" 2>/dev/null; then
    echo "Gunicorn process exited prematurely during startup; aborting." >&2
    exit 1
  fi
  # --max-time bounds each probe so one hung request cannot block the loop
  # beyond the overall timeout.
  if curl -fsS --max-time "${READY_PROBE_TIMEOUT}" \
    "http://localhost:${PORT_VALUE}/health" >/dev/null 2>&1; then
    READY=1
    break
  fi
  sleep "$READY_INTERVAL"
done
if [ "$READY" -ne 1 ]; then
  echo "Health endpoint not ready after ${READY_TIMEOUT}s; continuing but marking as degraded." >&2
fi
# Best-effort pre-warm: send one POST to /chat. The request is issued
# whether or not the readiness probe succeeded; a failure (including the
# 30 second curl time limit being hit) is only logged and startup continues.
echo "Pre-warming application via /chat endpoint..."
if ! curl -sS --fail --max-time 30 \
  -X POST "http://localhost:${PORT_VALUE}/chat" \
  -H "Content-Type: application/json" \
  -d '{"message":"pre-warm"}' >/dev/null 2>&1; then
  echo "Pre-warm request failed but continuing..."
fi
# Explicit embedding warm-up to surface ONNX model issues early.
# A short inline Python program imports EmbeddingService, embeds one string
# and reports the elapsed time. Any failure, including a failed import, is
# reported on stderr and makes the Python process exit with status 1; the
# wrapper then stops gunicorn and exits 1 so the deployment can be retried.
echo "Running embedding warm-up..."
if python - <<'PY'
import sys
import time

# Monotonic clock; the measured duration includes the service import.
start = time.perf_counter()
try:
    from src.embedding.embedding_service import EmbeddingService

    svc = EmbeddingService()
    emb = svc.embed_text("warmup")
    dur = (time.perf_counter() - start) * 1000
    print(f"Embedding warm-up successful; dim={len(emb)}; duration_ms={dur:.1f}")
except Exception as e:
    dur = (time.perf_counter() - start) * 1000
    print(f"Embedding warm-up FAILED after {dur:.1f}ms: {e}", file=sys.stderr)
    raise SystemExit(1)
PY
then
  echo "Embedding warm-up succeeded."
else
  echo "Embedding warm-up failed; terminating startup to allow redeploy/retry." >&2
  # Stop the background server before exiting; it may already be gone.
  kill -TERM "${GUNICORN_PID}" 2>/dev/null || true
  wait "${GUNICORN_PID}" || true
  exit 1
fi
echo "Server is running (PID ${GUNICORN_PID})."
# Wait for gunicorn to exit and forward its exit code.
# The status is captured with `||` because the script runs under `set -e`:
# a bare `wait` returning non-zero would abort the script before the exit
# code could be stored and logged.
EXIT_CODE=0
wait "${GUNICORN_PID}" || EXIT_CODE=$?
echo "Gunicorn stopped with exit code ${EXIT_CODE}"
exit "${EXIT_CODE}"
|