File size: 4,032 Bytes
c4b28eb
d3fd68c
c4b28eb
4b80514
 
c4b28eb
 
 
6338213
52852e0
6338213
 
 
 
 
 
 
 
 
d3fd68c
 
6338213
 
52852e0
c4b28eb
13846a7
6338213
45cf08e
 
 
 
 
 
 
 
d3fd68c
 
6338213
 
 
45cf08e
6338213
 
 
45cf08e
d3fd68c
 
 
 
 
 
 
 
 
9988b25
d3fd68c
 
 
 
 
 
 
 
 
 
45cf08e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9988b25
45cf08e
 
9988b25
 
 
45cf08e
9988b25
159faf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45cf08e
9988b25
 
d3fd68c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env bash
#
# Entrypoint wrapper: initializes the database, launches gunicorn in the
# background, probes it for readiness, warms it up, then waits on it.
#
# Environment overrides read by this script:
#   WORKERS, TIMEOUT, PORT                 - gunicorn workers / timeout / port
#   DB_INIT_RETRIES, DB_INIT_SLEEP_BASE    - database init retry policy
#   READY_TIMEOUT, READY_INTERVAL          - readiness probe budget / cadence

# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Resolve runtime settings, falling back to defaults when the environment
# does not provide them. A single worker is the default so that low-memory
# hosts are not pushed into OOM.
WORKERS_VALUE=${WORKERS:-1}
TIMEOUT_VALUE=${TIMEOUT:-120}
PORT_VALUE=${PORT:-10000}

# Database bootstrap. scripts/init_pgvector.py is re-run until it succeeds,
# which tolerates a database that is still cold-starting. The pause between
# attempts grows linearly (SLEEP_BASE * attempt number); once MAX_RETRIES
# attempts have failed the script gives up with exit status 1.
echo "Initializing database..."
MAX_RETRIES="${DB_INIT_RETRIES:-10}"
SLEEP_BASE="${DB_INIT_SLEEP_BASE:-5}"

attempt=1
while true; do
  if python scripts/init_pgvector.py; then
    break
  fi
  if [ "$attempt" -ge "$MAX_RETRIES" ]; then
    echo "Database initialization failed after $attempt attempts; exiting."
    exit 1
  fi
  backoff=$((SLEEP_BASE * attempt))
  echo "init_pgvector.py failed (attempt $attempt/$MAX_RETRIES). Sleeping ${backoff}s before retry..."
  sleep "$backoff"
  attempt=$((attempt + 1))
done

echo "Starting gunicorn on port ${PORT_VALUE} with ${WORKERS_VALUE} workers and timeout ${TIMEOUT_VALUE}s"
# Prepend /app to PYTHONPATH, keeping any value that is already set.
export PYTHONPATH="/app${PYTHONPATH:+:$PYTHONPATH}"

# Build the gunicorn command line as an array so every argument stays a
# single word regardless of IFS or the characters it contains (no reliance
# on unquoted word-splitting).
gunicorn_args=(
  --bind "0.0.0.0:${PORT_VALUE}"
  --workers "${WORKERS_VALUE}"
  --timeout "${TIMEOUT_VALUE}"
  --log-level info
  --access-logfile -
  --error-logfile -
  --capture-output
)

# Pass gunicorn.conf.py only when it exists in the working directory.
if [ -f gunicorn.conf.py ]; then
  gunicorn_args+=(--config gunicorn.conf.py)
else
  echo "Warning: gunicorn.conf.py not found; starting with inline CLI options only."
fi

# Start gunicorn in background so we can trap signals and collect diagnostics
gunicorn "${gunicorn_args[@]}" app:app &

# PID of the background gunicorn process; used later for signal forwarding,
# liveness checks and the final wait.
GUNICORN_PID=$!

#######################################
# Shutdown handler for TERM and INT: logs memory diagnostics, forwards
# SIGTERM to gunicorn, waits for it to exit, then exits the wrapper.
# Globals:   GUNICORN_PID (read)
# Arguments: $1 - name of the received signal without the SIG prefix
#                 (optional, defaults to TERM); used only for the log line
# Outputs:   diagnostics on stdout
# Returns:   never returns; exits the script with status 0
#######################################
handle_term() {
  local sig="${1:-TERM}"
  echo "===== SIG${sig} received at $(date -u +'%Y-%m-%dT%H:%M:%SZ') ====="
  # Diagnostics are best-effort: a failing ps/cat must not abort shutdown.
  echo "--- Top processes by RSS ---"
  ps aux --sort=-rss | head -n 20 || true
  echo "--- /proc/meminfo (if available) ---"
  cat /proc/meminfo || true
  # SIGTERM is what gets forwarded regardless of which signal arrived.
  echo "Forwarding SIGTERM to gunicorn (pid ${GUNICORN_PID})"
  kill -TERM "${GUNICORN_PID}" 2>/dev/null || true
  # Wait for gunicorn to exit; its status is deliberately ignored here.
  wait "${GUNICORN_PID}" || true
  echo "Gunicorn exited; wrapper exiting"
  exit 0
}
# One trap per signal so the log line names the signal actually received.
trap 'handle_term TERM' TERM
trap 'handle_term INT' INT

# Readiness probe loop: poll the /health endpoint until it answers
# successfully, gunicorn dies, or READY_TIMEOUT seconds of wall-clock time
# have passed. On timeout the script keeps going and only logs a warning.
echo "Waiting for application readiness (health endpoint)..."
READY_TIMEOUT="${READY_TIMEOUT:-60}" # total seconds to wait
READY_INTERVAL="${READY_INTERVAL:-3}" # seconds between checks
# Upper bound for a single probe so one hung request cannot outlive the
# overall READY_TIMEOUT budget.
READY_PROBE_TIMEOUT="${READY_PROBE_TIMEOUT:-5}"
ELAPSED=0
READY=0
# SECONDS is bash's running wall-clock counter; measuring against it means
# time spent inside curl counts toward the timeout, not just the sleeps.
ready_started=$SECONDS
while [ "$ELAPSED" -lt "$READY_TIMEOUT" ]; do
  # kill -0 only checks that the process still exists.
  if ! kill -0 "${GUNICORN_PID}" 2>/dev/null; then
    echo "Gunicorn process exited prematurely during startup; aborting." >&2
    exit 1
  fi
  if curl -fsS --max-time "${READY_PROBE_TIMEOUT}" \
    "http://localhost:${PORT_VALUE}/health" >/dev/null 2>&1; then
    READY=1
    break
  fi
  sleep "$READY_INTERVAL"
  ELAPSED=$((SECONDS - ready_started))
done
if [ "$READY" -ne 1 ]; then
  echo "Health endpoint not ready after ${READY_TIMEOUT}s; continuing but marking as degraded." >&2
fi

# Pre-warm the application with a single POST to /chat. The request is sent
# unconditionally (it does not depend on the readiness probe result) and is
# best-effort: a failure or a 30s timeout is logged and startup continues.
echo "Pre-warming application via /chat endpoint..."
if ! curl -sS -X POST "http://localhost:${PORT_VALUE}/chat" \
  -H "Content-Type: application/json" \
  -d '{"message":"pre-warm"}' \
  --max-time 30 --fail >/dev/null 2>&1; then
  echo "Pre-warm request failed but continuing..."
fi

# Embedding warm-up: run one embed_text call in a standalone Python process
# so embedding/model problems surface during startup. If it fails, gunicorn
# is sent SIGTERM, reaped, and the script exits 1.
echo "Running embedding warm-up..."
warmup_status=0
python - <<'PY' || warmup_status=$?
import logging
import time

from src.embedding.embedding_service import EmbeddingService

began = time.time()
try:
    service = EmbeddingService()
    emb = service.embed_text("warmup")
    dur = (time.time() - began) * 1000
    print(f"Embedding warm-up successful; dim={len(emb)}; duration_ms={dur:.1f}")
except Exception as e:
    dur = (time.time() - began) * 1000
    print(f"Embedding warm-up FAILED after {dur:.1f}ms: {e}")
    raise SystemExit(1)
PY

if [ "$warmup_status" -ne 0 ]; then
  echo "Embedding warm-up failed; terminating startup to allow redeploy/retry." >&2
  # Stop the already-running server and reap it; both steps are best-effort.
  kill -TERM "${GUNICORN_PID}" 2>/dev/null || true
  wait "${GUNICORN_PID}" || true
  exit 1
fi
echo "Embedding warm-up succeeded."

echo "Server is running (PID ${GUNICORN_PID})."

# Wait for gunicorn to exit and forward its exit code.
# The status is captured with `||` because a bare `wait` that returns
# non-zero would trip `set -e` and end the script before the exit code
# could be recorded and logged.
EXIT_CODE=0
wait "${GUNICORN_PID}" || EXIT_CODE=$?
echo "Gunicorn stopped with exit code ${EXIT_CODE}"
exit "${EXIT_CODE}"