Spaces:
Sleeping
Sleeping
Seth McKnight
committed on
Commit
·
52852e0
1
Parent(s):
dca679b
Postgres vector migration (#84)
Browse files
* feat: Implement PostgreSQL with pgvector as ChromaDB alternative
- Add PostgresVectorService with full pgvector integration
- Create PostgresVectorAdapter for ChromaDB compatibility
- Update config to support vector storage type selection
- Add factory pattern for seamless backend switching
- Include migration script with data optimization
- Add comprehensive tests for PostgreSQL implementation
- Update dependencies and environment configuration
- Expected memory reduction: 300-350MB (from 400MB+ to 50-150MB)
This enables deployment on Render's 512MB free tier by using persistent
PostgreSQL storage instead of in-memory ChromaDB.
* Add pgvector init script, update migration docs, and test adjustments
* feat: Default to postgres and automate DB init
- run.sh +4 -0
- src/config.py +1 -1
run.sh
CHANGED
|
@@ -6,6 +6,10 @@ WORKERS_VALUE="${WORKERS:-1}"
|
|
| 6 |
TIMEOUT_VALUE="${TIMEOUT:-120}"
|
| 7 |
PORT_VALUE="${PORT:-10000}"
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
echo "Starting gunicorn on port ${PORT_VALUE} with ${WORKERS_VALUE} workers and timeout ${TIMEOUT_VALUE}s"
|
| 10 |
export PYTHONPATH="/app${PYTHONPATH:+:$PYTHONPATH}"
|
| 11 |
exec gunicorn --bind 0.0.0.0:${PORT_VALUE} --workers "${WORKERS_VALUE}" --timeout "${TIMEOUT_VALUE}" app:app
|
|
|
|
| 6 |
TIMEOUT_VALUE="${TIMEOUT:-120}"
|
| 7 |
PORT_VALUE="${PORT:-10000}"
|
| 8 |
|
| 9 |
+
# Initialize the database
|
| 10 |
+
echo "Initializing database..."
|
| 11 |
+
python scripts/init_pgvector.py
|
| 12 |
+
|
| 13 |
echo "Starting gunicorn on port ${PORT_VALUE} with ${WORKERS_VALUE} workers and timeout ${TIMEOUT_VALUE}s"
|
| 14 |
export PYTHONPATH="/app${PYTHONPATH:+:$PYTHONPATH}"
|
| 15 |
exec gunicorn --bind 0.0.0.0:${PORT_VALUE} --workers "${WORKERS_VALUE}" --timeout "${TIMEOUT_VALUE}" app:app
|
src/config.py
CHANGED
|
@@ -15,7 +15,7 @@ CORPUS_DIRECTORY = "synthetic_policies"
|
|
| 15 |
|
| 16 |
# Vector Database Settings
|
| 17 |
VECTOR_STORAGE_TYPE = os.getenv(
|
| 18 |
-
"VECTOR_STORAGE_TYPE", "chroma"
|
| 19 |
) # "chroma" or "postgres"
|
| 20 |
VECTOR_DB_PERSIST_PATH = "data/chroma_db" # Used for ChromaDB
|
| 21 |
DATABASE_URL = os.getenv("DATABASE_URL") # Used for PostgreSQL
|
|
|
|
| 15 |
|
| 16 |
# Vector Database Settings
|
| 17 |
VECTOR_STORAGE_TYPE = os.getenv(
|
| 18 |
+
"VECTOR_STORAGE_TYPE", "postgres"
|
| 19 |
) # "chroma" or "postgres"
|
| 20 |
VECTOR_DB_PERSIST_PATH = "data/chroma_db" # Used for ChromaDB
|
| 21 |
DATABASE_URL = os.getenv("DATABASE_URL") # Used for PostgreSQL
|