my-gradio-app / scripts /check_rag_status.py
Nguyen Trong Lap
Recreate history without binary blobs
eeb0f9c
"""
Check RAG System Status - Verify all vector stores
Checks every specialized ChromaDB database listed in VECTOR_STORES (currently 7)
"""
from pathlib import Path
import sys
# Put the parent of the directory containing this script at the front of
# sys.path. Presumably this lets project-level packages be imported when the
# script is run directly (nothing in this file imports one) — confirm before
# removing.
sys.path.insert(0, str(Path(__file__).parent.parent))
# Registry of the vector stores this script verifies, keyed by store id.
# Every entry carries:
#   name          - label used in the printed report
#   path          - store directory; relative, so it is resolved against the
#                   current working directory when checked
#   expected_size - approximate on-disk size in MB (only shown in the report)
#   test_query    - text sent to the store as a smoke-test query
# Each row below is (store id, display name, expected size in MB, test query);
# the store directory is always rag/vector_store/<store id>.
VECTOR_STORES = {
    store_id: {
        'name': display_name,
        'path': f'rag/vector_store/{store_id}',
        'expected_size': expected_mb,  # MB
        'test_query': smoke_query,
    }
    for store_id, display_name, expected_mb, smoke_query in (
        ('medical_diseases', 'ViMedical Diseases', 50, 'đau đầu triệu chứng'),
        ('mental_health', 'Mental Health', 80, 'stress anxiety depression'),
        ('nutrition', 'Nutrition Plans', 20, 'diet meal plan calories'),
        ('vietnamese_nutrition', 'Vietnamese Food', 5, 'phở cơm nutrition'),
        ('fitness', 'Fitness Exercises', 10, 'gym workout exercise'),
        ('symptom_qa', 'Medical Q&A', 8, 'triệu chứng bệnh'),
        ('general_health_qa', 'General Health Q&A', 7, 'sức khỏe tổng quát'),
    )
}
def check_vector_store(store_info):
    """Run existence, size, load and query checks on one vector store.

    The checks run in order and stop at the first failure: the directory
    exists -> its files total at least 0.1 MB -> ChromaDB opens the store and
    lists at least one collection -> the first collection holds documents ->
    a one-result test query returns a document. Progress is printed as each
    step completes.

    Only the first entry returned by ``list_collections()`` is inspected.
    ``store_info['path']`` is used as given, so a relative path is resolved
    against the current working directory.

    Args:
        store_info: Mapping with the keys ``name``, ``path``,
            ``expected_size`` (MB, shown in the report only) and
            ``test_query``.

    Returns:
        A dict ``{'status': bool, 'reason': str or None}``. ``reason`` is
        ``None`` on success and a short failure description otherwise.
    """
    print(f"\n📦 {store_info['name']}")
    print("-" * 50)
    store_path = Path(store_info['path'])

    # Check existence
    if not store_path.exists():
        print(f"❌ Not found: {store_info['path']}")
        print(" Reason: Directory does not exist")
        return {'status': False, 'reason': 'Directory not found'}
    print(f"✅ Exists: {store_info['path']}")

    # Check size: sum of every regular file below the store directory.
    total_size = sum(f.stat().st_size for f in store_path.rglob('*') if f.is_file())
    size_mb = total_size / (1024 * 1024)
    expected = store_info['expected_size']
    print(f"📊 Size: {size_mb:.1f} MB (expected ~{expected} MB)")
    if size_mb < 0.1:
        print("⚠️ Database seems empty")
        print(" Reason: Database size < 0.1 MB (likely not built)")
        return {'status': False, 'reason': 'Database empty or not built'}

    # Try to load and query
    try:
        # Imported lazily so a missing package is reported per store instead
        # of crashing the script at import time.
        import chromadb

        client = chromadb.PersistentClient(path=str(store_path))
        collections = client.list_collections()
        if not collections:
            print("⚠️ No collections found")
            print(" Reason: ChromaDB has no collections")
            return {'status': False, 'reason': 'No collections in database'}

        collection = collections[0]
        # chromadb 0.6.x returns bare collection names from list_collections()
        # while other releases return Collection objects; resolve a name into
        # a Collection so count()/query() work on either.
        if isinstance(collection, str):
            collection = client.get_collection(collection)

        count = collection.count()
        print(f"📚 Documents: {count:,} chunks")
        if count == 0:
            print("⚠️ Collection is empty")
            print(" Reason: Collection exists but has 0 documents")
            return {'status': False, 'reason': 'Collection is empty (0 documents)'}

        # Test query: failures here are reported as query failures rather
        # than database-loading failures, hence the separate try block.
        try:
            results = collection.query(
                query_texts=[store_info['test_query']],
                n_results=1,
            )
            if results and results['documents'] and results['documents'][0]:
                print("✅ Query test passed")
                return {'status': True, 'reason': None}
            print("⚠️ Query returned no results")
            print(" Reason: Query executed but found no matching documents")
            return {'status': False, 'reason': 'Query returned no results'}
        except Exception as e:
            print(f"⚠️ Query test failed: {e}")
            print(f" Reason: {str(e)}")
            return {'status': False, 'reason': f'Query failed: {str(e)}'}
    except ImportError:
        print("⚠️ ChromaDB not installed")
        print(" Reason: pip install chromadb")
        return {'status': False, 'reason': 'ChromaDB package not installed'}
    except Exception as e:
        print(f"⚠️ Error: {e}")
        print(f" Reason: {str(e)}")
        return {'status': False, 'reason': f'Error loading database: {str(e)}'}
def check_rag_status():
    """Check every store in VECTOR_STORES and print a status report.

    Stops early when the ``rag/vector_store`` directory (relative to the
    current working directory) is missing. Otherwise runs
    ``check_vector_store`` on each registered store, prints a per-store
    summary, and on failure prints the rebuild command for each failing store.

    Returns:
        True when every store passed its checks, False otherwise (including
        when the base directory does not exist).
    """
    rule = "=" * 60
    print(rule)
    print("🔍 RAG System Status Check")
    print(rule)

    # Guard: nothing to inspect without the base directory.
    root = Path('rag/vector_store')
    if not root.exists():
        print("\n❌ Vector store directory not found!")
        print(f" Expected: {root}")
        print("\n💡 Solution:")
        print(" bash scripts/setup_rag.sh")
        return False
    print(f"\n✅ Base directory exists: {root}")

    # Run the per-store checks in registry order.
    outcomes = {
        key: check_vector_store(spec)
        for key, spec in VECTOR_STORES.items()
    }
    failed_ids = [key for key, outcome in outcomes.items() if not outcome['status']]
    total = len(outcomes)
    passed = total - len(failed_ids)

    # Summary: one line per store, plus the failure reason when there is one.
    print("\n" + rule)
    print("📊 Summary")
    print(rule)
    for key, outcome in outcomes.items():
        ok = outcome['status']
        marker = "✅" if ok else "❌"
        label = VECTOR_STORES[key]['name']
        print(f"{marker} {label}")
        if not ok and outcome['reason']:
            print(f" └─ {outcome['reason']}")

    print("\n" + rule)
    print(f"Result: {passed}/{total} databases OK")

    if not failed_ids:
        print("\n🎉 All vector stores are ready!")
        print("\nNext steps:")
        print(" python app.py")
        print(" Open http://localhost:7860")
        print(rule)
        return True

    print("\n⚠️ Some databases are missing or have issues")
    print("\n💡 Solutions:")
    print("\n1️⃣ Quick fix (rebuild all):")
    print(" bash scripts/setup_rag.sh")
    print("\n2️⃣ Rebuild specific databases:")

    # Command that rebuilds each store, keyed by store id.
    rebuild_commands = {
        'medical_diseases': 'python data_mining/mining_vimedical.py',
        'mental_health': 'python data_mining/mining_mentalchat.py',
        'nutrition': 'python data_mining/mining_nutrition.py',
        'vietnamese_nutrition': 'python data_mining/mining_vietnamese_food.py',
        'fitness': 'python data_mining/mining_fitness.py',
        'symptom_qa': 'python data_mining/mining_medical_qa.py',
        'general_health_qa': 'python data_mining/mining_medical_qa.py',
    }
    for key in failed_ids:
        label = VECTOR_STORES[key]['name']
        command = rebuild_commands.get(key, 'Unknown')
        print(f"\n ❌ {label}:")
        print(f" Reason: {outcomes[key]['reason']}")
        print(f" Fix: {command}")

    print("\n" + rule)
    return False
if __name__ == '__main__':
    # Process exit code: 0 when every vector store passed, 1 otherwise.
    # sys.exit is used instead of the bare exit() helper, which is injected by
    # the site module for interactive use and is not guaranteed to exist
    # (e.g. under `python -S`).
    success = check_rag_status()
    sys.exit(0 if success else 1)