Spaces:
Sleeping
Sleeping
import os
import shutil
import tempfile
import unittest

import numpy as np

from hierarchy_engine import HierarchicalIndex
class TestHierarchicalIndex(unittest.TestCase):
    """Tests for building, searching, saving, and loading a HierarchicalIndex."""

    # Shape of the dummy dataset shared by all tests.
    NUM_VECTORS = 100
    VECTOR_DIM = 128

    def setUp(self):
        """Create a private scratch directory and a reproducible dummy dataset."""
        # Use a unique temporary directory instead of a fixed name in the
        # working directory: tearDown removes the whole tree, so a fixed name
        # could delete unrelated pre-existing data or collide between runs.
        self.test_dir = tempfile.mkdtemp(prefix="test_hierarchy_data_")
        self.index_path = os.path.join(self.test_dir, "test_index.pkl")

        # Seeded generator so the test data is identical on every run.
        # NOTE(review): randomness inside HierarchicalIndex itself (e.g. the
        # clustering initialisation) is not controlled from here.
        rng = np.random.default_rng(0)
        self.vectors = rng.random(
            (self.NUM_VECTORS, self.VECTOR_DIM)
        ).astype('float32')
        self.ids = [f"id_{i}" for i in range(self.NUM_VECTORS)]
        self.payloads = [{"info": f"data_{i}"} for i in range(self.NUM_VECTORS)]

    def tearDown(self):
        """Remove the scratch directory created in setUp."""
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def test_build_and_search(self):
        """Build a small index and check that a stored vector finds itself."""
        # Small cluster counts keep the test fast on the tiny dataset.
        index = HierarchicalIndex(layer2_clusters=5, layer1_clusters=3)
        index.build(self.vectors, self.ids, self.payloads)

        # Both layers must be populated after a build.
        self.assertIsNotNone(index.layer2_centroids)
        self.assertGreater(len(index.layer1_centroids), 0)
        self.assertGreater(len(index.layer1_children), 0)

        # Query with the first stored vector itself.
        query_vector = self.vectors[0]
        results = index.search(query_vector, top_k=5, beam_size=5)
        self.assertGreater(len(results), 0)

        # The search is heuristic, so an exact match is not guaranteed in
        # general; with beam_size=5 on this small dataset it is expected.
        found_ids = [r['id'] for r in results]
        self.assertIn("id_0", found_ids)

        # Each result carries a score and the payload supplied at build time.
        self.assertIn('score', results[0])
        self.assertIn('payload', results[0])
        self.assertEqual(results[0]['payload']['info'], "data_0")

    def test_save_and_load(self):
        """Round-trip an index through save/load and search the loaded copy."""
        index = HierarchicalIndex(layer2_clusters=2, layer1_clusters=2)
        index.build(self.vectors, self.ids, self.payloads)
        index.save(self.index_path)

        loaded_index = HierarchicalIndex.load(self.index_path)
        self.assertEqual(loaded_index.layer2_k, index.layer2_k)
        self.assertEqual(len(loaded_index.ids), len(index.ids))

        # The loaded index must still answer queries.
        query_vector = self.vectors[10]
        results = loaded_index.search(query_vector, top_k=1)
        # Fail with a clear assertion instead of an IndexError on results[0].
        self.assertGreater(len(results), 0)
        self.assertEqual(results[0]['id'], "id_10")

    def test_empty_build(self):
        """Placeholder for the empty-input case; currently skipped."""
        # Default construction must at least not raise.
        HierarchicalIndex()
        # The expected behaviour of build() on empty data (graceful no-op vs.
        # a specific exception) has not been decided, so report this test as
        # skipped rather than letting an empty body count as a pass.
        self.skipTest(
            "Behaviour of HierarchicalIndex.build on empty data is not specified yet"
        )
# Script entry point: discover and run every test case defined in this module.
if __name__ == '__main__':
    unittest.main()