Skip to content

Commit 4197fcb

Browse files
committed
Compound index optimization
1 parent 6d58759 commit 4197fcb

File tree

2 files changed

+194
-2
lines changed

2 files changed

+194
-2
lines changed

schema_migration.py

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ def migrate_schema(
6666
index_options['name'] = source_index_name
6767
index_list.append((index_keys, index_options))
6868

69-
# TODO: Optimize compound indexes
69+
if collection_config.optimize_compound_indexes:
70+
index_list = self._optimize_compound_indexes(index_list)
7071

7172
for index_keys, index_options in index_list:
7273
if self._is_ts_ttl_index(index_keys, index_options):
@@ -99,4 +100,65 @@ def _is_ts_ttl_index(self, index_keys: List[Tuple], index_options: dict) -> bool
99100
"""
100101
if 'expireAfterSeconds' in index_options and any('_ts' == index_key[0] for index_key in index_keys):
101102
return True
102-
return False
103+
return False
104+
105+
def _optimize_compound_indexes(self, index_list: List[Tuple]) -> List[Tuple]:
    """
    Drop compound indexes whose keys already appear inside a longer kept compound index.

    Every entry of `index_list` is a `(keys, options)` pair. Entries that
    `_is_compound_index` rejects are passed through untouched. The remaining
    compound entries are visited from the longest key list to the shortest,
    and an entry is discarded when `_is_subarray` reports that its keys occur
    within the keys of an entry that has already been kept.

    NOTE(review): containment is tested anywhere inside the longer key list,
    not only at its start - confirm this matches the target database's
    index semantics.

    :param index_list: The `(keys, options)` pairs to reduce.
    :return: The kept compound indexes (longest key list first) followed by
             all other indexes in their original relative order.
    """
    compound, others = [], []
    for entry in index_list:
        bucket = compound if self._is_compound_index(entry) else others
        bucket.append(entry)

    # Longest key lists first, so every possible container is examined
    # before the shorter indexes it may make redundant. The sort is stable,
    # so entries of equal length keep their incoming order.
    by_length_desc = sorted(compound, key=lambda entry: len(entry[0]), reverse=True)

    kept = []
    for candidate in by_length_desc:
        candidate_keys = candidate[0]
        covered = any(
            self._is_subarray(candidate_keys, kept_entry[0])
            for kept_entry in kept
        )
        if not covered:
            kept.append(candidate)
    return kept + others
133+
134+
def _is_compound_index(self, index: Tuple) -> bool:
135+
"""
136+
Check if the given index is a compound index.
137+
138+
:param index: The index to check.
139+
:return: True if the index is compound, False otherwise.
140+
"""
141+
not_compound_options = ['unique', 'sparse', 'expireAfterSeconds']
142+
keys, options = index
143+
if len(keys) > 1 and not any(opt in options for opt in not_compound_options):
144+
return True
145+
return False
146+
147+
def _is_subarray(self, sub: List, main: List) -> bool:
148+
"""
149+
Check if the list `sub` is an subarray of the list `main`.
150+
151+
:param sub: The list to check as a subset.
152+
:param main: The list to check against.
153+
:return: True if `sub` is an subarray of `main`, False otherwise.
154+
"""
155+
sub_len = len(sub)
156+
main_len = len(main)
157+
158+
if sub_len > main_len:
159+
return False
160+
161+
for i in range(main_len - sub_len + 1):
162+
if main[i:i + sub_len] == sub:
163+
return True
164+
return False

test.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,136 @@ def tearDown(self):
4545
self.source_client.drop_database(self.db_name)
4646
self.dest_client.drop_database(self.db_name)
4747

48+
def test_optimize_compound_indexes_true(self):
    """
    Test that the migrate_schema method optimizes compound indexes.

    The source collection gets one four-key compound index, several shorter
    compound indexes and one single-field index. After migration, the
    shorter compound indexes whose keys form a contiguous run of the
    four-key index are expected to be missing from the destination.
    """
    # Create the source collection and index information
    source_collection = self.source_client[self.db_name]["test_optimize"]
    source_collection.create_index([("a", 1), ("b", 1), ("c", 1), ("d", 1)])
    source_collection.create_index([("a", 1), ("b", 1), ("c", 1)])
    source_collection.create_index([("b", 1), ("c", 1)])
    source_collection.create_index([("c", 1), ("d", 1)])
    source_collection.create_index([("b", 1), ("d", 1)])
    source_collection.create_index([("d", 1)])

    collection_config_sections = []
    # NOTE(review): the last positional argument presumably enables compound
    # index optimization - confirm against CollectionConfigSection.
    collection_config_sections.append(CollectionConfigSection([f'{self.db_name}.*'], [], False, False, True))
    migrate_all_config = json.loads(self._generate_config(collection_config_sections))
    collection_configs = JsonParser(migrate_all_config, self.source_client).parse_json()

    # Create a SchemaMigration instance and call migrate_schema
    schema_migration = SchemaMigration()
    schema_migration.migrate_schema(self.source_client, self.dest_client, collection_configs)

    # Verify that the indexes were optimized in the destination collection
    dest_collection = self.dest_client[self.db_name]["test_optimize"]
    dest_indexes = dest_collection.index_information()
    # Kept: the longest compound index, (b, d) which is not a contiguous run
    # of (a, b, c, d), and the single-field index.
    self.assertIn("a_1_b_1_c_1_d_1", dest_indexes, "Compound index on 'a', 'b', 'c', 'd' was not migrated successfully.")
    self.assertIn("b_1_d_1", dest_indexes, "Compound index on 'b' and 'd' was not migrated successfully.")
    self.assertIn("d_1", dest_indexes, "Compound index on 'd' was not migrated successfully.")
    # Dropped: every compound index whose keys are a contiguous run of (a, b, c, d).
    self.assertNotIn("a_1_b_1_c_1", dest_indexes, "Compound index on 'a', 'b', 'c' was not optimized successfully.")
    self.assertNotIn("b_1_c_1", dest_indexes, "Compound index on 'b' and 'c' was not optimized successfully.")
    self.assertNotIn("c_1_d_1", dest_indexes, "Compound index on 'c' and 'd' was not optimized successfully.")
79+
80+
def test_optimize_compound_indexes_false(self):
    """
    Test that the migrate_schema method doesn't optimize compound indexes
    when the option is turned off.

    The same indexes as in the optimizing test are created on the source;
    all of them are expected to exist in the destination afterwards.
    """
    # Create the source collection and index information
    source_collection = self.source_client[self.db_name]["test_optimize"]
    source_collection.create_index([("a", 1), ("b", 1), ("c", 1), ("d", 1)])
    source_collection.create_index([("a", 1), ("b", 1), ("c", 1)])
    source_collection.create_index([("b", 1), ("c", 1)])
    source_collection.create_index([("c", 1), ("d", 1)])
    source_collection.create_index([("b", 1), ("d", 1)])
    source_collection.create_index([("d", 1)])

    collection_config_sections = []
    # NOTE(review): the last positional argument presumably disables compound
    # index optimization here - confirm against CollectionConfigSection.
    collection_config_sections.append(CollectionConfigSection([f'{self.db_name}.*'], [], False, False, False))
    migrate_all_config = json.loads(self._generate_config(collection_config_sections))
    collection_configs = JsonParser(migrate_all_config, self.source_client).parse_json()

    # Create a SchemaMigration instance and call migrate_schema
    schema_migration = SchemaMigration()
    schema_migration.migrate_schema(self.source_client, self.dest_client, collection_configs)

    # Verify that the indexes were not optimized in the destination collection
    dest_collection = self.dest_client[self.db_name]["test_optimize"]
    dest_indexes = dest_collection.index_information()
    self.assertIn("a_1_b_1_c_1_d_1", dest_indexes, "Compound index on 'a', 'b', 'c', 'd' was not migrated successfully.")
    self.assertIn("a_1_b_1_c_1", dest_indexes, "Compound index on 'a', 'b', 'c' was not migrated successfully.")
    self.assertIn("b_1_c_1", dest_indexes, "Compound index on 'b' and 'c' was not migrated successfully.")
    self.assertIn("c_1_d_1", dest_indexes, "Compound index on 'c' and 'd' was not migrated successfully.")
    self.assertIn("b_1_d_1", dest_indexes, "Compound index on 'b' and 'd' was not migrated successfully.")
    self.assertIn("d_1", dest_indexes, "Compound index on 'd' was not migrated successfully.")
111+
112+
def test_optimize_compound_indexes_filters_index_with_options(self):
    """
    Test that the migrate_schema method doesn't optimize away a compound
    index that carries options.

    A unique index on (a, b, c) is created next to a plain index on
    (a, b, c, d). Although its keys are contained in the longer index, the
    unique index is expected to be migrated with its `unique` option intact.
    """
    # Create the source collection and index information
    # The unique index is declared through a custom database command that
    # creates the collection together with its indexes.
    self.source_client[self.db_name].command({
        'customAction': 'CreateCollection',
        'collection': 'test_optimize',
        'indexes': [{ 'key': { 'a': 1, 'b': 1, 'c': 1 }, 'name': 'a_1_b_1_c_1', 'unique': True }]
    })
    self.source_client[self.db_name]['test_optimize'].create_index([("a", 1), ("b", 1), ("c", 1), ("d", 1)])

    collection_config_sections = []
    # NOTE(review): the last positional argument presumably enables compound
    # index optimization - confirm against CollectionConfigSection.
    collection_config_sections.append(CollectionConfigSection([f'{self.db_name}.*'], [], False, False, True))
    migrate_all_config = json.loads(self._generate_config(collection_config_sections))
    collection_configs = JsonParser(migrate_all_config, self.source_client).parse_json()

    # Create a SchemaMigration instance and call migrate_schema
    schema_migration = SchemaMigration()
    schema_migration.migrate_schema(self.source_client, self.dest_client, collection_configs)

    # Verify that the indexes were created in the destination collection
    dest_collection = self.dest_client[self.db_name]["test_optimize"]
    dest_indexes = dest_collection.index_information()
    self.assertIn("a_1_b_1_c_1_d_1", dest_indexes, "Compound index on 'a', 'b', 'c', 'd' was not migrated successfully.")
    self.assertIn("a_1_b_1_c_1", dest_indexes, "Compound index on 'a', 'b', 'c' was not migrated successfully.")
    self.assertEqual(dest_indexes["a_1_b_1_c_1"]["unique"], True, "Unique option is not set.")
139+
140+
def test_optimize_compound_indexes_filters_index_with_other_indexes(self):
    """
    Test that indexes carrying options are still migrated, with their
    options, when they coexist with plain compound indexes and
    optimization is enabled.

    The source holds a unique index, a unique index with a partial filter
    expression, a TTL index, and two plain compound indexes.
    """
    # Create the source collection and index information
    # The indexes with options are declared through a custom database
    # command that creates the collection together with its indexes.
    self.source_client[self.db_name].command({
        'customAction': 'CreateCollection',
        'collection': 'test_optimize',
        'indexes': [
            { 'key': { 'a': 1, 'b': 1, 'c': 1 }, 'name': 'a_1_b_1_c_1', 'unique': True },
            { 'key': { 'b': 1, 'c': 1 }, 'name': 'b_1_c_1', 'unique': True, 'partialFilterExpression': {"a": {"$gt": 0}}}
        ]
    })
    source_collection = self.source_client[self.db_name]['test_optimize']
    source_collection.create_index([("a", 1), ("b", 1), ("c", 1), ("d", 1)])
    # This plain (a, b) index is created on the source but is not asserted
    # on below.
    source_collection.create_index([("a", 1), ("b", 1)])
    source_collection.create_index([("d", 1)], expireAfterSeconds=10)

    collection_config_sections = []
    # NOTE(review): the last positional argument presumably enables compound
    # index optimization - confirm against CollectionConfigSection.
    collection_config_sections.append(CollectionConfigSection([f'{self.db_name}.*'], [], False, False, True))
    migrate_all_config = json.loads(self._generate_config(collection_config_sections))
    collection_configs = JsonParser(migrate_all_config, self.source_client).parse_json()

    # Create a SchemaMigration instance and call migrate_schema
    schema_migration = SchemaMigration()
    schema_migration.migrate_schema(self.source_client, self.dest_client, collection_configs)

    # Verify the indexes in the destination collection
    dest_collection = self.dest_client[self.db_name]["test_optimize"]
    dest_indexes = dest_collection.index_information()
    self.assertIn("a_1_b_1_c_1_d_1", dest_indexes, "Compound index on 'a', 'b', 'c', 'd' was not migrated successfully.")
    self.assertIn("a_1_b_1_c_1", dest_indexes, "Compound index on 'a', 'b', 'c' was not migrated successfully.")
    self.assertEqual(dest_indexes["a_1_b_1_c_1"]["unique"], True, "Unique option is not set.")
    self.assertIn("b_1_c_1", dest_indexes, "Compound index on 'b' and 'c' was not migrated successfully.")
    self.assertTrue("partialFilterExpression" in dest_indexes["b_1_c_1"], "Partial filter expression is not set.")
    self.assertIn("d_1", dest_indexes, "Compound index on 'd' was not migrated successfully.")
    self.assertEqual(dest_indexes["d_1"]["expireAfterSeconds"], 10, "TTL index on 'd' field has incorrect expireAfterSeconds.")
177+
48178
def test_ts_ttl_throws_error(self):
49179
"""
50180
Test that the migrate_schema method throws an error when a TTL index is created on a _ts field.

0 commit comments

Comments
 (0)