Skip to content

Commit c26d28c

Browse files
s26 p3: added support for composite key indexes in DDL (#875)
* s26 p3: added support for composite key indexes in DDL
* (fmt): add newline at end of file
* s26-p3: added comments to ddl file
* s26-p3: added new composite key index scan test case
* s26-p3: formatting for ddl

Co-authored-by: Aditya Chopra <adeecc11@gmail.com>
1 parent 928a22c commit c26d28c

File tree

2 files changed

+250
-36
lines changed

2 files changed

+250
-36
lines changed

src/common/bustub_ddl.cpp

Lines changed: 74 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
//
77
// Identification: src/common/bustub_ddl.cpp
88
//
9-
// Copyright (c) 2015-2025, Carnegie Mellon University Database Group
9+
// Copyright (c) 2015-2026, Carnegie Mellon University Database Group
1010
//
1111
//===----------------------------------------------------------------------===//
1212

@@ -37,6 +37,7 @@
3737
#include "common/util/string_util.h"
3838
#include "concurrency/lock_manager.h"
3939
#include "concurrency/transaction.h"
40+
#include "container/hash/hash_function.h"
4041
#include "execution/execution_engine.h"
4142
#include "execution/executor_context.h"
4243
#include "execution/executors/mock_scan_executor.h"
@@ -50,6 +51,7 @@
5051
#include "recovery/log_manager.h"
5152
#include "storage/disk/disk_manager.h"
5253
#include "storage/disk/disk_manager_memory.h"
54+
#include "storage/index/generic_key.h"
5355
#include "type/value_factory.h"
5456

5557
namespace bustub {
@@ -69,18 +71,39 @@ void BusTubInstance::HandleCreateStatement(Transaction *txn, const CreateStateme
6971
}
7072
auto key_schema = Schema::CopySchema(&info->schema_, col_ids);
7173

72-
// TODO(spring2023): If you want to support composite index key for leaderboard optimization, remove this assertion
73-
// and create index with different key type that can hold multiple keys based on number of index columns.
74-
//
75-
// You can also create clustered index that directly stores value inside the index by modifying the value type.
76-
77-
if (col_ids.empty() || col_ids.size() > 2) {
78-
throw NotImplementedException("only support creating index with exactly one or two columns");
74+
if (col_ids.empty()) {
75+
throw NotImplementedException("Primary key cannot be empty");
7976
}
8077

81-
index = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
82-
txn, stmt.table_ + "_pk", stmt.table_, info->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
83-
IntegerHashFunctionType{}, true);
78+
// Compute the size (in bytes) of the index key, assuming every key column occupies 4 bytes.
79+
uint32_t key_size = col_ids.size() * 4;
80+
81+
// We create an index of sufficient size depending on the key size.
82+
//! NOTE: Currently, we support key sizes of at most 64 bytes.
83+
//! This can be easily extended to support larger key sizes.
84+
if (key_size <= 4) {
85+
index = catalog_->CreateIndex<GenericKey<4>, RID, GenericComparator<4>>(txn, stmt.table_ + "_pk", stmt.table_,
86+
info->schema_, key_schema, col_ids, 4,
87+
HashFunction<GenericKey<4>>{}, true);
88+
} else if (key_size <= 8) {
89+
index = catalog_->CreateIndex<GenericKey<8>, RID, GenericComparator<8>>(txn, stmt.table_ + "_pk", stmt.table_,
90+
info->schema_, key_schema, col_ids, 8,
91+
HashFunction<GenericKey<8>>{}, true);
92+
} else if (key_size <= 16) {
93+
index = catalog_->CreateIndex<GenericKey<16>, RID, GenericComparator<16>>(txn, stmt.table_ + "_pk", stmt.table_,
94+
info->schema_, key_schema, col_ids, 16,
95+
HashFunction<GenericKey<16>>{}, true);
96+
} else if (key_size <= 32) {
97+
index = catalog_->CreateIndex<GenericKey<32>, RID, GenericComparator<32>>(txn, stmt.table_ + "_pk", stmt.table_,
98+
info->schema_, key_schema, col_ids, 32,
99+
HashFunction<GenericKey<32>>{}, true);
100+
} else if (key_size <= 64) {
101+
index = catalog_->CreateIndex<GenericKey<64>, RID, GenericComparator<64>>(txn, stmt.table_ + "_pk", stmt.table_,
102+
info->schema_, key_schema, col_ids, 64,
103+
HashFunction<GenericKey<64>>{}, true);
104+
} else {
105+
throw NotImplementedException("Unsupported: primary key size exceeds 64 bytes");
106+
}
84107
}
85108
l.unlock();
86109

@@ -108,41 +131,56 @@ void BusTubInstance::HandleIndexStatement(Transaction *txn, const IndexStatement
108131
}
109132
auto key_schema = Schema::CopySchema(&stmt.table_->schema_, col_ids);
110133

111-
// TODO(spring2023): If you want to support composite index key for leaderboard optimization, remove this assertion
112-
// and create index with different key type that can hold multiple keys based on number of index columns.
113-
//
114-
// You can also create clustered index that directly stores value inside the index by modifying the value type.
115-
116-
if (col_ids.empty() || col_ids.size() > 2) {
117-
throw NotImplementedException("only support creating index with exactly one or two columns");
134+
if (col_ids.empty()) {
135+
throw NotImplementedException("Index columns cannot be empty");
118136
}
119137

120138
std::unique_lock<std::shared_mutex> l(catalog_lock_);
121139
std::shared_ptr<IndexInfo> info = nullptr;
122140

123-
if (stmt.index_type_.empty()) {
124-
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
125-
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
126-
IntegerHashFunctionType{}, false); // create default index
127-
} else if (stmt.index_type_ == "hash") {
128-
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
129-
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
130-
IntegerHashFunctionType{}, false, IndexType::HashTableIndex);
131-
} else if (stmt.index_type_ == "bplustree") {
132-
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
133-
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
134-
IntegerHashFunctionType{}, false, IndexType::BPlusTreeIndex);
141+
IndexType index_type = IndexType::BPlusTreeIndex; // Default
142+
if (stmt.index_type_ == "hash") {
143+
index_type = IndexType::HashTableIndex;
144+
} else if (stmt.index_type_ == "bplustree" || stmt.index_type_.empty()) {
145+
index_type = IndexType::BPlusTreeIndex;
135146
} else if (stmt.index_type_ == "stl_ordered") {
136-
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
137-
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
138-
IntegerHashFunctionType{}, false, IndexType::STLOrderedIndex);
147+
index_type = IndexType::STLOrderedIndex;
139148
} else if (stmt.index_type_ == "stl_unordered") {
140-
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
141-
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
142-
IntegerHashFunctionType{}, false, IndexType::STLUnorderedIndex);
149+
index_type = IndexType::STLUnorderedIndex;
143150
} else {
144151
UNIMPLEMENTED("unsupported index type " + stmt.index_type_);
145152
}
153+
154+
// Compute the size (in bytes) of the index key, assuming every key column occupies 4 bytes.
155+
uint32_t key_size = col_ids.size() * 4;
156+
157+
// We create an index of sufficient size depending on the key size.
158+
//! NOTE: Currently, we support key sizes of at most 64 bytes.
159+
//! This can be easily extended to support larger key sizes.
160+
if (key_size <= 4) {
161+
info = catalog_->CreateIndex<GenericKey<4>, RID, GenericComparator<4>>(
162+
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 4,
163+
HashFunction<GenericKey<4>>{}, false, index_type);
164+
} else if (key_size <= 8) {
165+
info = catalog_->CreateIndex<GenericKey<8>, RID, GenericComparator<8>>(
166+
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 8,
167+
HashFunction<GenericKey<8>>{}, false, index_type);
168+
} else if (key_size <= 16) {
169+
info = catalog_->CreateIndex<GenericKey<16>, RID, GenericComparator<16>>(
170+
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 16,
171+
HashFunction<GenericKey<16>>{}, false, index_type);
172+
} else if (key_size <= 32) {
173+
info = catalog_->CreateIndex<GenericKey<32>, RID, GenericComparator<32>>(
174+
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 32,
175+
HashFunction<GenericKey<32>>{}, false, index_type);
176+
} else if (key_size <= 64) {
177+
info = catalog_->CreateIndex<GenericKey<64>, RID, GenericComparator<64>>(
178+
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 64,
179+
HashFunction<GenericKey<64>>{}, false, index_type);
180+
} else {
181+
throw NotImplementedException("Unsupported: index key size exceeds 64 bytes");
182+
}
183+
146184
l.unlock();
147185

148186
if (info == nullptr) {
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
# 6 pts
2+
3+
statement ok
4+
set force_optimizer_starter_rule=yes
5+
6+
# Create a table
7+
statement ok
8+
create table t1(v1 int, v2 int, v3 int);
9+
10+
query
11+
insert into t1 values (1, 50, 645), (2, 40, 721), (4, 20, 445), (5, 10, 445), (3, 30, 645);
12+
----
13+
5
14+
15+
# Build index
16+
statement ok
17+
create index t1v1v2 on t1(v1, v2);
18+
19+
statement ok
20+
explain select * from t1 where v1 = 1 and v2 = 50;
21+
22+
query +ensure:index_scan
23+
select * from t1 where v1 = 1 and v2 = 50;
24+
----
25+
1 50 645
26+
27+
statement ok
28+
explain select * from t1 where v1 = 1;
29+
30+
query +ensure:index_scan
31+
select * from t1 where v1 = 1;
32+
----
33+
1 50 645
34+
35+
query +ensure:index_scan
36+
select * from t1 where v1 = 0;
37+
----
38+
39+
query +ensure:index_scan
40+
select * from t1 where v1 = 5 and v2 = 10 and v3 = 445;
41+
----
42+
5 10 445
43+
44+
# Create another table
45+
statement ok
46+
create table t2(v1 int, v2 int, v3 int, v4 int);
47+
48+
query
49+
insert into t2 values (1, 50, 6, 7), (2, 10, 8, 9), (4, 20, 7, 11), (5, 40, 3, 15), (3, 30, 100, 200);
50+
----
51+
5
52+
53+
# Build index
54+
statement ok
55+
create index t2v2 on t2(v2);
56+
57+
statement ok
58+
create index t2v3v4 on t2(v3, v4);
59+
60+
query +ensure:index_scan
61+
select * from t2 where v2 = 50;
62+
----
63+
1 50 6 7
64+
65+
query +ensure:index_scan
66+
select * from t2 where v3 = 8;
67+
----
68+
2 10 8 9
69+
70+
query +ensure:index_scan
71+
select * from t2 where v3 = 8 and v4 = 9;
72+
----
73+
2 10 8 9
74+
75+
query +ensure:seq_scan
76+
select * from t2 where v1 = 5;
77+
----
78+
5 40 3 15
79+
80+
query +ensure:seq_scan
81+
select * from t2 where v4 = 15;
82+
----
83+
5 40 3 15
84+
85+
query +ensure:index_scan
86+
select * from t2 where v2 = 20 and v3 = 7;
87+
----
88+
4 20 7 11
89+
90+
query +ensure:index_scan
91+
select * from t2 where v1 = 4 and v3 = 7;
92+
----
93+
4 20 7 11
94+
95+
# Create another table
96+
statement ok
97+
create table t3(v1 int, v2 int, v3 int, v4 int);
98+
99+
# Build index
100+
statement ok
101+
create index t3v1v2v3 on t3(v1, v2, v3);
102+
103+
query
104+
insert into t3 values (1, 21, 31, 41), (1, 21, 32, 42), (1, 22, 31, 42), (2, 22, 33, 43), (3, 23, 34, 44);
105+
----
106+
5
107+
108+
query +ensure:index_scan
109+
select * from t3 where v1 = 1;
110+
----
111+
1 21 31 41
112+
1 21 32 42
113+
1 22 31 42
114+
115+
query +ensure:index_scan
116+
select * from t3 where v1 = 1 and v2 = 21;
117+
----
118+
1 21 31 41
119+
1 21 32 42
120+
121+
query +ensure:index_scan
122+
select * from t3 where v1 = 1 and v2 = 21 and v3 = 31;
123+
----
124+
1 21 31 41
125+
126+
query +ensure:index_scan
127+
select * from t3 where v1 = 1 and v2 = 21 and v3 = 31 and v4 = 41;
128+
----
129+
1 21 31 41
130+
131+
query +ensure:seq_scan
132+
select * from t3 where v2 = 22;
133+
----
134+
1 22 31 42
135+
2 22 33 43
136+
137+
# Create another table
138+
statement ok
139+
create table t4(v1 int, v2 int, v3 int);
140+
141+
query
142+
insert into t4 values (1, 21, 32), (1, 22, 33), (2, 23, 34), (3, 24, 35), (4, 25, 38);
143+
----
144+
5
145+
146+
# Build index
147+
statement ok
148+
create index t4v1v2 on t4(v1, v2);
149+
150+
statement ok
151+
create index t4v3 on t4(v3);
152+
153+
query +ensure:index_scan
154+
select * from t4 where v1 = 1;
155+
----
156+
1 21 32
157+
1 22 33
158+
159+
# Insert elements
160+
query
161+
insert into t4 values (1, 23, 36), (1, 24, 37), (3, 25, 39);
162+
----
163+
3
164+
165+
query +ensure:index_scan
166+
select * from t4 where v1 = 1;
167+
----
168+
1 21 32
169+
1 22 33
170+
1 23 36
171+
1 24 37
172+
173+
query +ensure:index_scan
174+
select * from t4 where v3 = 39;
175+
----
176+
3 25 39

0 commit comments

Comments
 (0)