s26 p3: added support for composite key indexes in DDL (#875)

stephentea · chopradish · web-flow · commit c26d28cf07ac · 2026-03-16T13:32:59.000-04:00
* s26 p3: added support for composite key indexes in DDL

* (fmt): add newline at end of file

* s26-p3: added comments to ddl file

* s26-p3: added new composite key index scan test case

* s26-p3: formatting for ddl

---------

Co-authored-by: Aditya Chopra <adeecc11@gmail.com>
diff --git a/src/common/bustub_ddl.cpp b/src/common/bustub_ddl.cpp
@@ -6,7 +6,7 @@
 //
 // Identification: src/common/bustub_ddl.cpp
 //
-// Copyright (c) 2015-2025, Carnegie Mellon University Database Group
+// Copyright (c) 2015-2026, Carnegie Mellon University Database Group
 //
 //===----------------------------------------------------------------------===//
 
@@ -37,6 +37,7 @@
 #include "common/util/string_util.h"
 #include "concurrency/lock_manager.h"
 #include "concurrency/transaction.h"
+#include "container/hash/hash_function.h"
 #include "execution/execution_engine.h"
 #include "execution/executor_context.h"
 #include "execution/executors/mock_scan_executor.h"
@@ -50,6 +51,7 @@
 #include "recovery/log_manager.h"
 #include "storage/disk/disk_manager.h"
 #include "storage/disk/disk_manager_memory.h"
+#include "storage/index/generic_key.h"
 #include "type/value_factory.h"
 
 namespace bustub {
@@ -69,18 +71,39 @@ void BusTubInstance::HandleCreateStatement(Transaction *txn, const CreateStateme
     }
     auto key_schema = Schema::CopySchema(&info->schema_, col_ids);
 
-    // TODO(spring2023): If you want to support composite index key for leaderboard optimization, remove this assertion
-    // and create index with different key type that can hold multiple keys based on number of index columns.
-    //
-    // You can also create clustered index that directly stores value inside the index by modifying the value type.
-
-    if (col_ids.empty() || col_ids.size() > 2) {
-      throw NotImplementedException("only support creating index with exactly one or two columns");
+    if (col_ids.empty()) {
+      throw NotImplementedException("Primary key cannot be empty");
     }
 
-    index = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
-        txn, stmt.table_ + "_pk", stmt.table_, info->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
-        IntegerHashFunctionType{}, true);
+    // Compute the index key size in bytes, assuming every key column occupies exactly 4 bytes.
+    uint32_t key_size = col_ids.size() * 4;
+
+    // We create an index of sufficient size depending on the key size.
+    //! NOTE: Currently, we support key sizes of at most 64 bytes.
+    //!       This can be easily extended to support larger key sizes.
+    if (key_size <= 4) {
+      index = catalog_->CreateIndex<GenericKey<4>, RID, GenericComparator<4>>(txn, stmt.table_ + "_pk", stmt.table_,
+                                                                              info->schema_, key_schema, col_ids, 4,
+                                                                              HashFunction<GenericKey<4>>{}, true);
+    } else if (key_size <= 8) {
+      index = catalog_->CreateIndex<GenericKey<8>, RID, GenericComparator<8>>(txn, stmt.table_ + "_pk", stmt.table_,
+                                                                              info->schema_, key_schema, col_ids, 8,
+                                                                              HashFunction<GenericKey<8>>{}, true);
+    } else if (key_size <= 16) {
+      index = catalog_->CreateIndex<GenericKey<16>, RID, GenericComparator<16>>(txn, stmt.table_ + "_pk", stmt.table_,
+                                                                                info->schema_, key_schema, col_ids, 16,
+                                                                                HashFunction<GenericKey<16>>{}, true);
+    } else if (key_size <= 32) {
+      index = catalog_->CreateIndex<GenericKey<32>, RID, GenericComparator<32>>(txn, stmt.table_ + "_pk", stmt.table_,
+                                                                                info->schema_, key_schema, col_ids, 32,
+                                                                                HashFunction<GenericKey<32>>{}, true);
+    } else if (key_size <= 64) {
+      index = catalog_->CreateIndex<GenericKey<64>, RID, GenericComparator<64>>(txn, stmt.table_ + "_pk", stmt.table_,
+                                                                                info->schema_, key_schema, col_ids, 64,
+                                                                                HashFunction<GenericKey<64>>{}, true);
+    } else {
+      throw NotImplementedException("Unsupported: primary key size exceeds 64 bytes");
+    }
   }
   l.unlock();
 
@@ -108,41 +131,56 @@ void BusTubInstance::HandleIndexStatement(Transaction *txn, const IndexStatement
   }
   auto key_schema = Schema::CopySchema(&stmt.table_->schema_, col_ids);
 
-  // TODO(spring2023): If you want to support composite index key for leaderboard optimization, remove this assertion
-  // and create index with different key type that can hold multiple keys based on number of index columns.
-  //
-  // You can also create clustered index that directly stores value inside the index by modifying the value type.
-
-  if (col_ids.empty() || col_ids.size() > 2) {
-    throw NotImplementedException("only support creating index with exactly one or two columns");
+  if (col_ids.empty()) {
+    throw NotImplementedException("Index columns cannot be empty");
   }
 
   std::unique_lock<std::shared_mutex> l(catalog_lock_);
   std::shared_ptr<IndexInfo> info = nullptr;
 
-  if (stmt.index_type_.empty()) {
-    info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
-        txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
-        IntegerHashFunctionType{}, false);  // create default index
-  } else if (stmt.index_type_ == "hash") {
-    info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
-        txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
-        IntegerHashFunctionType{}, false, IndexType::HashTableIndex);
-  } else if (stmt.index_type_ == "bplustree") {
-    info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
-        txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
-        IntegerHashFunctionType{}, false, IndexType::BPlusTreeIndex);
+  IndexType index_type = IndexType::BPlusTreeIndex;  // Default
+  if (stmt.index_type_ == "hash") {
+    index_type = IndexType::HashTableIndex;
+  } else if (stmt.index_type_ == "bplustree" || stmt.index_type_.empty()) {
+    index_type = IndexType::BPlusTreeIndex;
   } else if (stmt.index_type_ == "stl_ordered") {
-    info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
-        txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
-        IntegerHashFunctionType{}, false, IndexType::STLOrderedIndex);
+    index_type = IndexType::STLOrderedIndex;
   } else if (stmt.index_type_ == "stl_unordered") {
-    info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
-        txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
-        IntegerHashFunctionType{}, false, IndexType::STLUnorderedIndex);
+    index_type = IndexType::STLUnorderedIndex;
   } else {
     UNIMPLEMENTED("unsupported index type " + stmt.index_type_);
   }
+
+  // Compute the index key size in bytes, assuming every key column occupies exactly 4 bytes.
+  uint32_t key_size = col_ids.size() * 4;
+
+  // We create an index of sufficient size depending on the key size.
+  //! NOTE: Currently, we support key sizes of at most 64 bytes.
+  //!       This can be easily extended to support larger key sizes.
+  if (key_size <= 4) {
+    info = catalog_->CreateIndex<GenericKey<4>, RID, GenericComparator<4>>(
+        txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 4,
+        HashFunction<GenericKey<4>>{}, false, index_type);
+  } else if (key_size <= 8) {
+    info = catalog_->CreateIndex<GenericKey<8>, RID, GenericComparator<8>>(
+        txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 8,
+        HashFunction<GenericKey<8>>{}, false, index_type);
+  } else if (key_size <= 16) {
+    info = catalog_->CreateIndex<GenericKey<16>, RID, GenericComparator<16>>(
+        txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 16,
+        HashFunction<GenericKey<16>>{}, false, index_type);
+  } else if (key_size <= 32) {
+    info = catalog_->CreateIndex<GenericKey<32>, RID, GenericComparator<32>>(
+        txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 32,
+        HashFunction<GenericKey<32>>{}, false, index_type);
+  } else if (key_size <= 64) {
+    info = catalog_->CreateIndex<GenericKey<64>, RID, GenericComparator<64>>(
+        txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, 64,
+        HashFunction<GenericKey<64>>{}, false, index_type);
+  } else {
+    throw NotImplementedException("Unsupported: index key size exceeds 64 bytes");
+  }
+
   l.unlock();
 
   if (info == nullptr) {
diff --git a/test/sql/p3.22-composite-key-index-scan.slt b/test/sql/p3.22-composite-key-index-scan.slt
@@ -0,0 +1,176 @@
+# 6 pts
+
+statement ok
+set force_optimizer_starter_rule=yes
+
+# Create a table
+statement ok
+create table t1(v1 int, v2 int, v3 int);
+
+query
+insert into t1 values (1, 50, 645), (2, 40, 721), (4, 20, 445), (5, 10, 445), (3, 30, 645);
+----
+5
+
+# Build index
+statement ok
+create index t1v1v2 on t1(v1, v2);
+
+statement ok
+explain select * from t1 where v1 = 1 and v2 = 50;
+
+query +ensure:index_scan
+select * from t1 where v1 = 1 and v2 = 50;
+----
+1 50 645
+
+statement ok
+explain select * from t1 where v1 = 1;
+
+query +ensure:index_scan
+select * from t1 where v1 = 1;
+----
+1 50 645
+
+query +ensure:index_scan
+select * from t1 where v1 = 0;
+----
+
+query +ensure:index_scan
+select * from t1 where v1 = 5 and v2 = 10 and v3 = 445;
+----
+5 10 445
+
+# Create another table
+statement ok
+create table t2(v1 int, v2 int, v3 int, v4 int);
+
+query
+insert into t2 values (1, 50, 6, 7), (2, 10, 8, 9), (4, 20, 7, 11), (5, 40, 3, 15), (3, 30, 100, 200);
+----
+5
+
+# Build index
+statement ok
+create index t2v2 on t2(v2);
+
+statement ok
+create index t2v3v4 on t2(v3, v4);
+
+query +ensure:index_scan
+select * from t2 where v2 = 50;
+----
+1 50 6 7
+
+query +ensure:index_scan
+select * from t2 where v3 = 8;
+----
+2 10 8 9
+
+query +ensure:index_scan
+select * from t2 where v3 = 8 and v4 = 9;
+----
+2 10 8 9
+
+query +ensure:seq_scan
+select * from t2 where v1 = 5;
+----
+5 40 3 15
+
+query +ensure:seq_scan
+select * from t2 where v4 = 15;
+----
+5 40 3 15
+
+query +ensure:index_scan
+select * from t2 where v2 = 20 and v3 = 7;
+----
+4 20 7 11
+
+query +ensure:index_scan
+select * from t2 where v1 = 4 and v3 = 7;
+----
+4 20 7 11
+
+# Create another table
+statement ok
+create table t3(v1 int, v2 int, v3 int, v4 int);
+
+# Build index
+statement ok
+create index t3v1v2v3 on t3(v1, v2, v3);
+
+query
+insert into t3 values (1, 21, 31, 41), (1, 21, 32, 42), (1, 22, 31, 42), (2, 22, 33, 43), (3, 23, 34, 44);
+----
+5
+
+query +ensure:index_scan
+select * from t3 where v1 = 1;
+----
+1 21 31 41
+1 21 32 42
+1 22 31 42
+
+query +ensure:index_scan
+select * from t3 where v1 = 1 and v2 = 21;
+----
+1 21 31 41
+1 21 32 42
+
+query +ensure:index_scan
+select * from t3 where v1 = 1 and v2 = 21 and v3 = 31;
+----
+1 21 31 41
+
+query +ensure:index_scan
+select * from t3 where v1 = 1 and v2 = 21 and v3 = 31 and v4 = 41;
+----
+1 21 31 41
+
+query +ensure:seq_scan
+select * from t3 where v2 = 22;
+----
+1 22 31 42
+2 22 33 43
+
+# Create another table
+statement ok
+create table t4(v1 int, v2 int, v3 int);
+
+query
+insert into t4 values (1, 21, 32), (1, 22, 33), (2, 23, 34), (3, 24, 35), (4, 25, 38);
+----
+5
+
+# Build index
+statement ok
+create index t4v1v2 on t4(v1, v2);
+
+statement ok
+create index t4v3 on t4(v3);
+
+query +ensure:index_scan
+select * from t4 where v1 = 1;
+----
+1 21 32
+1 22 33
+
+# Insert elements
+query
+insert into t4 values (1, 23, 36), (1, 24, 37), (3, 25, 39);
+----
+3
+
+query +ensure:index_scan
+select * from t4 where v1 = 1;
+----
+1 21 32
+1 22 33
+1 23 36
+1 24 37
+
+query +ensure:index_scan
+select * from t4 where v3 = 39;
+----
+3 25 39