Skip to content

Commit a5c45ee

Browse files
authored
feat: refactor type system and add vector type, support stl index (#682)
Signed-off-by: Alex Chi Z <[email protected]>
1 parent 82912b1 commit a5c45ee

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+769
-167
lines changed

src/binder/bind_create.cpp

Lines changed: 44 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@
4646
#include "fmt/ranges.h"
4747
#include "nodes/nodes.hpp"
4848
#include "nodes/primnodes.hpp"
49+
#include "nodes/value.hpp"
4950
#include "pg_definitions.hpp"
5051
#include "postgres_parser.hpp"
5152
#include "type/type_id.h"
@@ -86,6 +87,16 @@ auto Binder::BindColumnDefinition(duckdb_libpgquery::PGColumnDef *cdef) -> Colum
8687
return {colname, TypeId::VARCHAR, varchar_max_length};
8788
}
8889

90+
if (name == "vector") {
91+
auto exprs = BindExpressionList(cdef->typeName->typmods);
92+
if (exprs.size() != 1) {
93+
throw bustub::Exception("should specify vector length");
94+
}
95+
const auto &vector_length_val = dynamic_cast<const BoundConstant &>(*exprs[0]);
96+
uint32_t vector_length = std::stoi(vector_length_val.ToString());
97+
return {colname, TypeId::VECTOR, vector_length};
98+
}
99+
89100
throw NotImplementedException(fmt::format("unsupported type: {}", name));
90101
}
91102

@@ -156,19 +167,51 @@ auto Binder::BindCreate(duckdb_libpgquery::PGCreateStmt *pg_stmt) -> std::unique
156167

157168
/**
 * Binds a parsed CREATE INDEX statement into an IndexStatement.
 *
 * For every index element that names a column, the column is resolved against the target table and
 * the first operator-class name attached to it (or an empty string when there is none) is recorded
 * at the same position in `col_options`. The access method and the statement-level options are
 * carried over as the index type and a list of (name, integer value) pairs.
 *
 * @param stmt the parsed index statement
 * @return the bound index statement
 * @throws NotImplementedException if an index element has no column name (index on an expression)
 * @throws std::bad_cast if a resolved column is not a BoundColumnRef
 */
auto Binder::BindIndex(duckdb_libpgquery::PGIndexStmt *stmt) -> std::unique_ptr<IndexStatement> {
  std::vector<std::unique_ptr<BoundColumnRef>> cols;
  std::vector<std::string> col_options;
  auto table = BindBaseTableRef(stmt->relation->relname, std::nullopt);

  for (auto cell = stmt->indexParams->head; cell != nullptr; cell = cell->next) {
    auto index_element = reinterpret_cast<duckdb_libpgquery::PGIndexElem *>(cell->data.ptr_value);
    if (index_element->name == nullptr) {
      throw NotImplementedException("create index by expr is not supported yet");
    }

    auto column_ref = ResolveColumn(*table, std::vector{std::string(index_element->name)});
    cols.emplace_back(std::make_unique<BoundColumnRef>(dynamic_cast<const BoundColumnRef &>(*column_ref)));

    // Only the first operator-class entry is used; columns without one get an empty option so that
    // `col_options` stays index-aligned with `cols`.
    std::string opt;
    if (index_element->opclass != nullptr && index_element->opclass->head != nullptr) {
      const auto *opclass_value =
          reinterpret_cast<duckdb_libpgquery::PGValue *>(index_element->opclass->head->data.ptr_value);
      // Constructing a std::string from a null char pointer is undefined behavior, so guard it.
      if (opclass_value != nullptr && opclass_value->val.str != nullptr) {
        opt = opclass_value->val.str;
      }
    }
    col_options.emplace_back(std::move(opt));
  }

  std::string index_type;
  if (stmt->accessMethod != nullptr) {
    index_type = stmt->accessMethod;
    // "art" is mapped to the empty index type.
    // NOTE(review): presumably "art" is what the parser fills in when no USING clause is given — confirm.
    if (index_type == "art") {
      index_type.clear();
    }
  }

  std::vector<std::pair<std::string, int>> options;
  if (stmt->options != nullptr) {
    for (auto cell = stmt->options->head; cell != nullptr; cell = cell->next) {
      auto def_elem = reinterpret_cast<duckdb_libpgquery::PGDefElem *>(cell->data.ptr_value);
      // An option given without an argument is recorded as 0 instead of an indeterminate value.
      int val = 0;
      if (def_elem->arg != nullptr) {
        // NOTE(review): the argument is read as an integer node without checking its node type — confirm
        // that callers only pass integer-valued options.
        val = static_cast<int>(reinterpret_cast<duckdb_libpgquery::PGValue *>(def_elem->arg)->val.ival);
      }
      options.emplace_back(def_elem->defname, val);
    }
  }

  return std::make_unique<IndexStatement>(stmt->idxname, std::move(table), std::move(cols), std::move(index_type),
                                          std::move(col_options), std::move(options));
}
173216

174217
} // namespace bustub

src/binder/bind_select.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include <iterator>
33
#include <memory>
44
#include <optional>
5+
#include <string>
56
#include <vector>
67
#include "binder/binder.h"
78
#include "binder/bound_expression.h"
@@ -467,6 +468,10 @@ auto Binder::BindConstant(duckdb_libpgquery::PGAConst *node) -> std::unique_ptr<
467468
BUSTUB_ENSURE(val.val.ival <= BUSTUB_INT32_MAX, "value out of range");
468469
return std::make_unique<BoundConstant>(ValueFactory::GetIntegerValue(static_cast<int32_t>(val.val.ival)));
469470
}
471+
case duckdb_libpgquery::T_PGFloat: {
472+
double parsed_val = std::stod(std::string(val.val.str));
473+
return std::make_unique<BoundConstant>(ValueFactory::GetDecimalValue(parsed_val));
474+
}
470475
case duckdb_libpgquery::T_PGString: {
471476
return std::make_unique<BoundConstant>(ValueFactory::GetVarcharValue(val.val.str));
472477
}

src/binder/statement/index_statement.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,19 @@
77
namespace bustub {
88

99
/**
 * Constructs a bound CREATE INDEX statement, taking ownership of every argument.
 *
 * @param index_name name of the index
 * @param table bound reference to the table being indexed
 * @param cols bound references to the indexed columns
 * @param index_type index type requested by the statement
 * @param col_options per-column option strings
 * @param options statement-level (name, integer value) options
 */
IndexStatement::IndexStatement(std::string index_name, std::unique_ptr<BoundBaseTableRef> table,
                               std::vector<std::unique_ptr<BoundColumnRef>> cols, std::string index_type,
                               std::vector<std::string> col_options,
                               std::vector<std::pair<std::string, int>> options)
    : BoundStatement(StatementType::INDEX_STATEMENT),
      index_name_(std::move(index_name)),
      table_(std::move(table)),
      cols_(std::move(cols)),
      index_type_(std::move(index_type)),
      col_options_(std::move(col_options)),
      options_(std::move(options)) {}
1519

1620
auto IndexStatement::ToString() const -> std::string {
17-
return fmt::format("BoundIndex {{ index_name={}, table={}, cols={} }}", index_name_, *table_, cols_);
21+
return fmt::format("BoundIndex {{ index_name={}, table={}, cols={}, using={}, col_options=[{}], options=[{}] }}",
22+
index_name_, *table_, cols_, index_type_, fmt::join(col_options_, ","), fmt::join(options_, ","));
1823
}
1924

2025
} // namespace bustub

src/catalog/column.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,26 +14,28 @@
1414

1515
#include <sstream>
1616
#include <string>
17+
#include "type/type_id.h"
1718

1819
namespace bustub {
1920

2021
auto Column::ToString(bool simplified) const -> std::string {
2122
if (simplified) {
2223
std::ostringstream os;
2324
os << column_name_ << ":" << Type::TypeIdToString(column_type_);
25+
if (column_type_ == VARCHAR) {
26+
os << "(" << length_ << ")";
27+
}
28+
if (column_type_ == VECTOR) {
29+
os << "(" << length_ / sizeof(double) << ")";
30+
}
2431
return (os.str());
2532
}
2633

2734
std::ostringstream os;
2835

2936
os << "Column[" << column_name_ << ", " << Type::TypeIdToString(column_type_) << ", "
3037
<< "Offset:" << column_offset_ << ", ";
31-
32-
if (IsInlined()) {
33-
os << "FixedLength:" << fixed_length_;
34-
} else {
35-
os << "VarLength:" << variable_length_;
36-
}
38+
os << "Length:" << length_;
3739
os << "]";
3840
return (os.str());
3941
}

src/catalog/schema.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,11 @@ Schema::Schema(const std::vector<Column> &columns) {
2929
}
3030
// set column offset
3131
column.column_offset_ = curr_offset;
32-
curr_offset += column.GetFixedLength();
32+
if (column.IsInlined()) {
33+
curr_offset += column.GetStorageSize();
34+
} else {
35+
curr_offset += sizeof(uint32_t);
36+
}
3337

3438
// add column
3539
this->columns_.push_back(column);

src/common/bustub_ddl.cpp

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,15 +106,37 @@ void BustubInstance::HandleIndexStatement(Transaction *txn, const IndexStatement
106106
}
107107

108108
std::unique_lock<std::shared_mutex> l(catalog_lock_);
109-
auto info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
110-
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
111-
IntegerHashFunctionType{}, false, IndexType::HashTableIndex);
109+
IndexInfo *info = nullptr;
110+
111+
if (stmt.index_type_.empty()) {
112+
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
113+
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
114+
IntegerHashFunctionType{}, false); // create default index
115+
} else if (stmt.index_type_ == "hash") {
116+
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
117+
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
118+
IntegerHashFunctionType{}, false, IndexType::HashTableIndex);
119+
} else if (stmt.index_type_ == "bplustree") {
120+
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
121+
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
122+
IntegerHashFunctionType{}, false, IndexType::BPlusTreeIndex);
123+
} else if (stmt.index_type_ == "stl_ordered") {
124+
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
125+
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
126+
IntegerHashFunctionType{}, false, IndexType::STLOrderedIndex);
127+
} else if (stmt.index_type_ == "stl_unordered") {
128+
info = catalog_->CreateIndex<IntegerKeyType, IntegerValueType, IntegerComparatorType>(
129+
txn, stmt.index_name_, stmt.table_->table_, stmt.table_->schema_, key_schema, col_ids, TWO_INTEGER_SIZE,
130+
IntegerHashFunctionType{}, false, IndexType::STLUnorderedIndex);
131+
} else {
132+
UNIMPLEMENTED("unsupported index type " + stmt.index_type_);
133+
}
112134
l.unlock();
113135

114136
if (info == nullptr) {
115137
throw bustub::Exception("Failed to create index");
116138
}
117-
WriteOneCell(fmt::format("Index created with id = {}", info->index_oid_), writer);
139+
WriteOneCell(fmt::format("Index created with id = {} with type = {}", info->index_oid_, info->index_type_), writer);
118140
}
119141

120142
void BustubInstance::HandleExplainStatement(Transaction *txn, const ExplainStatement &stmt, ResultWriter &writer) {

src/execution/plan_node.cpp

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ auto SeqScanPlanNode::InferScanSchema(const BoundBaseTableRef &table) -> Schema
1818
std::vector<Column> schema;
1919
for (const auto &column : table.schema_.GetColumns()) {
2020
auto col_name = fmt::format("{}.{}", table.GetBoundTableName(), column.GetName());
21-
schema.emplace_back(Column(col_name, column));
21+
schema.emplace_back(col_name, column);
2222
}
2323
return Schema(schema);
2424
}
@@ -38,12 +38,7 @@ auto ProjectionPlanNode::InferProjectionSchema(const std::vector<AbstractExpress
3838
std::vector<Column> schema;
3939
for (const auto &expr : expressions) {
4040
auto type_id = expr->GetReturnType();
41-
if (type_id != TypeId::VARCHAR) {
42-
schema.emplace_back("<unnamed>", type_id);
43-
} else {
44-
// TODO(chi): infer the correct VARCHAR length. Maybe it doesn't matter for executors?
45-
schema.emplace_back("<unnamed>", type_id, VARCHAR_DEFAULT_LENGTH);
46-
}
41+
schema.emplace_back(expr->GetReturnType().WithColumnName("<unnamed>"));
4742
}
4843
return Schema(schema);
4944
}
@@ -55,7 +50,7 @@ auto ProjectionPlanNode::RenameSchema(const Schema &schema, const std::vector<st
5550
}
5651
size_t idx = 0;
5752
for (const auto &column : schema.GetColumns()) {
58-
output.emplace_back(Column(col_names[idx++], column));
53+
output.emplace_back(col_names[idx++], column);
5954
}
6055
return Schema(output);
6156
}
@@ -66,30 +61,21 @@ auto AggregationPlanNode::InferAggSchema(const std::vector<AbstractExpressionRef
6661
std::vector<Column> output;
6762
output.reserve(group_bys.size() + aggregates.size());
6863
for (const auto &column : group_bys) {
69-
// TODO(chi): correctly process VARCHAR column
70-
if (column->GetReturnType() == TypeId::VARCHAR) {
71-
output.emplace_back(Column("<unnamed>", column->GetReturnType(), 128));
72-
} else {
73-
output.emplace_back(Column("<unnamed>", column->GetReturnType()));
74-
}
64+
output.emplace_back(column->GetReturnType().WithColumnName("<unnamed>"));
7565
}
7666
for (size_t idx = 0; idx < aggregates.size(); idx++) {
7767
// TODO(chi): correctly infer agg call return type
78-
output.emplace_back(Column("<unnamed>", TypeId::INTEGER));
68+
output.emplace_back("<unnamed>", TypeId::INTEGER);
7969
}
8070
return Schema(output);
8171
}
8272

8373
/**
 * Builds the output schema of a window-function plan node: one column per expression, each named
 * "<unnamed>" and derived from that expression's return type.
 *
 * @param columns the output expressions of the plan node
 * @return a schema with one unnamed column per expression, in the same order
 */
auto WindowFunctionPlanNode::InferWindowSchema(const std::vector<AbstractExpressionRef> &columns) -> Schema {
  std::vector<Column> inferred;
  inferred.reserve(columns.size());
  // TODO(avery): correctly infer window call return type
  for (const auto &expr : columns) {
    inferred.emplace_back(expr->GetReturnType().WithColumnName("<unnamed>"));
  }
  return Schema(inferred);
}

src/include/binder/statement/index_statement.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#include <memory>
1111
#include <string>
12+
#include <utility>
1213
#include <vector>
1314

1415
#include "binder/bound_statement.h"
@@ -21,7 +22,8 @@ namespace bustub {
2122
class IndexStatement : public BoundStatement {
2223
public:
2324
explicit IndexStatement(std::string index_name, std::unique_ptr<BoundBaseTableRef> table,
24-
std::vector<std::unique_ptr<BoundColumnRef>> cols);
25+
std::vector<std::unique_ptr<BoundColumnRef>> cols, std::string index_type,
26+
std::vector<std::string> col_options, std::vector<std::pair<std::string, int>> options);
2527

2628
/** Name of the index */
2729
std::string index_name_;
@@ -32,6 +34,12 @@ class IndexStatement : public BoundStatement {
3234
/** Name of the columns */
3335
std::vector<std::unique_ptr<BoundColumnRef>> cols_;
3436

37+
/** Using */
38+
std::string index_type_;
39+
40+
std::vector<std::string> col_options_;
41+
std::vector<std::pair<std::string, int>> options_;
42+
3543
auto ToString() const -> std::string override;
3644
};
3745

0 commit comments

Comments
 (0)