Skip to content
This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Commit 78b85dc

Browse files
schedutronpervazea
authored andcommitted
Refactor insert plan for better error handling and more efficiency (#1063)
* Better include path checks * Fix validator script * Fix local includes in existing files * functools.reduce() instead of reduce() * Refactor insert for error handling * Add tests for exception handling * Move exception handling from planner to query transformer * Fix linting * Resolve merge conflict (remove indicators) * Extract schema from catalog instead of physical table * Fix linting * Add braces * Re-fix linting * Refactor bad insert tests * Add database deletion in test * Lint insert tests file * Add not null test * Remove obsolete commented code and add important todo * Remove exception type getter * Remove changed rows equality checks * Reduce nesting * Fix insertion segfault * Remove rows_changed variable completely * Code cleanup * Add more not null logic * Add GetColumnNames() implementation * Make specify a map * Format code * Minor test changes * Format tests * Cleanups * Add semicolon, lint * Use commit instead of abort * Make specified a set * Add missing txn commit
1 parent ed09cb8 commit 78b85dc

File tree

4 files changed

+182
-15
lines changed

4 files changed

+182
-15
lines changed

src/catalog/table_catalog.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,22 @@ TableCatalogObject::GetColumnObjects(bool cached_only) {
257257
return column_objects;
258258
}
259259

260+
/* @brief get all column objects of this table into cache
261+
* @return map from column name to cached column object
262+
*/
263+
std::unordered_map<std::string, std::shared_ptr<ColumnCatalogObject>>
264+
TableCatalogObject::GetColumnNames(bool cached_only) {
265+
if (!valid_column_objects && !cached_only) {
266+
auto column_objects = GetColumnObjects();
267+
std::unordered_map<std::string, std::shared_ptr<ColumnCatalogObject> > column_names;
268+
for (auto& pair : column_objects) {
269+
auto column = pair.second;
270+
column_names[column->GetColumnName()] = column;
271+
}
272+
}
273+
return column_names;
274+
}
275+
260276
/* @brief get column object with column id from cache
261277
* @param column_id
262278
* @param cached_only if cached only, return nullptr on a cache miss

src/optimizer/query_to_operator_transformer.cpp

Lines changed: 66 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,18 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include <cmath>
14-
#include "settings/settings_manager.h"
1514

15+
#include "catalog/column_catalog.h"
1616
#include "catalog/database_catalog.h"
17+
#include "catalog/table_catalog.h"
1718
#include "expression/expression_util.h"
1819
#include "expression/subquery_expression.h"
1920
#include "optimizer/operator_expression.h"
2021
#include "optimizer/operators.h"
2122
#include "optimizer/query_node_visitor.h"
2223
#include "optimizer/query_to_operator_transformer.h"
23-
24-
#include "planner/seq_scan_plan.h"
25-
2624
#include "parser/statements.h"
27-
28-
#include "catalog/manager.h"
25+
#include "settings/settings_manager.h"
2926

3027
using std::vector;
3128
using std::shared_ptr;
@@ -245,11 +242,71 @@ void QueryToOperatorTransformer::Visit(parser::InsertStatement *op) {
245242
op->select->Accept(this);
246243
insert_expr->PushChild(output_expr_);
247244
output_expr_ = insert_expr;
245+
return;
246+
}
247+
// column_objects represents the columns for the current table as defined in
248+
// its schema
249+
auto column_objects = target_table->GetColumnObjects();
250+
// INSERT INTO table_name VALUES (val1, val2, ...), (val_a, val_b, ...), ...
251+
if (op->columns.empty()) {
252+
for (const auto &values : op->insert_values) {
253+
if (values.size() > column_objects.size()) {
254+
throw CatalogException(
255+
"ERROR: INSERT has more expressions than target columns");
256+
} else if (values.size() < column_objects.size()) {
257+
for (oid_t i = values.size(); i != column_objects.size(); ++i) {
258+
// check whether null values are allowed in the rest of the columns
259+
if (column_objects[i]->IsNotNull()) {
260+
// TODO: Add check for default value's existence for the current
261+
// column
262+
throw CatalogException(
263+
StringUtil::Format("ERROR: null value in column \"%s\" "
264+
"violates not-null constraint",
265+
column_objects[i]->GetColumnName().c_str()));
266+
}
267+
}
268+
}
269+
}
248270
} else {
249-
auto insert_expr = std::make_shared<OperatorExpression>(
250-
LogicalInsert::make(target_table, &op->columns, &op->insert_values));
251-
output_expr_ = insert_expr;
271+
// INSERT INTO table_name (col1, col2, ...) VALUES (val1, val2, ...), ...
272+
auto num_columns = op->columns.size();
273+
for (const auto &tuple : op->insert_values) { // check size of each tuple
274+
if (tuple.size() > num_columns) {
275+
throw CatalogException(
276+
"ERROR: INSERT has more expressions than target columns");
277+
} else if (tuple.size() < num_columns) {
278+
throw CatalogException(
279+
"ERROR: INSERT has more target columns than expressions");
280+
}
281+
}
282+
283+
// set below contains names of columns mentioned in the insert statement
284+
std::unordered_set<std::string> specified;
285+
auto column_names = target_table->GetColumnNames();
286+
287+
for (const auto col : op->columns) {
288+
if (column_names.find(col) == column_names.end()) {
289+
throw CatalogException(StringUtil::Format(
290+
"ERROR: column \"%s\" of relation \"%s\" does not exist",
291+
col.c_str(), target_table->GetTableName().c_str()));
292+
}
293+
specified.insert(col);
294+
}
295+
296+
for (auto column : column_names) {
297+
// this loop checks not null constraint for unspecified columns
298+
if (specified.find(column.first) == specified.end() &&
299+
column.second->IsNotNull()) {
300+
// TODO: Add check for default value's existence for the current column
301+
throw CatalogException(StringUtil::Format(
302+
"ERROR: null value in column \"%s\" violates not-null constraint",
303+
column.first.c_str()));
304+
}
305+
}
252306
}
307+
auto insert_expr = std::make_shared<OperatorExpression>(
308+
LogicalInsert::make(target_table, &op->columns, &op->insert_values));
309+
output_expr_ = insert_expr;
253310
}
254311

255312
void QueryToOperatorTransformer::Visit(parser::DeleteStatement *op) {

src/planner/insert_plan.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ InsertPlan::InsertPlan(
6666
is_prepared_stmt = true;
6767
}
6868
}
69+
// for remaining columns, insert defaults
70+
for (uint32_t column_id = values.size(); column_id != schema_col_count;
71+
++column_id) {
72+
SetDefaultValue(column_id);
73+
}
6974
}
7075
} else {
7176
// INSERT INTO table_name (col1, col2, ...) VALUES (val1, val2, ...);

test/sql/insert_sql_test.cpp

Lines changed: 95 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <memory>
1414

1515
#include "catalog/catalog.h"
16+
#include "common/exception.h"
1617
#include "common/harness.h"
1718
#include "concurrency/transaction_manager_factory.h"
1819
#include "executor/create_executor.h"
@@ -103,6 +104,12 @@ void CreateAndLoadTable7() {
103104
TestingSQLUtil::ExecuteSQLQuery("INSERT INTO test7 VALUES (55, 8, 999);");
104105
}
105106

107+
void CreateAndLoadTable8() {
108+
// Create a table with some defaults
109+
TestingSQLUtil::ExecuteSQLQuery(
110+
"CREATE TABLE test8(num1 int, num2 int, num3 int not null);");
111+
}
112+
106113
TEST_F(InsertSQLTests, InsertOneValue) {
107114
auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance();
108115
auto txn = txn_manager.BeginTransaction();
@@ -204,7 +211,7 @@ TEST_F(InsertSQLTests, InsertSpecifyColumns) {
204211
catalog::Catalog::GetInstance()->CreateDatabase(DEFAULT_DB_NAME, txn);
205212
txn_manager.CommitTransaction(txn);
206213

207-
CreateAndLoadTable();
214+
CreateAndLoadTable8();
208215

209216
std::vector<ResultValue> result;
210217
std::vector<FieldInfo> tuple_descriptor;
@@ -214,7 +221,8 @@ TEST_F(InsertSQLTests, InsertSpecifyColumns) {
214221
new optimizer::Optimizer());
215222

216223
// INSERT a tuple with columns specified
217-
std::string query("INSERT INTO test (b, a, c) VALUES (99, 8, 111);");
224+
std::string query(
225+
"INSERT INTO test8 (num3, num2, num1) VALUES (99, 8, 111);");
218226

219227
txn = txn_manager.BeginTransaction();
220228
auto plan = TestingSQLUtil::GeneratePlanWithOptimizer(optimizer, query, txn);
@@ -228,12 +236,12 @@ TEST_F(InsertSQLTests, InsertSpecifyColumns) {
228236

229237
// SELECT to find out if the tuple is correctly inserted
230238
TestingSQLUtil::ExecuteSQLQueryWithOptimizer(
231-
optimizer, "SELECT * FROM test WHERE a=8", result, tuple_descriptor,
239+
optimizer, "SELECT * FROM test8 WHERE num2=8", result, tuple_descriptor,
232240
rows_changed, error_message);
233241
EXPECT_EQ(3, result.size());
234-
EXPECT_EQ("8", TestingSQLUtil::GetResultValueAsString(result, 0));
235-
EXPECT_EQ("99", TestingSQLUtil::GetResultValueAsString(result, 1));
236-
EXPECT_EQ("111", TestingSQLUtil::GetResultValueAsString(result, 2));
242+
EXPECT_EQ("111", TestingSQLUtil::GetResultValueAsString(result, 0));
243+
EXPECT_EQ("8", TestingSQLUtil::GetResultValueAsString(result, 1));
244+
EXPECT_EQ("99", TestingSQLUtil::GetResultValueAsString(result, 2));
237245

238246
// free the database just created
239247
txn = txn_manager.BeginTransaction();
@@ -620,6 +628,87 @@ TEST_F(InsertSQLTests, NonExistentTable) {
620628
EXPECT_THROW(TestingSQLUtil::GeneratePlanWithOptimizer(optimizer, query, txn),
621629
peloton::CatalogException);
622630
txn_manager.CommitTransaction(txn);
631+
632+
// free the database just created
633+
txn = txn_manager.BeginTransaction();
634+
catalog::Catalog::GetInstance()->DropDatabaseWithName(DEFAULT_DB_NAME, txn);
635+
txn_manager.CommitTransaction(txn);
636+
}
637+
638+
TEST_F(InsertSQLTests, BadInserts) {
639+
auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance();
640+
auto txn = txn_manager.BeginTransaction();
641+
catalog::Catalog::GetInstance()->CreateDatabase(DEFAULT_DB_NAME, txn);
642+
txn_manager.CommitTransaction(txn);
643+
644+
CreateAndLoadTable8();
645+
646+
std::vector<ResultValue> result;
647+
std::vector<FieldInfo> tuple_descriptor;
648+
std::string error_message;
649+
std::unique_ptr<optimizer::AbstractOptimizer> optimizer(
650+
new optimizer::Optimizer());
651+
// Insert a tuple with more values than target columns
652+
std::string query("INSERT INTO test8 VALUES(1, 2, 3, 4);");
653+
txn = txn_manager.BeginTransaction();
654+
EXPECT_THROW(TestingSQLUtil::GeneratePlanWithOptimizer(optimizer, query, txn),
655+
peloton::CatalogException);
656+
txn_manager.CommitTransaction(txn);
657+
658+
// Insert a tuple with more target columns (implicit) than values, violating
659+
// not null constraint for num3
660+
query = "INSERT INTO test VALUES(3, 4);";
661+
txn = txn_manager.BeginTransaction();
662+
EXPECT_THROW(TestingSQLUtil::GeneratePlanWithOptimizer(optimizer, query, txn),
663+
peloton::CatalogException);
664+
txn_manager.CommitTransaction(txn);
665+
666+
// Insert a tuple with more target columns than values
667+
query = "INSERT INTO test8(num1, num3) VALUES(3);";
668+
txn = txn_manager.BeginTransaction();
669+
EXPECT_THROW(TestingSQLUtil::GeneratePlanWithOptimizer(optimizer, query, txn),
670+
peloton::CatalogException);
671+
txn_manager.CommitTransaction(txn);
672+
673+
// Insert a tuple with more values than target columns (multiple tuples)
674+
query = "INSERT INTO test8(num1, num3) VALUES (1, 2), (3, 4), (3, 4, 5);";
675+
txn = txn_manager.BeginTransaction();
676+
EXPECT_THROW(TestingSQLUtil::GeneratePlanWithOptimizer(optimizer, query, txn),
677+
peloton::CatalogException);
678+
txn_manager.CommitTransaction(txn);
679+
680+
// Insert a tuple with more target columns than values (multiple tuples)
681+
query = "INSERT INTO test8(num1, num3) VALUES (6, 7), (5);";
682+
txn = txn_manager.BeginTransaction();
683+
EXPECT_THROW(TestingSQLUtil::GeneratePlanWithOptimizer(optimizer, query, txn),
684+
peloton::CatalogException);
685+
txn_manager.CommitTransaction(txn);
686+
687+
// Insert a tuple with a nonexistent target column
688+
query = "INSERT INTO test8(numx) VALUES(3);";
689+
txn = txn_manager.BeginTransaction();
690+
EXPECT_THROW(TestingSQLUtil::GeneratePlanWithOptimizer(optimizer, query, txn),
691+
peloton::CatalogException);
692+
txn_manager.CommitTransaction(txn);
693+
694+
// Insert a tuple with a nonexistent target column (non-singleton tuple)
695+
query = "INSERT INTO test8(num1, num4) VALUES(3, 4);";
696+
txn = txn_manager.BeginTransaction();
697+
EXPECT_THROW(TestingSQLUtil::GeneratePlanWithOptimizer(optimizer, query, txn),
698+
peloton::CatalogException);
699+
txn_manager.CommitTransaction(txn);
700+
701+
// Insert a tuple row with not-null field (num3) unspecified
702+
query = "INSERT INTO test8 VALUES(1, 2)";
703+
txn = txn_manager.BeginTransaction();
704+
EXPECT_THROW(TestingSQLUtil::GeneratePlanWithOptimizer(optimizer, query, txn),
705+
peloton::CatalogException);
706+
txn_manager.CommitTransaction(txn);
707+
708+
// free the database just created
709+
txn = txn_manager.BeginTransaction();
710+
catalog::Catalog::GetInstance()->DropDatabaseWithName(DEFAULT_DB_NAME, txn);
711+
txn_manager.CommitTransaction(txn);
623712
}
624713

625714
} // namespace test

0 commit comments

Comments
 (0)