Skip to content
This repository was archived by the owner on Feb 20, 2023. It is now read-only.

Commit 72bebca

Browse files
authored
Self-Driving Index/Knob Actions (#1426)
1 parent 3b07e8f commit 72bebca

36 files changed

+1132
-59
lines changed

CMakeLists.txt

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ print_sys_info("OS_NAME;OS_RELEASE;OS_VERSION;OS_PLATFORM")
152152
# CMake options, specify with -DNOISEPAGE_{option}=On.
153153
# NOISEPAGE_BUILD_BENCHMARKS : Enable building benchmarks as part of the ALL target. Default ON.
154154
# NOISEPAGE_BUILD_TESTS : Enable building tests as part of the ALL (but the Self-Driving test) target. Default ON.
155-
# NOISEPAGE_BUILD_SELF_DRIVING_TESTS : Enable building self-driving end-to-end tests. Default OFF
155+
# NOISEPAGE_BUILD_SELF_DRIVING_E2E_TESTS : Enable building self-driving end-to-end tests. Default OFF.
156156
# NOISEPAGE_GENERATE_COVERAGE : Enable C++ code coverage. Default OFF.
157157
# NOISEPAGE_UNITTEST_OUTPUT_ON_FAILURE : Enable verbose unittest failures. Default OFF. Can be very verbose.
158158
# NOISEPAGE_UNITY_BUILD : Enable unity (aka jumbo) builds. Default OFF.
@@ -185,8 +185,8 @@ option(NOISEPAGE_BUILD_TESTS
185185
"Enable building tests as part of the ALL target."
186186
ON)
187187

188-
option(NOISEPAGE_BUILD_SELF_DRIVING_TESTS
189-
"Enable building self-driving tests as part of the ALL target."
188+
option(NOISEPAGE_BUILD_SELF_DRIVING_E2E_TESTS
189+
"Enable building self-driving end-to-end tests as part of the ALL target."
190190
OFF)
191191

192192
option(NOISEPAGE_GENERATE_COVERAGE
@@ -815,12 +815,12 @@ add_custom_target(jumbotests
815815
-T Test # Run tests and log it to Testing/*/Test.xml.
816816
--timeout 3000 # 3000 second timeout per test.
817817
USES_TERMINAL)
818-
add_custom_target(self_driving_test # For now, this target is specifically used for self-driving pipeline
818+
add_custom_target(self_driving_e2e_test # For now, this target is specifically used for self-driving pipeline
819819
ctest
820820
--resource-spec-file ${BUILD_SUPPORT_DATA_DIR}/ctest_resource_specs.json # For controlling conflicting tests.
821821
${UNITTEST_OUTPUT_ON_FAILURE} # Whether to print output when a test fails.
822822
-j ${NOISEPAGE_TEST_PARALLELISM} # Maximum number of parallel jobs.
823-
-L self_driving_test # Run all tests that have a label like this. See footgun warning above.
823+
-L self_driving_e2e_test # Run all tests that have a label like this. See footgun warning above.
824824
--no-compress-output # Output verbosely so that it can be logged.
825825
-T Test # Run tests and log it to Testing/*/Test.xml.
826826
--timeout 3000 # 3000 second timeout per test.
@@ -907,8 +907,9 @@ function(add_noisepage_test
907907
add_dependencies(${TEST_LABEL} ${TEST_NAME})
908908
endfunction()
909909

910-
# NOTE:Self-driving End-To-End tests are not included below because the test require dependencies to other part of the system, thus could not be run as standalone tests.
911-
# They will be added to a separate test target: self_driving_test. See NOISEPAGE_SELF_DRIVING_TEST_SOURCES for details
910+
# NOTE: Self-driving End-To-End tests are not included below because the tests require dependencies on other parts of
911+
# the system and thus cannot be run as standalone tests. They will be added to a separate test target:
912+
# self_driving_e2e_test. See NOISEPAGE_SELF_DRIVING_E2E_TEST_SOURCES for details.
912913
file(GLOB_RECURSE NOISEPAGE_TEST_SOURCES
913914
"test/binder/*.cpp"
914915
"test/catalog/*.cpp"
@@ -921,6 +922,7 @@ file(GLOB_RECURSE NOISEPAGE_TEST_SOURCES
921922
"test/optimizer/*.cpp"
922923
"test/parser/*.cpp"
923924
"test/planner/*.cpp"
925+
"test/self_driving/*.cpp"
924926
"test/settings/*.cpp"
925927
"test/storage/*.cpp"
926928
"test/traffic_cop/*.cpp"
@@ -945,22 +947,22 @@ foreach (NOISEPAGE_TEST_CPP ${NOISEPAGE_TEST_SOURCES})
945947
endforeach ()
946948

947949

948-
file(GLOB_RECURSE NOISEPAGE_SELF_DRIVING_TEST_SOURCES
949-
"test/self_driving/*.cpp"
950+
file(GLOB_RECURSE NOISEPAGE_SELF_DRIVING_E2E_TEST_SOURCES
951+
"test/self_driving_e2e/*.cpp"
950952
)
951953

952-
foreach (NOISEPAGE_TEST_CPP ${NOISEPAGE_SELF_DRIVING_TEST_SOURCES})
954+
foreach (NOISEPAGE_TEST_CPP ${NOISEPAGE_SELF_DRIVING_E2E_TEST_SOURCES})
953955
file(RELATIVE_PATH NOISEPAGE_TEST_CPP_REL "${PROJECT_SOURCE_DIR}/test" ${NOISEPAGE_TEST_CPP})
954956
get_filename_component(NOISEPAGE_TEST_DIR ${NOISEPAGE_TEST_CPP_REL} DIRECTORY)
955957
get_filename_component(NOISEPAGE_TEST ${NOISEPAGE_TEST_CPP} NAME_WE)
956958

957-
if (NOT ${NOISEPAGE_BUILD_SELF_DRIVING_TESTS})
959+
if (NOT ${NOISEPAGE_BUILD_SELF_DRIVING_E2E_TESTS})
958960
set(EXCLUDE_OR_NOT "EXCLUDE_ALL")
959961
else ()
960962
set(EXCLUDE_OR_NOT "NO_EXCLUDE")
961963
endif ()
962964

963-
add_noisepage_test(${NOISEPAGE_TEST} ${NOISEPAGE_TEST_CPP} self_driving_test ${EXCLUDE_OR_NOT} NO_UNITY)
965+
add_noisepage_test(${NOISEPAGE_TEST} ${NOISEPAGE_TEST_CPP} self_driving_e2e_test ${EXCLUDE_OR_NOT} NO_UNITY)
964966
endforeach ()
965967

966968

@@ -1024,6 +1026,7 @@ add_jumbotest("test/network" "network_test;")
10241026
add_jumbotest("test/optimizer" "hyperloglog_test;")
10251027
add_jumbotest("test/parser" "")
10261028
add_jumbotest("test/planner" "")
1029+
add_jumbotest("test/self_driving" "")
10271030
add_jumbotest("test/settings" "")
10281031
add_jumbotest("test/storage" "block_access_controller_test;block_compactor_test;bwtree_test;bwtree_index_test;data_table_test;data_table_concurrent_test;hash_index_test;large_garbage_collector_test;log_test;tuple_access_strategy_test;")
10291032
add_jumbotest("test/traffic_cop" "traffic_cop_test;")

Jenkinsfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -495,8 +495,8 @@ pipeline {
495495
sh script: '''
496496
cd build
497497
export BUILD_ABS_PATH=`pwd`
498-
timeout 10m ninja self_driving_test
499-
''', label: 'Running self-driving test'
498+
timeout 10m ninja self_driving_e2e_test
499+
''', label: 'Running self-driving end-to-end test'
500500

501501
sh script: 'sudo lsof -i -P -n | grep LISTEN || true', label: 'Check ports.'
502502
}

script/model/training_util/global_data_constructing_util.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ def get_data(input_path, mini_model_map, model_results_path, warmup_period, use_
3232
:return: (GlobalResourceData list, GlobalImpactData list)
3333
"""
3434
cache_file = input_path + '/global_model_data.pickle'
35-
headers_file = input_path + '/global_model_headers.pickle'
3635
if os.path.exists(cache_file):
3736
with open(cache_file, 'rb') as pickle_file:
3837
resource_data_list, impact_data_list, data_info.RAW_FEATURES_CSV_INDEX, data_info.RAW_TARGET_CSV_INDEX, data_info.INPUT_CSV_INDEX, data_info.TARGET_CSV_INDEX = pickle.load(pickle_file)

src/include/optimizer/index_util.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,11 @@ class IndexUtil {
7474
* @param allow_cves Allow CVEs
7575
* @param scan_type IndexScanType to utilize
7676
* @param bounds Relevant bounds for the index scan
77-
* @returns Whether index can be used
77+
* @returns The first element represents whether the index can be used, the second element represents whether
78+
* all the predicates' indexable columns that we support are covered by the given index (schema). Note that a "true"
79+
* value for the second element does not guarantee an optimal index.
7880
*/
79-
static bool SatisfiesPredicateWithIndex(
81+
static std::pair<bool, bool> SatisfiesPredicateWithIndex(
8082
catalog::CatalogAccessor *accessor, catalog::table_oid_t tbl_oid, const std::string &tbl_alias,
8183
catalog::index_oid_t index_oid, const std::vector<AnnotatedExpression> &predicates, bool allow_cves,
8284
planner::IndexScanType *scan_type,
@@ -94,9 +96,10 @@ class IndexUtil {
9496
* @param allow_cves Allow ColumnValueExpressions
9597
* @param idx_scan_type IndexScanType to utilize
9698
* @param bounds Relevant bounds for the index scan
97-
* @returns Whether predicate can be utilized
99+
* @returns The first element represents whether predicate can be utilized, the second element represents whether
100+
* all the predicates' indexable columns that we support are covered by the given index (schema)
98101
*/
99-
static bool CheckPredicates(
102+
static std::pair<bool, bool> CheckPredicates(
100103
const catalog::IndexSchema &schema, catalog::table_oid_t tbl_oid, const std::string &tbl_alias,
101104
const std::unordered_map<catalog::col_oid_t, catalog::indexkeycol_oid_t> &lookup,
102105
const std::unordered_set<catalog::col_oid_t> &mapped_cols, const std::vector<AnnotatedExpression> &predicates,

src/include/optimizer/physical_operators.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,12 +143,14 @@ class IndexScan : public OperatorNodeContents<IndexScan> {
143143
* @param is_for_update whether the scan is used for update
144144
* @param scan_type IndexScanType
145145
* @param bounds Bounds for IndexScan
146+
* @param cover_all_columns whether the index covers all indexable columns (that we support) in the predicates
146147
* @return an IndexScan operator
147148
*/
148149
static Operator Make(catalog::db_oid_t database_oid, catalog::table_oid_t tbl_oid, catalog::index_oid_t index_oid,
149150
std::vector<AnnotatedExpression> &&predicates, bool is_for_update,
150151
planner::IndexScanType scan_type,
151-
std::unordered_map<catalog::indexkeycol_oid_t, std::vector<planner::IndexExpression>> bounds);
152+
std::unordered_map<catalog::indexkeycol_oid_t, std::vector<planner::IndexExpression>> bounds,
153+
bool cover_all_columns);
152154

153155
/**
154156
* Copy
@@ -197,6 +199,11 @@ class IndexScan : public OperatorNodeContents<IndexScan> {
197199
return bounds_;
198200
}
199201

202+
/**
203+
* @return whether the index covers all predicate columns
204+
*/
205+
bool GetCoverAllColumns() const { return cover_all_columns_; }
206+
200207
private:
201208
/**
202209
* OID of the database
@@ -232,6 +239,12 @@ class IndexScan : public OperatorNodeContents<IndexScan> {
232239
* Bounds
233240
*/
234241
std::unordered_map<catalog::indexkeycol_oid_t, std::vector<planner::IndexExpression>> bounds_;
242+
243+
/**
244+
*
245+
* Whether the index covers all indexable columns (that we support) in the predicates
246+
*/
247+
bool cover_all_columns_;
235248
};
236249

237250
/**

src/include/planner/plannodes/index_scan_plan_node.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,15 @@ class IndexScanPlanNode : public AbstractScanPlanNode {
110110
return *this;
111111
}
112112

113+
/**
114+
* @param cover_all_columns whether the index covers all predicate columns
115+
* @return builder object
116+
*/
117+
Builder &SetCoverAllColumns(bool cover_all_columns) {
118+
cover_all_columns_ = cover_all_columns;
119+
return *this;
120+
}
121+
113122
/**
114123
* Build the Index scan plan node
115124
* @return plan node
@@ -125,6 +134,7 @@ class IndexScanPlanNode : public AbstractScanPlanNode {
125134
std::unordered_map<catalog::indexkeycol_oid_t, IndexExpression> lo_index_cols_{};
126135
std::unordered_map<catalog::indexkeycol_oid_t, IndexExpression> hi_index_cols_{};
127136
uint64_t index_size_{0};
137+
bool cover_all_columns_{false};
128138
};
129139

130140
private:
@@ -141,6 +151,7 @@ class IndexScanPlanNode : public AbstractScanPlanNode {
141151
* @param lo_index_cols lower bound of the scan (or exact key when scan type = Exact).
142152
* @param hi_index_cols upper bound of the scan
143153
* @param index_size number of tuples in index
154+
* @param cover_all_columns whether the index covers all predicate columns
144155
* @param plan_node_id Plan node id
145156
*/
146157
IndexScanPlanNode(std::vector<std::unique_ptr<AbstractPlanNode>> &&children,
@@ -151,7 +162,7 @@ class IndexScanPlanNode : public AbstractScanPlanNode {
151162
std::unordered_map<catalog::indexkeycol_oid_t, IndexExpression> &&lo_index_cols,
152163
std::unordered_map<catalog::indexkeycol_oid_t, IndexExpression> &&hi_index_cols,
153164
uint32_t scan_limit, bool scan_has_limit, uint32_t scan_offset, bool scan_has_offset,
154-
uint64_t index_size, uint64_t table_num_tuple, plan_node_id_t plan_node_id);
165+
uint64_t index_size, uint64_t table_num_tuple, bool cover_all_columns, plan_node_id_t plan_node_id);
155166

156167
public:
157168
/**
@@ -217,6 +228,11 @@ class IndexScanPlanNode : public AbstractScanPlanNode {
217228
*/
218229
PlanNodeType GetPlanNodeType() const override { return PlanNodeType::INDEXSCAN; }
219230

231+
/**
232+
* @return whether the index covers all predicate columns
233+
*/
234+
bool GetCoverAllColumns() const { return cover_all_columns_; }
235+
220236
/**
221237
* @return the hashed value of this plan node
222238
*/
@@ -237,6 +253,7 @@ class IndexScanPlanNode : public AbstractScanPlanNode {
237253
std::unordered_map<catalog::indexkeycol_oid_t, IndexExpression> hi_index_cols_{};
238254
uint64_t table_num_tuple_;
239255
uint64_t index_size_;
256+
bool cover_all_columns_;
240257
};
241258

242259
DEFINE_JSON_HEADER_DECLARATIONS(IndexScanPlanNode);
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#pragma once
2+
3+
#include <string>
4+
#include <vector>
5+
6+
#include "common/resource_tracker.h"
7+
#include "self_driving/pilot/action/action_defs.h"
8+
9+
namespace noisepage::selfdriving::pilot {
10+
11+
/**
12+
* The abstract class for self-driving actions
13+
*/
14+
class AbstractAction {
15+
public:
16+
/**
17+
* Constructor for the base AbstractAction.
18+
* @param family The family that this action belongs to
19+
*/
20+
explicit AbstractAction(ActionType family) : action_family_(family), id_(action_id_counter++) {}
21+
22+
virtual ~AbstractAction() = default;
23+
24+
/**
25+
* Set the estimated runtime metrics for this action
26+
* @param estimated_metrics The metrics to set to
27+
*/
28+
void SetEstimatedMetrics(const common::ResourceTracker::Metrics &estimated_metrics) {
29+
estimated_metrics_ = estimated_metrics;
30+
}
31+
32+
/** @return The estimated runtime metrics for this action (note: estimated, despite "Actual" in the getter name) */
33+
const common::ResourceTracker::Metrics &GetActualMetrics() { return estimated_metrics_; }
34+
35+
/** @return This action's ID */
36+
action_id_t GetActionID() const { return id_; }
37+
38+
/** @return This action's family */
39+
ActionType GetActionFamily() const { return action_family_; }
40+
41+
/**
42+
* Add an equivalent action
43+
* @param id Action ID
44+
*/
45+
void AddEquivalentAction(action_id_t id) { equivalent_action_ids_.emplace_back(id); }
46+
47+
/**
48+
* Get the equivalent action ids
49+
* @return Action ID vector
50+
*/
51+
const std::vector<action_id_t> &GetEquivalentActions() const { return equivalent_action_ids_; }
52+
53+
/**
54+
* Add a reverse action
55+
* @param id Action ID
56+
*/
57+
void AddReverseAction(action_id_t id) { reverse_action_ids_.emplace_back(id); }
58+
59+
/**
60+
* Get the reverse action ids
61+
* @return Action ID vector
62+
*/
63+
const std::vector<action_id_t> &GetReverseActions() const { return reverse_action_ids_; }
64+
65+
/**
66+
* Get the SQL command to apply the action
67+
* @return Action SQL command
68+
*/
69+
virtual const std::string &GetSQLCommand() { return sql_command_; }
70+
71+
protected:
72+
std::string sql_command_; ///< The SQL command used to apply the action
73+
74+
private:
75+
static action_id_t action_id_counter;
76+
77+
common::ResourceTracker::Metrics estimated_metrics_{};
78+
79+
ActionType action_family_;
80+
81+
/** ID is unique for an action within one planning process (one MCTS) */
82+
action_id_t id_;
83+
84+
std::vector<action_id_t> equivalent_action_ids_;
85+
std::vector<action_id_t> reverse_action_ids_;
86+
};
87+
88+
} // namespace noisepage::selfdriving::pilot
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#pragma once
2+
3+
#include "common/strong_typedef.h"
4+
5+
namespace noisepage::selfdriving::pilot {
6+
7+
/**
8+
* typedef for action ID
9+
*/
10+
STRONG_TYPEDEF_HEADER(action_id_t, int32_t);
11+
12+
/**
13+
* Action types
14+
*/
15+
enum class ActionType : uint8_t {
16+
CREATE_INDEX,
17+
DROP_INDEX,
18+
CHANGE_KNOB,
19+
};
20+
21+
} // namespace noisepage::selfdriving::pilot
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#pragma once
2+
3+
#include <string>
4+
#include <utility>
5+
6+
#include "self_driving/pilot/action/abstract_action.h"
7+
#include "settings/settings_param.h"
8+
9+
namespace noisepage::settings {
10+
class SettingsManager;
11+
}
12+
13+
namespace noisepage::selfdriving::pilot {
14+
15+
/**
16+
* Represent a change knob self-driving action
17+
*/
18+
template <class T>
19+
class ChangeKnobAction : public AbstractAction {
20+
public:
21+
/**
22+
* Construct ChangeKnobAction
23+
* @param param Which knob param
24+
* @param param_name Name of the param
25+
* @param change_value The value to change that knob with
26+
* @param settings_manager SettingsManager (used to find out the current knob value before applying the change_value)
27+
*/
28+
ChangeKnobAction(settings::Param param, std::string param_name, T change_value,
29+
common::ManagedPointer<settings::SettingsManager> settings_manager)
30+
: AbstractAction(ActionType::CHANGE_KNOB),
31+
param_(param),
32+
param_name_(std::move(param_name)),
33+
change_value_(change_value),
34+
settings_manager_(settings_manager) {}
35+
36+
const std::string &GetSQLCommand() override;
37+
38+
private:
39+
settings::Param param_;
40+
std::string param_name_;
41+
T change_value_;
42+
common::ManagedPointer<settings::SettingsManager> settings_manager_;
43+
};
44+
45+
} // namespace noisepage::selfdriving::pilot

0 commit comments

Comments
 (0)