diff --git a/Jenkinsfile-utils.groovy b/Jenkinsfile-utils.groovy index b21045f911..1ccb5471c0 100644 --- a/Jenkinsfile-utils.groovy +++ b/Jenkinsfile-utils.groovy @@ -226,6 +226,7 @@ void stageModeling() { '-DCMAKE_BUILD_TYPE=Release -DNOISEPAGE_UNITY_BUILD=ON -DNOISEPAGE_USE_JEMALLOC=ON' ]) buildNoisePageTarget("execution_runners") + buildNoisePageTarget("compilation_runner") // The forecaster_standalone script runs TPC-C with query trace enabled. // The forecaster_standalone script uses SET to enable query trace. @@ -256,6 +257,11 @@ void stageModeling() { ../benchmark/execution_runners --execution_runner_rows_limit=100 --rerun=0 --warm_num=1 ''', label: 'OU model training data generation' + sh script :''' + cd build/bin + ../benchmark/compilation_runner + ''', label: 'Compilation model training data generation' + // Recompile the noisepage DBMS in Debug mode with code coverage. buildNoisePage([buildCommand:'ninja noisepage', cmake: '-DCMAKE_BUILD_TYPE=Debug -DNOISEPAGE_UNITY_BUILD=OFF -DNOISEPAGE_GENERATE_COVERAGE=ON' diff --git a/benchmark/runner/compilation_runner.cpp b/benchmark/runner/compilation_runner.cpp new file mode 100644 index 0000000000..1605b30582 --- /dev/null +++ b/benchmark/runner/compilation_runner.cpp @@ -0,0 +1,68 @@ +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "benchmark_util/data_table_benchmark_util.h" +#include "common/dedicated_thread_registry.h" +#include "common/scoped_timer.h" +#include "execution/compiler/executable_query.h" +#include "execution/exec/execution_settings.h" +#include "execution/execution_util.h" +#include "execution/vm/module.h" +#include "loggers/execution_logger.h" +#include "metrics/metrics_thread.h" +#include "storage/garbage_collector_thread.h" +#include "storage/write_ahead_log/log_manager.h" + +#define LOG_TEST_LOG_FILE_NAME "benchmark.txt" + +namespace noisepage::runner { + +class CompilationRunner : public benchmark::Fixture {}; + +// NOLINTNEXTLINE +BENCHMARK_DEFINE_F(CompilationRunner, Compilation)(benchmark::State &state) { + noisepage::LoggersUtil::Initialize(); + execution::ExecutionUtil::InitTPL("./bytecode_handlers_ir.bc"); + + auto *const metrics_manager = new metrics::MetricsManager(); + metrics_manager->EnableMetric(metrics::MetricsComponent::COMPILATION); + metrics_manager->RegisterThread(); + + const std::string &path = "../../sample_tpl/tpl_tests.txt"; + std::ifstream tpl_tests(path); + + std::string input_line; + size_t identifier = 0; + while (std::getline(tpl_tests, input_line)) { + if (input_line.find(".tpl") != std::string::npos && input_line[0] != '#') { + // We have found a valid test + std::string tpl = input_line.substr(0, input_line.find(',')); + std::string target = "../sample_tpl/" + tpl; + + std::ifstream input(target); + std::string contents((std::istreambuf_iterator(input)), (std::istreambuf_iterator())); + EXECUTION_LOG_INFO("Running compilation on {}", target); + + execution::exec::ExecutionSettings exec_settings; + auto exec_query = execution::compiler::ExecutableQuery(contents, nullptr, false, 16, exec_settings, + transaction::timestamp_t(0)); + for (const auto &fragment : exec_query.GetFragments()) { + fragment->GetModule()->CompileToMachineCode(execution::query_id_t(identifier)); + } + + identifier++; + } + } + + metrics_manager->Aggregate(); + metrics_manager->ToOutput(nullptr); + metrics_manager->UnregisterThread(); + delete metrics_manager; + noisepage::LoggersUtil::ShutDown(); +} + +BENCHMARK_REGISTER_F(CompilationRunner, Compilation)->Unit(benchmark::kMillisecond)->UseManualTime()->Iterations(1); + +} // namespace noisepage::runner diff --git a/benchmark/runner/execution_runners.cpp b/benchmark/runner/execution_runners.cpp index 244760f842..298b8bb0e0 100644 --- a/benchmark/runner/execution_runners.cpp +++ b/benchmark/runner/execution_runners.cpp @@ -33,7 +33,7 @@ #include "runner/execution_runners_argument_generator.h" #include "runner/execution_runners_data_config.h" #include "runner/execution_runners_settings.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" #include "self_driving/modeling/operating_unit_defs.h" #include "self_driving/modeling/operating_unit_recorder.h" #include "storage/index/bplustree.h" diff --git a/script/self_driving/modeling/data/opunit_data.py b/script/self_driving/modeling/data/opunit_data.py index 4bfa57019e..be6db088c8 100755 --- a/script/self_driving/modeling/data/opunit_data.py +++ b/script/self_driving/modeling/data/opunit_data.py @@ -41,6 +41,8 @@ def get_ou_runner_data(filename, model_results_path, txn_sample_rate, model_map= if "execution" in filename: # Handle the execution data return _execution_get_ou_runner_data(filename, model_map, predict_cache, trim) + if "compilation" in filename: + return _compilation_get_data(filename) if "gc" in filename or "log" in filename: # Handle of the gc or log data with interval-based conversion return _interval_get_ou_runner_data(filename, model_results_path) @@ -48,6 +50,18 @@ def get_ou_runner_data(filename, model_results_path, txn_sample_rate, model_map= return _default_get_ou_runner_data(filename) +def _compilation_get_data(filename): + df = pd.read_csv(filename, skipinitialspace=True) + headers = list(df.columns.values) + data_info.instance.parse_csv_header(headers, False) + file_name = os.path.splitext(os.path.basename(filename))[0] + + # Remove the first two (query_id and the name) + x = df.iloc[:, 2:-data_info.instance.METRICS_OUTPUT_NUM].values + y = df.iloc[:, -data_info.instance.OU_MODEL_TARGET_NUM:].values + return [OpUnitData(OpUnit[file_name.upper()], x, y)] + + def _default_get_ou_runner_data(filename): # In the default case, the data does not need any pre-processing and the file name indicates the opunit df = pd.read_csv(filename, skipinitialspace=True) diff --git a/script/self_driving/modeling/training_util/data_transforming_util.py b/script/self_driving/modeling/training_util/data_transforming_util.py index bb8227315c..e2dc2639fa 100644 --- a/script/self_driving/modeling/training_util/data_transforming_util.py +++ b/script/self_driving/modeling/training_util/data_transforming_util.py @@ -128,6 +128,7 @@ def _cardinality_linear_predict_transform(x, y): OpUnit.DISK_LOG_CONSUMER_TASK: None, OpUnit.TXN_BEGIN: None, OpUnit.TXN_COMMIT: None, + OpUnit.COMPILATION: None, # Execution engine opunits OpUnit.SEQ_SCAN: _num_rows_linear_transformer, @@ -184,6 +185,7 @@ def _num_rows_cardinality_linear_train_transform(x): OpUnit.DISK_LOG_CONSUMER_TASK: None, OpUnit.TXN_BEGIN: None, OpUnit.TXN_COMMIT: None, + OpUnit.COMPILATION: None, # Execution engine opunits OpUnit.SEQ_SCAN: None, diff --git a/script/self_driving/modeling/type.py b/script/self_driving/modeling/type.py index 8cb504404c..3b90f1ecae 100644 --- a/script/self_driving/modeling/type.py +++ b/script/self_driving/modeling/type.py @@ -70,6 +70,9 @@ class OpUnit(enum.IntEnum): BIND_COMMAND = 40, EXECUTE_COMMAND = 41 + # Compilation + COMPILATION = 42 + class ExecutionFeature(enum.IntEnum): # Debugging information @@ -100,6 +103,13 @@ class ExecutionFeature(enum.IntEnum): READONLY_UNLINKED = 18, INTERVAL = 19, + # Compilation input features + NAME = 20, + CODE_SIZE = 21, + DATA_SIZE = 22, + FUNCTIONS_SIZE = 23, + STATIC_LOCALS_SIZE = 24 + class ConcurrentCountingMode(enum.Enum): """How to identify the concurrent running operations (for a GroupedOpUnitData) diff --git a/src/execution/ast/context.cpp b/src/execution/ast/context.cpp index 30d42ee2a1..2cab732180 100644 --- a/src/execution/ast/context.cpp +++ b/src/execution/ast/context.cpp @@ -26,7 +26,7 @@ #include "execution/sql/table_vector_iterator.h" #include "execution/sql/thread_state_container.h" #include "execution/sql/value.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" // #include "execution/util/csv_reader.h" Fix later. #include "execution/util/execution_common.h" diff --git a/src/execution/ast/type.cpp b/src/execution/ast/type.cpp index 2ea4e771fc..66b1229df5 100644 --- a/src/execution/ast/type.cpp +++ b/src/execution/ast/type.cpp @@ -16,7 +16,7 @@ #include "execution/sql/thread_state_container.h" #include "execution/sql/value.h" #include "execution/sql/vector_projection_iterator.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" // #include "execution/util/csv_reader.h" Fix later. namespace noisepage::execution::ast { diff --git a/src/execution/compiler/compilation_context.cpp b/src/execution/compiler/compilation_context.cpp index 07ff7eb1f9..ecad5557e3 100644 --- a/src/execution/compiler/compilation_context.cpp +++ b/src/execution/compiler/compilation_context.cpp @@ -46,6 +46,7 @@ #include "execution/compiler/operator/update_translator.h" #include "execution/compiler/pipeline.h" #include "execution/exec/execution_settings.h" +#include "execution/vm/module.h" #include "parser/expression/abstract_expression.h" #include "parser/expression/column_value_expression.h" #include "parser/expression/comparison_expression.h" @@ -73,6 +74,7 @@ #include "planner/plannodes/seq_scan_plan_node.h" #include "planner/plannodes/set_op_plan_node.h" #include "planner/plannodes/update_plan_node.h" +#include "self_driving/modeling/compilation_operating_unit.h" #include "self_driving/modeling/operating_unit_recorder.h" #include "spdlog/fmt/fmt.h" @@ -124,6 +126,17 @@ ast::FunctionDecl *CompilationContext::GenerateTearDownFunction() { return builder.Finish(); } +std::unique_ptr CompilationContext::GenerateCompilationOperatingUnits( + const std::vector> &fragments) { + auto units = std::make_unique(); + for (const auto &fragment : fragments) { + auto module = fragment->GetModule(); + auto bytecode_module = module->GetBytecodeModule(); + units->RecordCompilationModule(bytecode_module); + } + return units; +} + void CompilationContext::GeneratePlan(const planner::AbstractPlanNode &plan, common::ManagedPointer plan_meta_data) { exec_ctx_ = @@ -194,7 +207,9 @@ void CompilationContext::GeneratePlan(const planner::AbstractPlanNode &plan, // Compile and finish. fragments.emplace_back(main_builder.Compile(query_->GetExecutionSettings().GetCompilerSettings())); - query_->Setup(std::move(fragments), query_state_.GetSize(), codegen_.ReleasePipelineOperatingUnits()); + auto units = GenerateCompilationOperatingUnits(fragments); + query_->Setup(std::move(fragments), query_state_.GetSize(), codegen_.ReleasePipelineOperatingUnits(), + std::move(units)); } // static diff --git a/src/execution/compiler/executable_query.cpp b/src/execution/compiler/executable_query.cpp index b8346fcf6c..ee2786c227 100644 --- a/src/execution/compiler/executable_query.cpp +++ b/src/execution/compiler/executable_query.cpp @@ -10,7 +10,8 @@ #include "execution/sema/error_reporter.h" #include "execution/vm/module.h" #include "loggers/execution_logger.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/compilation_operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" #include "transaction/transaction_context.h" namespace noisepage::execution::compiler { @@ -27,7 +28,9 @@ ExecutableQuery::Fragment::Fragment(std::vector &&functions, std::v ExecutableQuery::Fragment::~Fragment() = default; -void ExecutableQuery::Fragment::Run(byte query_state[], vm::ExecutionMode mode) const { +void ExecutableQuery::Fragment::ResetCompiledModule() { module_->ResetCompiledModule(); } + +void ExecutableQuery::Fragment::Run(execution::query_id_t query_id, byte query_state[], vm::ExecutionMode mode) const { using Function = std::function; auto exec_ctx = *reinterpret_cast(query_state); @@ -36,7 +39,7 @@ void ExecutableQuery::Fragment::Run(byte query_state[], vm::ExecutionMode mode) } for (const auto &func_name : functions_) { Function func; - if (!module_->GetFunction(func_name, mode, &func)) { + if (!module_->GetFunction(query_id, func_name, mode, &func)) { throw EXECUTION_EXCEPTION(fmt::format("Could not find function '{}' in query fragment.", func_name), common::ErrorCode::ERRCODE_INTERNAL_ERROR); } @@ -44,7 +47,7 @@ void ExecutableQuery::Fragment::Run(byte query_state[], vm::ExecutionMode mode) func(query_state); } catch (const AbortException &e) { for (const auto &teardown_name : teardown_fn_) { - if (!module_->GetFunction(teardown_name, mode, &func)) { + if (!module_->GetFunction(query_id, teardown_name, mode, &func)) { throw EXECUTION_EXCEPTION(fmt::format("Could not find teardown function '{}' in query fragment.", func_name), common::ErrorCode::ERRCODE_INTERNAL_ERROR); } @@ -130,7 +133,7 @@ ExecutableQuery::ExecutableQuery(const std::string &contents, std::vector> fragments; fragments.emplace_back(std::move(fragment)); - Setup(std::move(fragments), query_state_size, nullptr); + Setup(std::move(fragments), query_state_size, nullptr, nullptr); if (is_file) { // acquire the output format @@ -142,7 +145,8 @@ ExecutableQuery::ExecutableQuery(const std::string &contents, ExecutableQuery::~ExecutableQuery() = default; void ExecutableQuery::Setup(std::vector> &&fragments, const std::size_t query_state_size, - std::unique_ptr pipeline_operating_units) { + std::unique_ptr pipeline_operating_units, + std::unique_ptr compilation_operating_units) { NOISEPAGE_ASSERT( std::all_of(fragments.begin(), fragments.end(), [](const auto &fragment) { return fragment->IsCompiled(); }), "All query fragments are not compiled!"); @@ -152,6 +156,7 @@ void ExecutableQuery::Setup(std::vector> &&fragments, fragments_ = std::move(fragments); query_state_size_ = query_state_size; pipeline_operating_units_ = std::move(pipeline_operating_units); + compilation_operating_units_ = std::move(compilation_operating_units); EXECUTION_LOG_TRACE("Query has {} fragment{} with {}-byte query state.", fragments_.size(), fragments_.size() > 1 ? "s" : "", query_state_size_); @@ -167,9 +172,22 @@ void ExecutableQuery::Run(common::ManagedPointer exec_ct exec_ctx->SetPipelineOperatingUnits(GetPipelineOperatingUnits()); exec_ctx->SetQueryId(query_id_); + if (!exec_ctx->GetExecutionSettings().GetIsCompilationCacheEnabled()) { + // This model assumes that an ExecutableQuery is tied to the lifetime of a specific + // connection (via the ProtocolInterpreter). If at any point in the future, this + // assumption proves to be incorrect, this would need to be revisited. + // + // Particularly, to reliably bypass the compilation cache, module and/or invocation + // state (i.e., CompiledModule) would need to be moved to thread-local or + // per-execution state (i.e., into the ExecutionContext). + for (const auto &fragment : fragments_) { + fragment->ResetCompiledModule(); + } + } + // Now run through fragments. for (const auto &fragment : fragments_) { - fragment->Run(query_state.get(), mode); + fragment->Run(query_id_, query_state.get(), mode); } // We do not currently re-use ExecutionContexts. However, this is unset to help ensure diff --git a/src/execution/exec/execution_context.cpp b/src/execution/exec/execution_context.cpp index d69d615260..298b889f5b 100644 --- a/src/execution/exec/execution_context.cpp +++ b/src/execution/exec/execution_context.cpp @@ -7,7 +7,7 @@ #include "metrics/metrics_store.h" #include "parser/expression/constant_value_expression.h" #include "replication/primary_replication_manager.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" #include "self_driving/modeling/operating_unit_util.h" #include "storage/recovery/recovery_manager.h" #include "transaction/transaction_context.h" diff --git a/src/execution/exec/execution_settings.cpp b/src/execution/exec/execution_settings.cpp index ed84717a12..52dcd3dd70 100644 --- a/src/execution/exec/execution_settings.cpp +++ b/src/execution/exec/execution_settings.cpp @@ -10,6 +10,7 @@ void ExecutionSettings::UpdateFromSettingsManager(common::ManagedPointerGetInt(settings::Param::num_parallel_execution_threads); is_counters_enabled_ = settings->GetBool(settings::Param::counters_enable); is_pipeline_metrics_enabled_ = settings->GetBool(settings::Param::pipeline_metrics_enable); + is_compilation_cache_enabled_ = settings->GetBool(settings::Param::enable_compilation_cache); } } diff --git a/src/execution/sql/sorter.cpp b/src/execution/sql/sorter.cpp index 96a96f6dc9..2adc8a9eee 100644 --- a/src/execution/sql/sorter.cpp +++ b/src/execution/sql/sorter.cpp @@ -14,7 +14,7 @@ #include "execution/util/stage_timer.h" #include "ips4o/ips4o.hpp" #include "loggers/execution_logger.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" #include "self_driving/modeling/operating_unit_defs.h" namespace noisepage::execution::sql { diff --git a/src/execution/vm/module.cpp b/src/execution/vm/module.cpp index 92f79efc48..5a5ff046bf 100644 --- a/src/execution/vm/module.cpp +++ b/src/execution/vm/module.cpp @@ -7,7 +7,11 @@ #include #include +#include "common/resource_tracker.h" +#include "common/thread_context.h" #include "loggers/execution_logger.h" +#include "metrics/metrics_store.h" +#include "self_driving/modeling/compilation_operating_unit.h" #define XBYAK_NO_OP_NAMES #include "xbyak/xbyak.h" @@ -22,18 +26,19 @@ namespace noisepage::execution::vm { class Module::AsyncCompileTask : public tbb::task { public: // Construct an asynchronous compilation task to compile the the module - explicit AsyncCompileTask(Module *module) : module_(module) {} + explicit AsyncCompileTask(Module *module, execution::query_id_t query_id) : module_(module), query_id_(query_id) {} // Execute tbb::task *execute() override { // This simply invokes Module::CompileToMachineCode() asynchronously. - module_->CompileToMachineCode(); + module_->CompileToMachineCode(query_id_); // Done. There's no next task, so return null. return nullptr; } private: Module *module_; + execution::query_id_t query_id_; }; // --------------------------------------------------------- @@ -49,6 +54,7 @@ Module::Module(std::unique_ptr bytecode_module, std::unique_ptr< jit_module_(std::move(llvm_module)), functions_(std::make_unique[]>(bytecode_module_->GetFunctionCount())), bytecode_trampolines_(std::make_unique(bytecode_module_->GetFunctionCount())), + compiled_flag_(std::make_unique()), metadata_(std::move(metadata)) { // Create the trampolines for all bytecode functions for (const auto &func : bytecode_module_->GetFunctionsInfo()) { @@ -265,18 +271,35 @@ void Module::CreateFunctionTrampoline(FunctionId func_id) { bytecode_trampolines_[func_id] = std::move(trampoline); } -void Module::CompileToMachineCode() { - std::call_once(compiled_flag_, [this]() { +void Module::CompileToMachineCode(execution::query_id_t query_id) { + std::call_once(*compiled_flag_, [this, query_id]() { // Exit if the module has already been compiled. This might happen if // requested to execute in adaptive mode by concurrent threads. if (jit_module_ != nullptr) { return; } + common::ResourceTracker tracker; + bool metrics_enabled = false; + if (common::thread_context.metrics_store_ != nullptr && + common::thread_context.metrics_store_->ComponentToRecord(metrics::MetricsComponent::COMPILATION)) { + metrics_enabled = true; + tracker.Start(); + } + // JIT the module. LLVMEngine::CompilerOptions options; jit_module_ = LLVMEngine::Compile(*bytecode_module_, options); + if (metrics_enabled) { + tracker.Stop(); + + auto feature = selfdriving::CompilationOperatingUnit(bytecode_module_.get()); + const auto &metrics = tracker.GetMetrics(); + common::thread_context.metrics_store_->RecordCompilationData(query_id, bytecode_module_->GetName(), feature, + metrics); + } + // JIT completed successfully. For each function in the module, pull out its // compiled implementation into the function cache, atomically replacing any // previous implementation. @@ -288,9 +311,19 @@ void Module::CompileToMachineCode() { }); } -void Module::CompileToMachineCodeAsync() { - auto *compile_task = new (tbb::task::allocate_root()) AsyncCompileTask(this); +void Module::CompileToMachineCodeAsync(execution::query_id_t query_id) { + auto *compile_task = new (tbb::task::allocate_root()) AsyncCompileTask(this, query_id); tbb::task::enqueue(*compile_task); } +void Module::ResetCompiledModule() { + compiled_flag_ = std::make_unique(); + jit_module_ = nullptr; + + const auto num_functions = bytecode_module_->GetFunctionCount(); + for (uint32_t idx = 0; idx < num_functions; idx++) { + functions_[idx] = bytecode_trampolines_[idx].GetCode(); + } +} + } // namespace noisepage::execution::vm diff --git a/src/include/execution/compiler/codegen.h b/src/include/execution/compiler/codegen.h index 87b2fadb5c..ed5a8874e5 100644 --- a/src/include/execution/compiler/codegen.h +++ b/src/include/execution/compiler/codegen.h @@ -18,7 +18,7 @@ #include "execution/sql/sql.h" #include "parser/expression_defs.h" #include "planner/plannodes/plan_node_defs.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" namespace noisepage::catalog { class CatalogAccessor; diff --git a/src/include/execution/compiler/compilation_context.h b/src/include/execution/compiler/compilation_context.h index 254952af51..4ebea66905 100644 --- a/src/include/execution/compiler/compilation_context.h +++ b/src/include/execution/compiler/compilation_context.h @@ -152,6 +152,10 @@ class CompilationContext { void PrepareOut(const planner::AbstractPlanNode &plan, Pipeline *pipeline); private: + // Generate the CompilationOperatingUnit for all modules + std::unique_ptr GenerateCompilationOperatingUnits( + const std::vector> &fragments); + // Unique ID used as a prefix for all generated functions to ensure uniqueness. uint32_t unique_id_; diff --git a/src/include/execution/compiler/executable_query.h b/src/include/execution/compiler/executable_query.h index 94ab1b5d91..9a6d2f0758 100644 --- a/src/include/execution/compiler/executable_query.h +++ b/src/include/execution/compiler/executable_query.h @@ -15,6 +15,7 @@ namespace noisepage { namespace selfdriving { class PipelineOperatingUnits; +class CompilationOperatingUnits; namespace pilot { class PilotUtil; } // namespace pilot @@ -48,6 +49,7 @@ namespace runner { class ExecutionRunners; class ExecutionRunners_SEQ0_OutputRunners_Benchmark; class ExecutionRunners_SEQ10_0_IndexInsertRunners_Benchmark; +class CompilationRunner_Compilation_Benchmark; } // namespace runner } // namespace noisepage @@ -83,10 +85,11 @@ class ExecutableQuery { /** * Run this fragment using the provided opaque query state object. + * @param query_id Query identifier * @param query_state The query state. * @param mode The execution mode to run the query with. */ - void Run(std::byte query_state[], vm::ExecutionMode mode) const; + void Run(execution::query_id_t query_id, std::byte query_state[], vm::ExecutionMode mode) const; /** * @return True if this fragment is compiled and executable. @@ -96,6 +99,17 @@ class ExecutableQuery { /** @return The metadata of this module. */ const vm::ModuleMetadata &GetModuleMetadata() const; + /** @return module */ + common::ManagedPointer GetModule() const { return common::ManagedPointer(module_); } + + /** + * Resets the compilation module. This will effectively force the module to be + * recompiled the next time it is required. @note that this function is not + * thread-safe. It is the caller's responsibility to ensure that only 1 thread + * is invoking this function and the module is not in use by any other thread. + */ + void ResetCompiledModule(); + private: // The functions that must be run (in the provided order) to execute this // query fragment. @@ -133,9 +147,11 @@ class ExecutableQuery { * @param query_state_size The size of the state structure this query needs. This value is * represented in bytes. * @param pipeline_operating_units The pipeline operating units that were generated with the fragments. + * @param compilation_operating_units The compilation operating units generated from fragments. */ void Setup(std::vector> &&fragments, std::size_t query_state_size, - std::unique_ptr pipeline_operating_units); + std::unique_ptr pipeline_operating_units, + std::unique_ptr compilation_operating_units); /** * Execute the query. @@ -193,6 +209,9 @@ class ExecutableQuery { // The pipeline operating units that were generated as part of this query. std::unique_ptr pipeline_operating_units_; + // The compilation operating units that were generated as part of this query. + std::unique_ptr compilation_operating_units_; + // For mini_runners.cpp /** Legacy constructor that creates a hardcoded fragment with main(ExecutionContext*)->int32. */ @@ -221,6 +240,7 @@ class ExecutableQuery { friend class noisepage::selfdriving::pilot::PilotUtil; friend class noisepage::execution::compiler::CompilationContext; // SetQueryId friend class noisepage::runner::ExecutionRunners_SEQ10_0_IndexInsertRunners_Benchmark; + friend class noisepage::runner::CompilationRunner_Compilation_Benchmark; }; } // namespace noisepage::execution::compiler diff --git a/src/include/execution/exec/execution_context.h b/src/include/execution/exec/execution_context.h index 77537059ba..9556d68d19 100644 --- a/src/include/execution/exec/execution_context.h +++ b/src/include/execution/exec/execution_context.h @@ -14,7 +14,7 @@ #include "execution/util/region.h" #include "metrics/metrics_defs.h" #include "planner/plannodes/output_schema.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" #include "self_driving/modeling/operating_unit_defs.h" #include "transaction/transaction_context.h" diff --git a/src/include/execution/exec/execution_settings.h b/src/include/execution/exec/execution_settings.h index a6f28a559f..c661a76476 100644 --- a/src/include/execution/exec/execution_settings.h +++ b/src/include/execution/exec/execution_settings.h @@ -82,6 +82,9 @@ class EXPORT ExecutionSettings { /** @return True if parallel query execution is enabled. */ constexpr bool GetIsParallelQueryExecutionEnabled() const { return is_parallel_execution_enabled_; } + /** @return True if compilation cache is enabled. */ + bool GetIsCompilationCacheEnabled() const { return is_compilation_cache_enabled_; } + /** @return True if counters are enabled. */ bool GetIsCountersEnabled() const { return is_counters_enabled_; } @@ -104,6 +107,7 @@ class EXPORT ExecutionSettings { bool is_pipeline_metrics_enabled_{common::Constants::IS_PIPELINE_METRICS_ENABLED}; int number_of_parallel_execution_threads_{common::Constants::NUM_PARALLEL_EXECUTION_THREADS}; bool is_static_partitioner_enabled_{common::Constants::IS_STATIC_PARTITIONER_ENABLED}; + bool is_compilation_cache_enabled_{true}; compiler::CompilerSettings compiler_settings_{}; ///< The settings for compiling the TPL input. // MiniRunners needs to set query_identifier and pipeline_operating_units_. diff --git a/src/include/execution/vm/bytecode_module.h b/src/include/execution/vm/bytecode_module.h index 3689dc825f..691a96c80c 100644 --- a/src/include/execution/vm/bytecode_module.h +++ b/src/include/execution/vm/bytecode_module.h @@ -99,6 +99,11 @@ class BytecodeModule { */ const std::vector &GetStaticLocalsInfo() const { return static_locals_; } + /** + * @return Size of the data section + */ + size_t GetDataSize() const { return data_.size(); } + /** * @return The number of functions defined in this module. */ diff --git a/src/include/execution/vm/module.h b/src/include/execution/vm/module.h index 44ce8082d0..2ad71e9784 100644 --- a/src/include/execution/vm/module.h +++ b/src/include/execution/vm/module.h @@ -14,6 +14,10 @@ #include "execution/vm/module_metadata.h" #include "execution/vm/vm_defs.h" +namespace noisepage::runner { +class CompilationRunner_Compilation_Benchmark; +} // namespace noisepage::runner + namespace noisepage::execution::vm { namespace test { @@ -74,13 +78,15 @@ class Module { * an interpreted version and a compiled version. * @tparam Ret Ret The C/C++ return type of the function * @tparam ArgTypes ArgTypes The C/C++ argument types to the function + * @param query_id Query Identifier of the caller * @param name The name of the function the caller wants. * @param exec_mode The mode of the function that the caller wants. * @param[out] func The function wrapper we use to wrap the TPL function. * @return True if the function was found and the output parameter was set. */ template - bool GetFunction(const std::string &name, ExecutionMode exec_mode, std::function *func); + bool GetFunction(execution::query_id_t query_id, const std::string &name, ExecutionMode exec_mode, + std::function *func); /** * Return the raw function implementation for the function in this module with the given function @@ -106,9 +112,18 @@ class Module { /** @return The non-essential metadata for this module. */ const ModuleMetadata &GetMetadata() const { return metadata_; } + /** + * Resets the compilation module. This will effectively force the module to be + * recompiled the next time it is required. @note that this function is not + * thread-safe. It is the caller's responsibility to ensure that only 1 thread + * is invoking this function and the module is not in use by any other thread. + */ + void ResetCompiledModule(); + private: friend class VM; // For the VM to access raw bytecode. friend class test::BytecodeTrampolineTest; // For the tests to check private methods. + friend class noisepage::runner::CompilationRunner_Compilation_Benchmark; // This class encapsulates the ability to asynchronously JIT compile a module. class AsyncCompileTask; @@ -151,11 +166,11 @@ class Module { } // Compile this module into machine code. This is a blocking call. - void CompileToMachineCode(); + void CompileToMachineCode(execution::query_id_t query_id); // Compile this module into machine code. This is a non-blocking call that // triggers a compilation in the background. - void CompileToMachineCodeAsync(); + void CompileToMachineCodeAsync(execution::query_id_t query_id); private: // The module containing all TBC (i.e., bytecode) for the TPL program. @@ -174,7 +189,7 @@ class Module { std::unique_ptr bytecode_trampolines_; // Flag to indicate if the JIT compilation has occurred. - std::once_flag compiled_flag_; + std::unique_ptr compiled_flag_; ModuleMetadata metadata_; ///< Non-essential metadata about the TPL module. }; @@ -199,7 +214,7 @@ inline void CopyAll(uint8_t *buffer, const HeadT &head, const RestT &... rest) { } // namespace detail template -inline bool Module::GetFunction(const std::string &name, const ExecutionMode exec_mode, +inline bool Module::GetFunction(execution::query_id_t query_id, const std::string &name, const ExecutionMode exec_mode, std::function *func) { // Lookup function const FunctionInfo *func_info = bytecode_module_->LookupFuncInfoByName(name); @@ -217,7 +232,7 @@ inline bool Module::GetFunction(const std::string &name, const ExecutionMode exe switch (exec_mode) { case ExecutionMode::Adaptive: { - CompileToMachineCodeAsync(); + CompileToMachineCodeAsync(query_id); FALLTHROUGH; } case ExecutionMode::Interpret: { @@ -246,7 +261,7 @@ inline bool Module::GetFunction(const std::string &name, const ExecutionMode exe break; } case ExecutionMode::Compiled: { - CompileToMachineCode(); + CompileToMachineCode(query_id); *func = [this, func_info](ArgTypes... args) -> Ret { void *raw_func = functions_[func_info->GetId()].load(std::memory_order_relaxed); auto *jit_f = reinterpret_cast(raw_func); diff --git a/src/include/main/db_main.h b/src/include/main/db_main.h index acde4d38b3..2b3b084d82 100644 --- a/src/include/main/db_main.h +++ b/src/include/main/db_main.h @@ -963,6 +963,7 @@ class DBMain { metrics::MetricsOutput query_trace_metrics_output_ = metrics::MetricsOutput::CSV; bool pipeline_metrics_ = false; uint8_t pipeline_metrics_sample_rate_ = 10; + bool compilation_metrics_ = false; bool transaction_metrics_ = false; bool logging_metrics_ = false; uint8_t logging_metrics_sample_rate_ = 100; @@ -1065,6 +1066,7 @@ class DBMain { forecast_sample_limit_ = settings_manager->GetInt(settings::Param::forecast_sample_limit); pipeline_metrics_ = settings_manager->GetBool(settings::Param::pipeline_metrics_enable); pipeline_metrics_sample_rate_ = settings_manager->GetInt(settings::Param::pipeline_metrics_sample_rate); + compilation_metrics_ = settings_manager->GetBool(settings::Param::compilation_metrics_enable); logging_metrics_sample_rate_ = settings_manager->GetInt(settings::Param::logging_metrics_sample_rate); transaction_metrics_ = settings_manager->GetBool(settings::Param::transaction_metrics_enable); logging_metrics_ = settings_manager->GetBool(settings::Param::logging_metrics_enable); @@ -1100,6 +1102,7 @@ class DBMain { metrics_manager->SetMetricOutput(metrics::MetricsComponent::QUERY_TRACE, query_trace_metrics_output_); if (pipeline_metrics_) metrics_manager->EnableMetric(metrics::MetricsComponent::EXECUTION_PIPELINE); + if (compilation_metrics_) metrics_manager->EnableMetric(metrics::MetricsComponent::COMPILATION); if (transaction_metrics_) metrics_manager->EnableMetric(metrics::MetricsComponent::TRANSACTION); if (logging_metrics_) metrics_manager->EnableMetric(metrics::MetricsComponent::LOGGING); if (gc_metrics_) metrics_manager->EnableMetric(metrics::MetricsComponent::GARBAGECOLLECTION); diff --git a/src/include/metrics/bind_command_metric.h b/src/include/metrics/bind_command_metric.h index c4dc138629..ef6973023c 100644 --- a/src/include/metrics/bind_command_metric.h +++ b/src/include/metrics/bind_command_metric.h @@ -13,7 +13,7 @@ #include "common/resource_tracker.h" #include "metrics/abstract_metric.h" #include "metrics/metrics_util.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" #include "transaction/transaction_defs.h" namespace noisepage::metrics { diff --git a/src/include/metrics/compilation_metric.h b/src/include/metrics/compilation_metric.h new file mode 100644 index 0000000000..e23301feb7 --- /dev/null +++ b/src/include/metrics/compilation_metric.h @@ -0,0 +1,124 @@ +#pragma once + +#include +#include //NOLINT +#include +#include +#include +#include +#include +#include + +#include "catalog/catalog_defs.h" +#include "common/resource_tracker.h" +#include "metrics/abstract_metric.h" +#include "self_driving/modeling/compilation_operating_unit.h" + +namespace noisepage::selfdriving::pilot { +class PilotUtil; +class Pilot; +} // namespace noisepage::selfdriving::pilot + +namespace noisepage::metrics { + +/** + * Raw data object for holding stats collected for the execution engine + */ +class CompilationMetricRawData : public AbstractRawData { + public: + void Aggregate(AbstractRawData *const other) override { + auto other_db_metric = dynamic_cast(other); + if (!other_db_metric->compilation_data_.empty()) { + compilation_data_.splice(compilation_data_.cend(), other_db_metric->compilation_data_); + } + } + + /** + * @return the type of the metric this object is holding the data for + */ + MetricsComponent GetMetricType() const override { return MetricsComponent::COMPILATION; } + + /** + * Writes the data out to ofstreams + * @param outfiles vector of ofstreams to write to that have been opened by the MetricsManager + */ + void ToCSV(std::vector *const outfiles) final { + NOISEPAGE_ASSERT(outfiles->size() == FILES.size(), "Number of files passed to metric is wrong."); + NOISEPAGE_ASSERT(std::count_if(outfiles->cbegin(), outfiles->cend(), + [](const std::ofstream &outfile) { return !outfile.is_open(); }) == 0, + "Not all files are open."); + + auto &outfile = (*outfiles)[0]; + for (auto &data : compilation_data_) { + outfile << data.query_id_.UnderlyingValue() << ", "; + outfile << data.module_name_ << ", "; + outfile << data.feature_.GetCodeSize() << ", "; + outfile << data.feature_.GetDataSize() << ", "; + outfile << data.feature_.GetFunctionsSize() << ", "; + outfile << data.feature_.GetStaticLocalsSize() << ", "; + + data.resource_metrics_.ToCSV(outfile); + outfile << std::endl; + } + compilation_data_.clear(); + } + + /** + * Files to use for writing to CSV. + */ + static constexpr std::array FILES = {"./compilation.csv"}; + + /** + * Columns to use for writing to CSV. + * Note: This includes the columns for the input feature, but not the output (resource counters) + */ + static constexpr std::array FEATURE_COLUMNS = { + "query_id, name, code_size, data_size, functions_size, static_locals_size"}; + + private: + friend class CompilationMetric; + friend class selfdriving::pilot::PilotUtil; + friend class selfdriving::pilot::Pilot; + FRIEND_TEST(MetricsTests, CompilationTest); + struct CompilationData; + + void RecordCompilationData(execution::query_id_t query_id, const std::string &module_name, + const selfdriving::CompilationOperatingUnit &feature, + const common::ResourceTracker::Metrics &resource_metrics) { + compilation_data_.emplace_back(query_id, module_name, feature, resource_metrics); + } + + struct CompilationData { + CompilationData(execution::query_id_t query_id, std::string module_name, + const selfdriving::CompilationOperatingUnit &feature, + const common::ResourceTracker::Metrics &resource_metrics) + : query_id_(query_id), + module_name_(std::move(module_name)), + feature_(feature), + resource_metrics_(resource_metrics) {} + + const execution::query_id_t query_id_; + const std::string module_name_; + const selfdriving::CompilationOperatingUnit feature_; + const common::ResourceTracker::Metrics resource_metrics_; + }; + + std::list compilation_data_; +}; + +/** + * Metrics collected for compilation of queries + */ +class CompilationMetric : public AbstractMetric { + private: + friend class MetricsStore; + + void RecordCompilationMetric(execution::query_id_t query_id, std::string module_name, + const selfdriving::CompilationOperatingUnit &feature, + const common::ResourceTracker::Metrics &resource_metrics) { + std::replace(module_name.begin(), module_name.end(), ',', '_'); + std::replace(module_name.begin(), module_name.end(), ';', '_'); + GetRawData()->RecordCompilationData(query_id, module_name, feature, resource_metrics); + } +}; +} // namespace noisepage::metrics diff --git a/src/include/metrics/execute_command_metric.h b/src/include/metrics/execute_command_metric.h index 1386eca2f7..8ed4ad1920 100644 --- a/src/include/metrics/execute_command_metric.h +++ b/src/include/metrics/execute_command_metric.h @@ -13,7 +13,7 @@ #include "common/resource_tracker.h" #include "metrics/abstract_metric.h" #include "metrics/metrics_util.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" #include "transaction/transaction_defs.h" namespace noisepage::metrics { diff --git a/src/include/metrics/metrics_defs.h b/src/include/metrics/metrics_defs.h index 8a5ac4fc8a..9491a2c32d 100644 --- a/src/include/metrics/metrics_defs.h +++ b/src/include/metrics/metrics_defs.h @@ -14,6 +14,7 @@ enum class MetricsComponent : uint8_t { BIND_COMMAND, EXECUTE_COMMAND, QUERY_TRACE, + COMPILATION }; /** @@ -28,6 +29,6 @@ enum class MetricsOutput : uint8_t { CSV_AND_DB, }; -constexpr uint8_t NUM_COMPONENTS = 8; +constexpr uint8_t NUM_COMPONENTS = 9; } // namespace noisepage::metrics diff --git a/src/include/metrics/metrics_store.h b/src/include/metrics/metrics_store.h index ac42500f7d..541c00d0e3 100644 --- a/src/include/metrics/metrics_store.h +++ b/src/include/metrics/metrics_store.h @@ -14,6 +14,7 @@ #include "metrics/abstract_metric.h" #include "metrics/abstract_raw_data.h" #include "metrics/bind_command_metric.h" +#include "metrics/compilation_metric.h" #include "metrics/execute_command_metric.h" #include "metrics/execution_metric.h" #include "metrics/garbage_collection_metric.h" @@ -225,6 +226,19 @@ class MetricsStore { query_trace_metric_->RecordQueryTrace(db_oid, query_id, timestamp, param); } + /** + * Record compilation metric + * @param query_id query identifier + * @param module_name name of the module + * @param feature CompilationOperatingUnit describing the module + * @param resource_metrics resource metrics + */ + void RecordCompilationData(execution::query_id_t query_id, const std::string &module_name, + const selfdriving::CompilationOperatingUnit &feature, + const common::ResourceTracker::Metrics &resource_metrics) { + compilation_metric_->RecordCompilationMetric(query_id, module_name, feature, resource_metrics); + } + /** * @param component metrics component to test * @return true if metrics enabled for this component, false otherwise @@ -274,6 +288,7 @@ class MetricsStore { std::unique_ptr pipeline_metric_; std::unique_ptr bind_command_metric_; std::unique_ptr execute_command_metric_; + std::unique_ptr compilation_metric_; const std::bitset &enabled_metrics_; const std::array, NUM_COMPONENTS> &samples_mask_; diff --git a/src/include/metrics/pipeline_metric.h b/src/include/metrics/pipeline_metric.h index 629b0700db..4506151379 100644 --- a/src/include/metrics/pipeline_metric.h +++ b/src/include/metrics/pipeline_metric.h @@ -13,7 +13,7 @@ #include "common/resource_tracker.h" #include "metrics/abstract_metric.h" #include "metrics/metrics_util.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" #include "self_driving/modeling/operating_unit_util.h" #include "transaction/transaction_defs.h" diff --git a/src/include/self_driving/modeling/compilation_operating_unit.h b/src/include/self_driving/modeling/compilation_operating_unit.h new file mode 100644 index 0000000000..d02d869e6c --- /dev/null +++ b/src/include/self_driving/modeling/compilation_operating_unit.h @@ -0,0 +1,121 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "execution/exec_defs.h" +#include "execution/vm/bytecode_module.h" + +namespace noisepage::selfdriving { + +/** + * CompilationOperatingUnit is used to record a compilation of a single bytecode + * module. A bytecode module can contain multiple functions. The features for + * a given CompilationOperatingUnit contains the following metadata: + * - Size of the code section + * - Size of the data section + * - Number of functions in the module + * - Number of static locals + */ +class CompilationOperatingUnit { + public: + CompilationOperatingUnit() = default; + + /** + * Constructor for CompilationOperatingUnit + * @param code_size Size of the code section + * @param data_size Size of the static data section + * @param functions_size Number of functions + * @param static_locals_size Number of static locals + */ + CompilationOperatingUnit(size_t code_size, size_t data_size, size_t functions_size, size_t static_locals_size) + : code_size_(code_size), + data_size_(data_size), + functions_size_(functions_size), + static_locals_size_(static_locals_size) {} + + /** + * Constructor for CompilationOperatingUnit from an existing one + * @param other Existing CompilationOperatingUnit to copy from + */ + CompilationOperatingUnit(const CompilationOperatingUnit &other) = default; + + /** + * Constructor for CompilationOperatingUnit from a bytecode module + * @param module bytecode module + */ + explicit CompilationOperatingUnit(const execution::vm::BytecodeModule *module) + : code_size_(module->GetInstructionCount()), + data_size_(module->GetDataSize()), + functions_size_(module->GetFunctionCount()), + static_locals_size_(module->GetStaticLocalsCount()) {} + + /** + * Returns a vector of doubles consisting of the features + */ + void GetAllAttributes(std::vector *all_attributes) const { + all_attributes->push_back(code_size_); + all_attributes->push_back(data_size_); + all_attributes->push_back(functions_size_); + all_attributes->push_back(static_locals_size_); + } + + /** @return size of the code section */ + size_t GetCodeSize() const { return code_size_; } + + /** @return size of the data section */ + size_t GetDataSize() const { return data_size_; } + + /** @return number of the functions */ + size_t GetFunctionsSize() const { return functions_size_; } + + /** @return number of static locals */ + size_t GetStaticLocalsSize() const { return static_locals_size_; } + + private: + size_t code_size_; + size_t data_size_; + size_t functions_size_; + size_t static_locals_size_; +}; + +/** + * CompilationOperatingUnits manages the storage/association of modules to the features + * for a given query execution/compilation. This class is required since a query can + * potentially have multiple modules. + */ +class CompilationOperatingUnits { + public: + /** + * Constructor + */ + CompilationOperatingUnits() = default; + + /** Adds a compilation module to the features being tracked */ + void RecordCompilationModule(const execution::vm::BytecodeModule *module) { + auto ou = CompilationOperatingUnit(module); + UNUSED_ATTRIBUTE auto res = units_.insert(std::make_pair(module->GetName(), ou)); + NOISEPAGE_ASSERT(res.second, "Recording duplicate module commpilation entry"); + } + + /** @return Gets the mapping from module name to CompilationOperatingUnit */ + const std::unordered_map &GetCompilationUnits() const { return units_; } + + /** @return all tracked CompilationOperatingUnit as a vector */ + std::vector GetCompilationUnitsVector() const { + std::vector units; + for (auto &it : units_) { + units.emplace_back(it.second); + } + + return units; + } + + private: + std::unordered_map units_{}; +}; + +} // namespace noisepage::selfdriving diff --git a/src/include/self_driving/modeling/operating_unit.h b/src/include/self_driving/modeling/execution_operating_unit.h similarity index 100% rename from src/include/self_driving/modeling/operating_unit.h rename to src/include/self_driving/modeling/execution_operating_unit.h diff --git a/src/include/self_driving/modeling/operating_unit_recorder.h b/src/include/self_driving/modeling/operating_unit_recorder.h index 59bba017f0..c80d894ee7 100644 --- a/src/include/self_driving/modeling/operating_unit_recorder.h +++ b/src/include/self_driving/modeling/operating_unit_recorder.h @@ -10,7 +10,7 @@ #include "common/managed_pointer.h" #include "execution/sql/sql.h" #include "planner/plannodes/plan_visitor.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" namespace noisepage::catalog { class CatalogAccessor; diff --git a/src/include/self_driving/modeling/operating_unit_util.h b/src/include/self_driving/modeling/operating_unit_util.h index ed5ae96ea4..3d8d99d518 100644 --- a/src/include/self_driving/modeling/operating_unit_util.h +++ b/src/include/self_driving/modeling/operating_unit_util.h @@ -6,7 +6,7 @@ #include #include "parser/expression/abstract_expression.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" #include "self_driving/modeling/operating_unit_defs.h" namespace noisepage::selfdriving { diff --git a/src/include/settings/settings_callbacks.h b/src/include/settings/settings_callbacks.h index 3b86b8d1a2..03e2a6f49f 100644 --- a/src/include/settings/settings_callbacks.h +++ b/src/include/settings/settings_callbacks.h @@ -72,6 +72,10 @@ class Callbacks { static void MetricsPipeline(void *old_value, void *new_value, DBMain *db_main, common::ManagedPointer action_context); + /** Enable or disable metrics collection for compilation */ + static void MetricsCompilation(void *old_value, void *new_value, DBMain *db_Main, + common::ManagedPointer action_context); + /** Update the sampling interval for logging. */ static void MetricsLoggingSampleRate(void *old_value, void *new_value, DBMain *db_main, common::ManagedPointer action_context); diff --git a/src/include/settings/settings_defs.h b/src/include/settings/settings_defs.h index 1fe4bea3a8..930f906fbb 100644 --- a/src/include/settings/settings_defs.h +++ b/src/include/settings/settings_defs.h @@ -371,6 +371,14 @@ SETTING_int( noisepage::settings::Callbacks::MetricsPipelineSampleRate ) +SETTING_bool( + compilation_metrics_enable, + "Metrics collection for compilation of queries (default: false).", + false, + true, + noisepage::settings::Callbacks::MetricsCompilation +) + SETTING_int( logging_metrics_sample_rate, "Sampling rate of metrics collection for logging.", @@ -421,6 +429,14 @@ SETTING_bool( noisepage::settings::Callbacks::ClearQueryCache ) +SETTING_bool( + enable_compilation_cache, + "Enable caching compiled modules of ExecutableQuery objects", + true, + true, + noisepage::settings::Callbacks::NoOp +) + SETTING_string( application_name, diff --git a/src/metrics/metrics_manager.cpp b/src/metrics/metrics_manager.cpp index 9e8b98dff0..ddee42f1a0 100644 --- a/src/metrics/metrics_manager.cpp +++ b/src/metrics/metrics_manager.cpp @@ -124,6 +124,11 @@ void MetricsManager::ResetMetric(const MetricsComponent component) const { metric->Swap(); break; } + case MetricsComponent::COMPILATION: { + const auto &metric = metrics_store.second->compilation_metric_; + metric->Swap(); + break; + } } } } @@ -206,6 +211,10 @@ void MetricsManager::ToCSV(uint8_t component) const { OpenFiles(&outfiles); break; } + case MetricsComponent::COMPILATION: { + OpenFiles(&outfiles); + break; + } } aggregated_metrics_[component]->ToCSV(&outfiles); for (auto &file : outfiles) { diff --git a/src/metrics/metrics_store.cpp b/src/metrics/metrics_store.cpp index d1fa17f2e5..8e7859eca7 100644 --- a/src/metrics/metrics_store.cpp +++ b/src/metrics/metrics_store.cpp @@ -21,6 +21,7 @@ MetricsStore::MetricsStore(const common::ManagedPointer bind_command_metric_ = std::make_unique(); execute_command_metric_ = std::make_unique(); query_trace_metric_ = std::make_unique(); + compilation_metric_ = std::make_unique(); } std::array, NUM_COMPONENTS> MetricsStore::GetDataToAggregate() { @@ -64,6 +65,13 @@ std::array, NUM_COMPONENTS> MetricsStore::GetDa result[component] = pipeline_metric_->Swap(); break; } + case MetricsComponent::COMPILATION: { + NOISEPAGE_ASSERT( + compilation_metric_ != nullptr, + "CompilationMetric cannot be a nullptr. Check the MetricsStore constructor that it was allocated."); + result[component] = compilation_metric_->Swap(); + break; + } case MetricsComponent::BIND_COMMAND: { NOISEPAGE_ASSERT( bind_command_metric_ != nullptr, diff --git a/src/self_driving/modeling/operating_unit.cpp b/src/self_driving/modeling/operating_unit.cpp index d00e12d1fd..43c05c891f 100644 --- a/src/self_driving/modeling/operating_unit.cpp +++ b/src/self_driving/modeling/operating_unit.cpp @@ -1,4 +1,4 @@ -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" namespace noisepage::selfdriving { diff --git a/src/self_driving/modeling/operating_unit_recorder.cpp b/src/self_driving/modeling/operating_unit_recorder.cpp index e159b128e1..78ce66e92e 100644 --- a/src/self_driving/modeling/operating_unit_recorder.cpp +++ b/src/self_driving/modeling/operating_unit_recorder.cpp @@ -51,7 +51,7 @@ #include "planner/plannodes/projection_plan_node.h" #include "planner/plannodes/seq_scan_plan_node.h" #include "planner/plannodes/update_plan_node.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" #include "self_driving/modeling/operating_unit_util.h" #include "storage/block_layout.h" #include "storage/index/bplustree.h" diff --git a/src/self_driving/planning/pilot_util.cpp b/src/self_driving/planning/pilot_util.cpp index 5bae63653f..d871b86019 100644 --- a/src/self_driving/planning/pilot_util.cpp +++ b/src/self_driving/planning/pilot_util.cpp @@ -24,7 +24,7 @@ #include "planner/plannodes/abstract_plan_node.h" #include "self_driving/forecasting/workload_forecast.h" #include "self_driving/model_server/model_server_manager.h" -#include "self_driving/modeling/operating_unit.h" +#include "self_driving/modeling/execution_operating_unit.h" #include "self_driving/planning/action/create_index_action.h" #include "self_driving/planning/action/drop_index_action.h" #include "self_driving/planning/inference_results.h" diff --git a/src/settings/settings_callbacks.cpp b/src/settings/settings_callbacks.cpp index a2312bfc29..741e4fdc31 100644 --- a/src/settings/settings_callbacks.cpp +++ b/src/settings/settings_callbacks.cpp @@ -133,6 +133,17 @@ void Callbacks::MetricsPipelineSampleRate(void *old_value, void *new_value, DBMa action_context->SetState(common::ActionState::SUCCESS); } +void Callbacks::MetricsCompilation(void *const old_value, void *const new_value, DBMain *const db_main, + common::ManagedPointer action_context) { + action_context->SetState(common::ActionState::IN_PROGRESS); + bool new_status = *static_cast(new_value); + if (new_status) + db_main->GetMetricsManager()->EnableMetric(metrics::MetricsComponent::COMPILATION); + else + db_main->GetMetricsManager()->DisableMetric(metrics::MetricsComponent::COMPILATION); + action_context->SetState(common::ActionState::SUCCESS); +} + void Callbacks::MetricsLoggingSampleRate(void *old_value, void *new_value, DBMain *db_main, common::ManagedPointer action_context) { action_context->SetState(common::ActionState::IN_PROGRESS); diff --git a/test/execution/atomics_test.cpp b/test/execution/atomics_test.cpp index d3fd2d1d13..1e20beb681 100644 --- a/test/execution/atomics_test.cpp +++ b/test/execution/atomics_test.cpp @@ -48,11 +48,12 @@ class AtomicsTest : public CompiledTplTest { // The function should exist std::function atomic_and; - EXPECT_TRUE(module->GetFunction("atomic_and", exec_mode, &atomic_and)); + execution::query_id_t qid(0); + EXPECT_TRUE(module->GetFunction(qid, "atomic_and", exec_mode, &atomic_and)); // The function should exist std::function atomic_or; - EXPECT_TRUE(module->GetFunction("atomic_or", exec_mode, &atomic_or)); + EXPECT_TRUE(module->GetFunction(qid, "atomic_or", exec_mode, &atomic_or)); /*========================= *= Run correctness tests = @@ -109,7 +110,8 @@ class AtomicsTest : public CompiledTplTest { // The function should exist std::function cmpxchg; - EXPECT_TRUE(module->GetFunction("cmpxchg", exec_mode, &cmpxchg)); + execution::query_id_t qid(0); + EXPECT_TRUE(module->GetFunction(qid, "cmpxchg", exec_mode, &cmpxchg)); /*========================= *= Run correctness tests = diff --git a/test/execution/compiler_test.cpp b/test/execution/compiler_test.cpp index f9a3636ff1..9c8fa222ed 100644 --- a/test/execution/compiler_test.cpp +++ b/test/execution/compiler_test.cpp @@ -88,7 +88,8 @@ TEST_F(CompilerTest, CompileFromSource) { // The function should exist std::function test_fn; - EXPECT_TRUE(module->GetFunction("test", vm::ExecutionMode::Interpret, &test_fn)); + execution::query_id_t qid(0); + EXPECT_TRUE(module->GetFunction(qid, "test", vm::ExecutionMode::Interpret, &test_fn)); // And should return what we expect EXPECT_EQ(i * 10, test_fn()); diff --git a/test/execution/vm_bytecode_generator_test.cpp b/test/execution/vm_bytecode_generator_test.cpp index 20e9fdd0dd..b889e66b45 100644 --- a/test/execution/vm_bytecode_generator_test.cpp +++ b/test/execution/vm_bytecode_generator_test.cpp @@ -18,6 +18,7 @@ class BytecodeGeneratorTest : public TplTest { // NOLINTNEXTLINE TEST_F(BytecodeGeneratorTest, SimpleTest) { + execution::query_id_t qid(0); { auto src = "fun test() -> bool { return true }"; auto compiler = ModuleCompiler(); @@ -25,7 +26,7 @@ TEST_F(BytecodeGeneratorTest, SimpleTest) { ASSERT_TRUE(module != nullptr); std::function func; - EXPECT_TRUE(module->GetFunction("test", ExecutionMode::Interpret, &func)); + EXPECT_TRUE(module->GetFunction(qid, "test", ExecutionMode::Interpret, &func)); EXPECT_TRUE(func()); } @@ -36,7 +37,7 @@ TEST_F(BytecodeGeneratorTest, SimpleTest) { ASSERT_TRUE(module != nullptr); std::function func; - EXPECT_TRUE(module->GetFunction("test", ExecutionMode::Interpret, &func)); + EXPECT_TRUE(module->GetFunction(qid, "test", ExecutionMode::Interpret, &func)); EXPECT_FALSE(func()); } @@ -52,7 +53,7 @@ TEST_F(BytecodeGeneratorTest, SimpleTest) { ASSERT_TRUE(module != nullptr); std::function mul_20; - EXPECT_TRUE(module->GetFunction("mul20", ExecutionMode::Interpret, &mul_20)) + EXPECT_TRUE(module->GetFunction(qid, "mul20", ExecutionMode::Interpret, &mul_20)) << "Function 'mul20' not found in module"; EXPECT_EQ(20u, mul_20(1)); @@ -68,6 +69,7 @@ TEST_F(BytecodeGeneratorTest, BooleanEvaluationTest) { // Generate function: f(true) = -10, f(false) = 10 // + execution::query_id_t qid(0); { auto src = R"( fun test(c: bool) -> int32 { @@ -82,7 +84,8 @@ TEST_F(BytecodeGeneratorTest, BooleanEvaluationTest) { ASSERT_TRUE(module != nullptr); std::function f; - EXPECT_TRUE(module->GetFunction("test", ExecutionMode::Interpret, &f)) << "Function 'test' not found in module"; + EXPECT_TRUE(module->GetFunction(qid, "test", ExecutionMode::Interpret, &f)) + << "Function 'test' not found in module"; EXPECT_EQ(10, f(false)); EXPECT_EQ(-10, f(true)); } @@ -104,7 +107,8 @@ TEST_F(BytecodeGeneratorTest, BooleanEvaluationTest) { ASSERT_TRUE(module != nullptr); std::function f; - EXPECT_TRUE(module->GetFunction("test", ExecutionMode::Interpret, &f)) << "Function 'test' not found in module"; + EXPECT_TRUE(module->GetFunction(qid, "test", ExecutionMode::Interpret, &f)) + << "Function 'test' not found in module"; EXPECT_FALSE(f()); } } @@ -112,6 +116,7 @@ TEST_F(BytecodeGeneratorTest, BooleanEvaluationTest) { // NOLINTNEXTLINE TEST_F(BytecodeGeneratorTest, SimpleArithmeticTest) { const auto gen_compare_func = [](auto arg_type_name, auto dummy_arg, auto op, auto cb) { + execution::query_id_t qid(0); using Type = decltype(dummy_arg); auto src = fmt::format(R"( fun test(a: {0}, b: {0}) -> {0} {{ @@ -124,7 +129,8 @@ TEST_F(BytecodeGeneratorTest, SimpleArithmeticTest) { ASSERT_TRUE(module != nullptr); std::function fn; - ASSERT_TRUE(module->GetFunction("test", ExecutionMode::Interpret, &fn)) << "Function 'test' not found in module"; + ASSERT_TRUE(module->GetFunction(qid, "test", ExecutionMode::Interpret, &fn)) + << "Function 'test' not found in module"; // Test the function cb(fn); @@ -167,7 +173,9 @@ TEST_F(BytecodeGeneratorTest, ComparisonTest) { ASSERT_TRUE(module != nullptr); std::function fn; - ASSERT_TRUE(module->GetFunction("test", ExecutionMode::Interpret, &fn)) << "Function 'test' not found in module"; + execution::query_id_t qid(0); + ASSERT_TRUE(module->GetFunction(qid, "test", ExecutionMode::Interpret, &fn)) + << "Function 'test' not found in module"; // Test the function cb(fn); @@ -219,7 +227,8 @@ TEST_F(BytecodeGeneratorTest, ParameterPassingTest) { }; std::function f; - EXPECT_TRUE(module->GetFunction("test", ExecutionMode::Interpret, &f)) << "Function 'test' not found in module"; + execution::query_id_t qid(0); + EXPECT_TRUE(module->GetFunction(qid, "test", ExecutionMode::Interpret, &f)) << "Function 'test' not found in module"; S s{.a_ = 0, .b_ = 0}; EXPECT_TRUE(f(&s)); @@ -229,6 +238,7 @@ TEST_F(BytecodeGeneratorTest, ParameterPassingTest) { // NOLINTNEXTLINE TEST_F(BytecodeGeneratorTest, FunctionTypeCheckTest) { + execution::query_id_t qid(0); { auto src = R"( fun test() -> nil { @@ -262,7 +272,9 @@ TEST_F(BytecodeGeneratorTest, FunctionTypeCheckTest) { ASSERT_TRUE(module != nullptr); std::function f; - EXPECT_TRUE(module->GetFunction("test", ExecutionMode::Interpret, &f)) << "Function 'test' not found in module"; + execution::query_id_t qid(0); + EXPECT_TRUE(module->GetFunction(qid, "test", ExecutionMode::Interpret, &f)) + << "Function 'test' not found in module"; EXPECT_EQ(10, f()); } @@ -278,7 +290,8 @@ TEST_F(BytecodeGeneratorTest, FunctionTypeCheckTest) { auto module = compiler.CompileToModule(src); std::function f; - EXPECT_TRUE(module->GetFunction("test", ExecutionMode::Interpret, &f)) << "Function 'test' not found in module"; + EXPECT_TRUE(module->GetFunction(qid, "test", ExecutionMode::Interpret, &f)) + << "Function 'test' not found in module"; EXPECT_EQ(800, f()); } @@ -310,7 +323,8 @@ TEST_F(BytecodeGeneratorTest, FunctionTest) { }; std::function f; - EXPECT_TRUE(module->GetFunction("test", ExecutionMode::Interpret, &f)) << "Function 'test' not found in module"; + execution::query_id_t qid(0); + EXPECT_TRUE(module->GetFunction(qid, "test", ExecutionMode::Interpret, &f)) << "Function 'test' not found in module"; S s{.a_ = 0, .b_ = 0}; EXPECT_TRUE(f(&s)); diff --git a/test/execution/vm_bytecode_trampoline_test.cpp b/test/execution/vm_bytecode_trampoline_test.cpp index 671e263ff5..0ecf8e0927 100644 --- a/test/execution/vm_bytecode_trampoline_test.cpp +++ b/test/execution/vm_bytecode_trampoline_test.cpp @@ -245,7 +245,8 @@ TEST_F(BytecodeTrampolineTest, DISABLED_PerfGenComparisonForSortTest) { auto compiler = ModuleCompiler(); auto module = compiler.CompileToModule(src); std::function compare; - EXPECT_TRUE(module->GetFunction("compare", ExecutionMode::Interpret, &compare)); + execution::query_id_t qid(0); + EXPECT_TRUE(module->GetFunction(qid, "compare", ExecutionMode::Interpret, &compare)); util::Timer timer; timer.Start(); diff --git a/test/metrics/metrics_test.cpp b/test/metrics/metrics_test.cpp index cf36cd646b..b3d811181f 100644 --- a/test/metrics/metrics_test.cpp +++ b/test/metrics/metrics_test.cpp @@ -439,5 +439,18 @@ TEST_F(MetricsTests, ToggleSettings) { callback); EXPECT_EQ(action_context->GetState(), common::ActionState::SUCCESS); EXPECT_FALSE(metrics_manager_->ComponentEnabled(metrics::MetricsComponent::QUERY_TRACE)); + + // Compilation metrics + EXPECT_FALSE(metrics_manager_->ComponentEnabled(metrics::MetricsComponent::COMPILATION)); + action_context = std::make_unique(common::action_id_t(13)); + settings_manager_->SetBool(settings::Param::compilation_metrics_enable, true, common::ManagedPointer(action_context), + callback); + EXPECT_EQ(action_context->GetState(), common::ActionState::SUCCESS); + EXPECT_TRUE(metrics_manager_->ComponentEnabled(metrics::MetricsComponent::COMPILATION)); + action_context = std::make_unique(common::action_id_t(14)); + settings_manager_->SetBool(settings::Param::compilation_metrics_enable, false, common::ManagedPointer(action_context), + callback); + EXPECT_EQ(action_context->GetState(), common::ActionState::SUCCESS); + EXPECT_FALSE(metrics_manager_->ComponentEnabled(metrics::MetricsComponent::COMPILATION)); } } // namespace noisepage::metrics diff --git a/test/self_driving_e2e/model_server_test.cpp b/test/self_driving_e2e/model_server_test.cpp index 6cae28d89b..316f742d3e 100644 --- a/test/self_driving_e2e/model_server_test.cpp +++ b/test/self_driving_e2e/model_server_test.cpp @@ -168,6 +168,53 @@ TEST_F(ModelServerTest, OUAndInterferenceModelTest) { ms_manager->StopModelServer(); } +// NOLINTNEXTLINE +TEST_F(ModelServerTest, CompilationModelTest) { + messenger::messenger_logger->set_level(spdlog::level::info); + model_server_logger->set_level(spdlog::level::info); + + auto primary = BuildDBMain(); + primary->GetNetworkLayer()->GetServer()->RunServer(); + + auto ms_manager = primary->GetModelServerManager(); + + // Wait for the model server process to start + while (!ms_manager->ModelServerStarted()) { + } + + // Send a message + std::string msg = "ModelServer Compilation Model Test"; + ms_manager->PrintMessage(msg); + + std::vector> features{ + {72, 0, 5, 0}, + {72, 0, 5, 0}, + {72, 0, 5, 0}, + {72, 0, 5, 0}, + }; + + // Perform a training of the opunit models with {lr, rf} as training methods. + std::vector methods{"lr", "rf"}; + std::string ou_model_save_path = "ou_model_path.pickle"; + + ModelServerFuture future; + const char *env = ::getenv(BUILD_ABS_PATH); + std::string project_build_path = std::string(env != nullptr ? env : "."); + std::string model_path = project_build_path + "/bin"; + ms_manager->TrainModel(ModelType::Type::OperatingUnit, methods, &model_path, ou_model_save_path, nullptr, + common::ManagedPointer>(&future)); + auto res = future.DangerousWait(); + ASSERT_EQ(res.second, true); // Training succeeds + + // Perform inference on the trained opunit model for various opunits + auto result = ms_manager->InferOUModel("COMPILATION", ou_model_save_path, features); + ASSERT_TRUE(result.second); + ASSERT_EQ(result.first.size(), features.size()); + + // Quit + ms_manager->StopModelServer(); +} + // NOLINTNEXTLINE TEST_F(ModelServerTest, ForecastModelTest) { messenger::messenger_logger->set_level(spdlog::level::info); diff --git a/util/execution/tpl.cpp b/util/execution/tpl.cpp index 029faf4efc..dc5010f70b 100644 --- a/util/execution/tpl.cpp +++ b/util/execution/tpl.cpp @@ -192,20 +192,21 @@ static void CompileAndRun(const std::string &source, const std::string &name = " // Interpret // + execution::query_id_t qid(0); { exec_ctx.SetExecutionMode(static_cast(vm::ExecutionMode::Interpret)); util::ScopedTimer timer(&interp_exec_ms); if (IS_SQL) { std::function main; - if (!module->GetFunction("main", vm::ExecutionMode::Interpret, &main)) { + if (!module->GetFunction(qid, "main", vm::ExecutionMode::Interpret, &main)) { EXECUTION_LOG_ERROR("Missing 'main' entry function with signature (*ExecutionContext)->int32"); return; } EXECUTION_LOG_INFO("VM main() returned: {}", main(&exec_ctx)); } else { std::function main; - if (!module->GetFunction("main", vm::ExecutionMode::Interpret, &main)) { + if (!module->GetFunction(qid, "main", vm::ExecutionMode::Interpret, &main)) { EXECUTION_LOG_ERROR("Missing 'main' entry function with signature ()->int32"); return; } @@ -222,14 +223,14 @@ static void CompileAndRun(const std::string &source, const std::string &name = " if (IS_SQL) { std::function main; - if (!module->GetFunction("main", vm::ExecutionMode::Adaptive, &main)) { + if (!module->GetFunction(qid, "main", vm::ExecutionMode::Adaptive, &main)) { EXECUTION_LOG_ERROR("Missing 'main' entry function with signature (*ExecutionContext)->int32"); return; } EXECUTION_LOG_INFO("ADAPTIVE main() returned: {}", main(&exec_ctx)); } else { std::function main; - if (!module->GetFunction("main", vm::ExecutionMode::Adaptive, &main)) { + if (!module->GetFunction(qid, "main", vm::ExecutionMode::Adaptive, &main)) { EXECUTION_LOG_ERROR("Missing 'main' entry function with signature ()->int32"); return; } @@ -245,7 +246,7 @@ static void CompileAndRun(const std::string &source, const std::string &name = " if (IS_SQL) { std::function main; - if (!module->GetFunction("main", vm::ExecutionMode::Compiled, &main)) { + if (!module->GetFunction(qid, "main", vm::ExecutionMode::Compiled, &main)) { EXECUTION_LOG_ERROR("Missing 'main' entry function with signature (*ExecutionContext)->int32"); return; } @@ -256,7 +257,7 @@ static void CompileAndRun(const std::string &source, const std::string &name = " EXECUTION_LOG_INFO("Jit exec: {} ms", x.GetElapsed()); } else { std::function main; - if (!module->GetFunction("main", vm::ExecutionMode::Compiled, &main)) { + if (!module->GetFunction(qid, "main", vm::ExecutionMode::Compiled, &main)) { EXECUTION_LOG_ERROR("Missing 'main' entry function with signature ()->int32"); return; }