-
Notifications
You must be signed in to change notification settings - Fork 703
Refactor streamer batch management in table_data #3107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8af80bb
4ab3ad9
acea61d
b8103cc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -51,7 +51,7 @@ bool print_runtime_stats = false; | |
| bool support_json_index = false; | ||
| bool is_filter_pushdown_enabled = true; | ||
| int32_t max_streamable_column_width = 128; | ||
| int32_t max_num_threads_for_global_state = 4; | ||
| int32_t max_num_threads_for_global_state = std::thread::hardware_concurrency(); | ||
|
||
| bool treat_numeric_as_double = true; // Treat numeric types as double by default | ||
| bool print_progress_during_seq_scan = false; | ||
| bool use_shared_mem_for_refresh = false; | ||
|
|
@@ -199,7 +199,7 @@ void initialize_guc_parameters() | |
| "Maximum number of threads for global state operations.", | ||
| nullptr, // optional long description | ||
| &pg::max_num_threads_for_global_state, // linked C variable | ||
| 6, // default value | ||
| base::system_report::cpu_cores(), // default value | ||
| 1, // min value | ||
| base::system_report::cpu_cores(), // max value | ||
| PGC_USERSET, // context (USERSET, SUSET, etc.) | ||
|
|
@@ -216,10 +216,10 @@ void initialize_guc_parameters() | |
| "for detecting dataset refreshes, which can improve performance but may " | ||
| "have implications on concurrency. " | ||
| "It make sense to disable this for OLTP workloads.", | ||
| &pg::use_shared_mem_for_refresh, // linked C variable | ||
| true, // default value | ||
| PGC_USERSET, // context (USERSET, SUSET, etc.) | ||
| 0, // flags | ||
| &pg::use_shared_mem_for_refresh, // linked C variable | ||
| true, // default value | ||
| PGC_USERSET, // context (USERSET, SUSET, etc.) | ||
| 0, // flags | ||
| nullptr, | ||
| nullptr, | ||
| nullptr // check_hook, assign_hook, show_hook | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -340,7 +340,7 @@ inline table_data::streamer_info& table_data::get_streamers() noexcept | |||||
|
|
||||||
| inline bool table_data::column_has_streamer(uint32_t idx) const noexcept | ||||||
| { | ||||||
| return streamers_.streamers.size() > idx && streamers_.streamers[idx] != nullptr; | ||||||
| return streamers_.column_to_batches.size() > idx && !streamers_.column_to_batches[idx].empty(); | ||||||
| } | ||||||
|
|
||||||
| inline void table_data::reset_streamers() noexcept | ||||||
|
|
@@ -529,45 +529,49 @@ inline std::pair<int64_t, int64_t> table_data::get_row_range(int32_t worker_id) | |||||
|
|
||||||
| inline void table_data::create_streamer(int32_t idx, int32_t worker_id) | ||||||
| { | ||||||
| if (streamers_.streamers.empty()) { | ||||||
| const auto s = num_columns(); | ||||||
| streamers_.streamers.resize(s); | ||||||
| std::vector<streamer_info::column_data> temp_data(s); | ||||||
| streamers_.column_to_batches.swap(temp_data); | ||||||
| } | ||||||
| if (!streamers_.streamers[idx]) { | ||||||
| if (pg::memory_tracker::has_memory_limit()) { | ||||||
| const auto column_size = | ||||||
| pg::utils::get_column_width(get_base_atttypid(idx), get_atttypmod(idx)) * num_rows(); | ||||||
| pg::memory_tracker::ensure_memory_available(column_size); | ||||||
| } | ||||||
| if (worker_id != -1) { | ||||||
| auto [start_row, end_row] = get_row_range(worker_id); | ||||||
| auto new_column = heimdall_common::create_filtered_column( | ||||||
| *(get_column_view(idx)), icm::index_mapping_t<int64_t>::slice({start_row, end_row, 1})); | ||||||
| streamers_.streamers[idx] = std::make_unique<bifrost::column_streamer>(new_column, batch_size_); | ||||||
| } else { | ||||||
| streamers_.streamers[idx] = std::make_unique<bifrost::column_streamer>(get_column_view(idx), batch_size_); | ||||||
| } | ||||||
| const int64_t batch_index = (num_rows() - 1) / batch_size_; | ||||||
| streamers_.column_to_batches[idx].batches.resize(batch_index + 1); | ||||||
| const auto col_count = num_columns(); | ||||||
| if (streamers_.column_to_batches.empty()) { | ||||||
| streamers_.column_to_batches.resize(col_count); | ||||||
| } | ||||||
| ASSERT(idx >= 0 && idx < col_count); | ||||||
| auto& column_batches = streamers_.column_to_batches[idx]; | ||||||
| if (!column_batches.empty()) { | ||||||
| return; | ||||||
| } | ||||||
| if (pg::memory_tracker::has_memory_limit()) { | ||||||
| const auto column_size = pg::utils::get_column_width(get_base_atttypid(idx), get_atttypmod(idx)) * num_rows(); | ||||||
| pg::memory_tracker::ensure_memory_available(column_size); | ||||||
| } | ||||||
| heimdall::column_view_ptr cv = get_column_view(idx); | ||||||
| if (worker_id != -1) { | ||||||
| auto [start_row, end_row] = get_row_range(worker_id); | ||||||
| cv = heimdall_common::create_filtered_column(*(cv), | ||||||
| icm::index_mapping_t<int64_t>::slice({start_row, end_row, 1})); | ||||||
| } | ||||||
| const int64_t row_count = num_rows(); | ||||||
| const int64_t batch_count = (row_count + batch_size_ - 1) / batch_size_; | ||||||
| column_batches = std::vector<streamer_info::batch_data>(batch_count); | ||||||
| for (int64_t i = 0; i < batch_count; ++i) { | ||||||
| const auto range_start = i * batch_size_; | ||||||
| const auto range_end = std::min<int64_t>(range_start + batch_size_, row_count); | ||||||
| auto p = async::run_on_main([cv, range_start, range_end, row_count]() { | ||||||
| return cv->request_range( | ||||||
| range_start, range_end, storage::fetch_options(static_cast<int>(row_count - range_start))); | ||||||
|
||||||
| range_start, range_end, storage::fetch_options(static_cast<int>(row_count - range_start))); | |
| range_start, range_end, storage::fetch_options(static_cast<int>(range_end - range_start))); |
Copilot
AI
Jan 8, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `static_cast<bool>(batch.promise_)` pattern is used to check whether the promise is valid. Consider documenting what constitutes a valid vs. invalid promise state, or use a more self-documenting approach, such as a helper method `batch.is_initialized()`, to improve code readability across all three usages (lines 570, 593, 612).
Copilot
AI
Jan 8, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Resetting the promise by assigning a default-constructed promise is unclear. Consider adding a comment explaining that this marks the batch as initialized, or use a more explicit pattern, such as a separate boolean flag or a method like `batch.mark_initialized()`, to make the intent clearer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Corrected grammar: 'value is not always remain valid' should be 'value does not always remain valid'.