Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit c4af116

Browse files
committed
frags order
1 parent c75a886 commit c4af116

File tree

6 files changed

+31
-38
lines changed

6 files changed

+31
-38
lines changed

omniscidb/QueryEngine/ColumnFetcher.cpp

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ std::pair<const int8_t*, size_t> ColumnFetcher::getOneColumnFragment(
5959
const Data_Namespace::MemoryLevel effective_mem_lvl,
6060
const int device_id,
6161
DeviceAllocator* device_allocator,
62-
const size_t thread_idx,
6362
std::vector<std::shared_ptr<Chunk_NS::Chunk>>& chunks_owner,
6463
DataProvider* data_provider,
6564
ColumnCacheMap& column_cache) {
@@ -115,7 +114,6 @@ JoinColumn ColumnFetcher::makeJoinColumn(
115114
const Data_Namespace::MemoryLevel effective_mem_lvl,
116115
const int device_id,
117116
DeviceAllocator* device_allocator,
118-
const size_t thread_idx,
119117
std::vector<std::shared_ptr<Chunk_NS::Chunk>>& chunks_owner,
120118
std::vector<std::shared_ptr<void>>& malloc_owner,
121119
DataProvider* data_provider,
@@ -142,7 +140,6 @@ JoinColumn ColumnFetcher::makeJoinColumn(
142140
effective_mem_lvl,
143141
effective_mem_lvl == Data_Namespace::CPU_LEVEL ? 0 : device_id,
144142
device_allocator,
145-
thread_idx,
146143
chunks_owner,
147144
data_provider,
148145
column_cache);
@@ -364,8 +361,7 @@ const int8_t* ColumnFetcher::linearizeColumnFragments(
364361
std::list<ChunkIter>& chunk_iter_holder,
365362
const Data_Namespace::MemoryLevel memory_level,
366363
const int device_id,
367-
DeviceAllocator* device_allocator,
368-
const size_t thread_idx) const {
364+
DeviceAllocator* device_allocator) const {
369365
auto timer = DEBUG_TIMER(__func__);
370366
int db_id = col_info->db_id;
371367
int table_id = col_info->table_id;
@@ -477,8 +473,7 @@ const int8_t* ColumnFetcher::linearizeColumnFragments(
477473
total_data_buf_size,
478474
total_idx_buf_size,
479475
total_num_tuples,
480-
device_allocator,
481-
thread_idx);
476+
device_allocator);
482477
} else {
483478
CHECK(type->isVarLenArray());
484479
VLOG(2) << "Linearize variable-length multi-frag array column (col_id: " << col_id
@@ -496,8 +491,7 @@ const int8_t* ColumnFetcher::linearizeColumnFragments(
496491
total_data_buf_size,
497492
total_idx_buf_size,
498493
total_num_tuples,
499-
device_allocator,
500-
thread_idx);
494+
device_allocator);
501495
}
502496
}
503497
if (type->isString()) {
@@ -516,8 +510,7 @@ const int8_t* ColumnFetcher::linearizeColumnFragments(
516510
total_data_buf_size,
517511
total_idx_buf_size,
518512
total_num_tuples,
519-
device_allocator,
520-
thread_idx);
513+
device_allocator);
521514
}
522515
}
523516
CHECK(res.first); // check merged data buffer
@@ -573,8 +566,7 @@ MergedChunk ColumnFetcher::linearizeVarLenArrayColFrags(
573566
const size_t total_data_buf_size,
574567
const size_t total_idx_buf_size,
575568
const size_t total_num_tuples,
576-
DeviceAllocator* device_allocator,
577-
const size_t thread_idx) const {
569+
DeviceAllocator* device_allocator) const {
578570
// for linearization of varlen col we have to deal with not only data buffer
579571
// but also its underlying index buffer which is responsible for offset of varlen value
580572
// basically we maintain per-device linearized (data/index) buffer
@@ -902,8 +894,7 @@ MergedChunk ColumnFetcher::linearizeFixedLenArrayColFrags(
902894
const size_t total_data_buf_size,
903895
const size_t total_idx_buf_size,
904896
const size_t total_num_tuples,
905-
DeviceAllocator* device_allocator,
906-
const size_t thread_idx) const {
897+
DeviceAllocator* device_allocator) const {
907898
int64_t linearization_time_ms = 0;
908899
auto clock_begin = timer_start();
909900
// linearize collected fragments

omniscidb/QueryEngine/ColumnFetcher.h

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ class ColumnFetcher {
4646
const Data_Namespace::MemoryLevel effective_mem_lvl,
4747
const int device_id,
4848
DeviceAllocator* device_allocator,
49-
const size_t thread_idx,
5049
std::vector<std::shared_ptr<Chunk_NS::Chunk>>& chunks_owner,
5150
DataProvider* data_provider,
5251
ColumnCacheMap& column_cache);
@@ -59,7 +58,6 @@ class ColumnFetcher {
5958
const Data_Namespace::MemoryLevel effective_mem_lvl,
6059
const int device_id,
6160
DeviceAllocator* device_allocator,
62-
const size_t thread_idx,
6361
std::vector<std::shared_ptr<Chunk_NS::Chunk>>& chunks_owner,
6462
std::vector<std::shared_ptr<void>>& malloc_owner,
6563
DataProvider* data_provider,
@@ -90,8 +88,7 @@ class ColumnFetcher {
9088
std::list<ChunkIter>& chunk_iter_holder,
9189
const Data_Namespace::MemoryLevel memory_level,
9290
const int device_id,
93-
DeviceAllocator* device_allocator,
94-
const size_t thread_idx) const;
91+
DeviceAllocator* device_allocator) const;
9592

9693
void freeTemporaryCpuLinearizedIdxBuf();
9794
void freeLinearizedBuf();
@@ -118,8 +115,7 @@ class ColumnFetcher {
118115
const size_t total_data_buf_size,
119116
const size_t total_idx_buf_size,
120117
const size_t total_num_tuples,
121-
DeviceAllocator* device_allocator,
122-
const size_t thread_idx) const;
118+
DeviceAllocator* device_allocator) const;
123119

124120
MergedChunk linearizeFixedLenArrayColFrags(
125121
std::list<std::shared_ptr<Chunk_NS::Chunk>>& chunk_holder,
@@ -133,8 +129,7 @@ class ColumnFetcher {
133129
const size_t total_data_buf_size,
134130
const size_t total_idx_buf_size,
135131
const size_t total_num_tuples,
136-
DeviceAllocator* device_allocator,
137-
const size_t thread_idx) const;
132+
DeviceAllocator* device_allocator) const;
138133

139134
void addMergedChunkIter(const int table_id,
140135
const int col_id,

omniscidb/QueryEngine/Execute.cpp

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2871,7 +2871,7 @@ std::map<size_t, std::vector<uint64_t>> get_table_id_to_frag_offsets(
28712871
std::pair<std::vector<std::vector<int64_t>>, std::vector<std::vector<uint64_t>>>
28722872
Executor::getRowCountAndOffsetForAllFrags(
28732873
const RelAlgExecutionUnit& ra_exe_unit,
2874-
const CartesianProduct<std::vector<std::vector<size_t>>>& frag_ids_crossjoin,
2874+
const std::vector<std::vector<size_t>>& frag_ids_crossjoin,
28752875
const std::vector<InputDescriptor>& input_descs,
28762876
const std::map<TableRef, const TableFragments*>& all_tables_fragments) {
28772877
std::vector<std::vector<int64_t>> all_num_rows;
@@ -2947,6 +2947,8 @@ bool Executor::needLinearizeAllFragments(
29472947
const auto& fragments = selected_fragments[nest_level].fragment_ids;
29482948
auto need_linearize =
29492949
inner_col_desc.type()->isArray() || inner_col_desc.type()->isString();
2950+
LOG(INFO) << inner_col_desc.type()->isArray() << " || "
2951+
<< inner_col_desc.type()->isString() << ") && " << fragments.size() << " > 1";
29502952
return need_linearize && fragments.size() > 1;
29512953
}
29522954

@@ -2984,6 +2986,9 @@ FetchResult Executor::fetchChunks(
29842986
std::vector<std::vector<const int8_t*>> all_frag_col_buffers;
29852987
std::vector<std::vector<int64_t>> all_num_rows;
29862988
std::vector<std::vector<uint64_t>> all_frag_offsets;
2989+
2990+
// in MT case we want to preserve "the order of insertion" into all_frag_col_buffers
2991+
std::vector<std::vector<size_t>> selected_frag_ids_vec;
29872992
if(memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL){
29882993
std::mutex all_frag;
29892994
std::atomic<bool> empty_frags{false};
@@ -2993,7 +2998,6 @@ FetchResult Executor::fetchChunks(
29932998
frag_ids_crossjoin.begin(),
29942999
frag_ids_crossjoin.end(),
29953000
[&](const std::vector<size_t>& selected_frag_ids) {
2996-
// for (const auto& selected_frag_ids : frag_ids_crossjoin) {
29973001
std::vector<const int8_t*> frag_col_buffers(
29983002
plan_state_->global_to_local_col_ids_.size());
29993003
for (const auto& col_id : col_global_ids) {
@@ -3041,16 +3045,15 @@ FetchResult Executor::fetchChunks(
30413045
chunk_iterators,
30423046
for_lazy_fetch ? Data_Namespace::CPU_LEVEL : memory_level,
30433047
for_lazy_fetch ? 0 : device_id,
3044-
device_allocator,
3045-
thread_idx);
3048+
device_allocator);
30463049
} else {
30473050
frag_col_buffers[it->second] =
30483051
column_fetcher.getAllTableColumnFragments(col_id->getColInfo(),
30493052
all_tables_fragments,
30503053
memory_level_for_column,
30513054
device_id,
30523055
device_allocator,
3053-
thread_idx);
3056+
/*thread_idx=*/0);
30543057
}
30553058
} else {
30563059
frag_col_buffers[it->second] =
@@ -3065,10 +3068,11 @@ FetchResult Executor::fetchChunks(
30653068
}
30663069
}
30673070
all_frag.lock();
3071+
selected_frag_ids_vec.push_back(selected_frag_ids);
30683072
all_frag_col_buffers.push_back(frag_col_buffers);
30693073
all_frag.unlock();
3070-
});
3071-
});
3074+
});
3075+
});
30723076
if (empty_frags) {
30733077
return {};
30743078
}
@@ -3120,8 +3124,7 @@ FetchResult Executor::fetchChunks(
31203124
chunk_iterators,
31213125
for_lazy_fetch ? Data_Namespace::CPU_LEVEL : memory_level,
31223126
for_lazy_fetch ? 0 : device_id,
3123-
device_allocator,
3124-
thread_idx);
3127+
device_allocator);
31253128
} else {
31263129
frag_col_buffers[it->second] =
31273130
column_fetcher.getAllTableColumnFragments(col_id->getColInfo(),
@@ -3143,11 +3146,12 @@ FetchResult Executor::fetchChunks(
31433146
device_allocator);
31443147
}
31453148
}
3149+
selected_frag_ids_vec.push_back(selected_frag_ids);
31463150
all_frag_col_buffers.push_back(frag_col_buffers);
31473151
}
31483152
}
31493153
std::tie(all_num_rows, all_frag_offsets) = getRowCountAndOffsetForAllFrags(
3150-
ra_exe_unit, frag_ids_crossjoin, ra_exe_unit.input_descs, all_tables_fragments);
3154+
ra_exe_unit, selected_frag_ids_vec, ra_exe_unit.input_descs, all_tables_fragments);
31513155
return {all_frag_col_buffers, all_num_rows, all_frag_offsets};
31523156
}
31533157

@@ -3171,6 +3175,7 @@ FetchResult Executor::fetchUnionChunks(
31713175
std::vector<std::vector<const int8_t*>> all_frag_col_buffers;
31723176
std::vector<std::vector<int64_t>> all_num_rows;
31733177
std::vector<std::vector<uint64_t>> all_frag_offsets;
3178+
std::vector<std::vector<size_t>> selected_frag_ids_vec;
31743179

31753180
CHECK(!selected_fragments.empty());
31763181
CHECK_LE(2u, ra_exe_unit.input_descs.size());
@@ -3269,12 +3274,16 @@ FetchResult Executor::fetchUnionChunks(
32693274
device_allocator);
32703275
}
32713276
}
3277+
selected_frag_ids_vec.push_back(selected_frag_ids);
32723278
all_frag_col_buffers.push_back(frag_col_buffers);
32733279
}
32743280
std::vector<std::vector<int64_t>> num_rows;
32753281
std::vector<std::vector<uint64_t>> frag_offsets;
3276-
std::tie(num_rows, frag_offsets) = getRowCountAndOffsetForAllFrags(
3277-
ra_exe_unit, frag_ids_crossjoin, ra_exe_unit.input_descs, all_tables_fragments);
3282+
std::tie(num_rows, frag_offsets) =
3283+
getRowCountAndOffsetForAllFrags(ra_exe_unit,
3284+
selected_frag_ids_vec,
3285+
ra_exe_unit.input_descs,
3286+
all_tables_fragments);
32783287
all_num_rows.insert(all_num_rows.end(), num_rows.begin(), num_rows.end());
32793288
all_frag_offsets.insert(
32803289
all_frag_offsets.end(), frag_offsets.begin(), frag_offsets.end());

omniscidb/QueryEngine/Execute.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,7 @@ class Executor : public StringDictionaryProvider {
571571
std::pair<std::vector<std::vector<int64_t>>, std::vector<std::vector<uint64_t>>>
572572
getRowCountAndOffsetForAllFrags(
573573
const RelAlgExecutionUnit& ra_exe_unit,
574-
const CartesianProduct<std::vector<std::vector<size_t>>>& frag_ids_crossjoin,
574+
const std::vector<std::vector<size_t>>& frag_ids_crossjoin,
575575
const std::vector<InputDescriptor>& input_descs,
576576
const std::map<TableRef, const TableFragments*>& all_tables_fragments);
577577

omniscidb/QueryEngine/JoinHashTable/HashJoin.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ JoinColumn HashJoin::fetchJoinColumn(
5252
effective_memory_level,
5353
device_id,
5454
dev_buff_owner,
55-
/*thread_idx=*/0,
5655
chunks_owner,
5756
malloc_owner,
5857
data_provider_,

omniscidb/QueryEngine/RelAlgExecutor.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1154,7 +1154,6 @@ std::unique_ptr<WindowFunctionContext> RelAlgExecutor::createWindowFunctionConte
11541154
memory_level,
11551155
0,
11561156
nullptr,
1157-
/*thread_idx=*/0,
11581157
chunks_owner,
11591158
data_provider_,
11601159
column_cache_map);

0 commit comments

Comments
 (0)