Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 40fd548

Browse files
committed
[Join] Remove redundant copies.
This commit removes redundant copying (memcpy) in `getAllTableColumnFragments` and adds some parallelization. Resolves: #574 Signed-off-by: Dmitrii Makarenko <[email protected]>
1 parent 619af58 commit 40fd548

18 files changed

+231
-34
lines changed

omniscidb/ArrowStorage/ArrowStorage.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,51 @@ std::unique_ptr<AbstractDataToken> ArrowStorage::getZeroCopyBufferMemory(
187187
return nullptr;
188188
}
189189

190+
std::unique_ptr<AbstractDataToken> ArrowStorage::getZeroCopyColumnData(
191+
const ColumnRef& col_ref) {
192+
mapd_shared_lock<mapd_shared_mutex> data_lock(data_mutex_);
193+
CHECK_EQ(col_ref.db_id, db_id_);
194+
CHECK_EQ(tables_.count(col_ref.table_id), (size_t)1);
195+
auto& table = *tables_.at(col_ref.table_id);
196+
mapd_shared_lock<mapd_shared_mutex> table_lock(table.mutex);
197+
data_lock.unlock();
198+
199+
auto col_type = getColumnInfo(col_ref.db_id, col_ref.table_id, col_ref.column_id)->type;
200+
201+
if (col_type->isExtDictionary()) {
202+
auto dict_id = col_type->as<hdk::ir::ExtDictionaryType>()->dictId();
203+
auto dict_descriptor = getDictMetadata(
204+
dict_id); // this will force materialize the dictionary. it is thread safe
205+
CHECK(dict_descriptor);
206+
}
207+
208+
if (!col_type->isVarLen()) {
209+
size_t col_idx = columnIndex(col_ref.column_id);
210+
size_t elem_size = col_type->size();
211+
const auto* fixed_type =
212+
dynamic_cast<const arrow::FixedWidthType*>(table.col_data[col_idx]->type().get());
213+
CHECK(fixed_type) << table.col_data[col_idx]->type()->ToString() << " (table "
214+
<< col_ref.table_id << ", column " << col_idx << ")";
215+
size_t arrow_elem_size = fixed_type->bit_width() / 8;
216+
// For fixed size arrays we simply use elem type in arrow and therefore have to scale
217+
// to get a proper slice.
218+
size_t elems = elem_size / arrow_elem_size;
219+
CHECK_GT(elems, (size_t)0);
220+
auto data_to_fetch = table.col_data[col_idx];
221+
LOG(ERROR) << "getZeroCopyColumnData num_chunks: " << data_to_fetch->num_chunks();
222+
if (data_to_fetch->num_chunks() == 1) {
223+
auto chunk = data_to_fetch->chunk(0);
224+
const int8_t* ptr =
225+
chunk->data()->GetValues<int8_t>(1, chunk->data()->offset * arrow_elem_size);
226+
size_t chunk_size = chunk->length() * arrow_elem_size;
227+
return std::make_unique<ArrowChunkDataToken>(
228+
std::move(chunk), col_type, ptr, chunk_size);
229+
}
230+
}
231+
232+
return nullptr;
233+
}
234+
190235
void ArrowStorage::fetchFixedLenData(const TableData& table,
191236
size_t frag_idx,
192237
size_t col_idx,

omniscidb/ArrowStorage/ArrowStorage.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ class ArrowStorage : public SimpleSchemaProvider, public AbstractDataProvider {
7070
const ChunkKey& key,
7171
size_t num_bytes) override;
7272

73+
std::unique_ptr<Data_Namespace::AbstractDataToken> getZeroCopyColumnData(
74+
const ColumnRef& col_ref) override;
75+
7376
TableFragmentsInfo getTableMetadata(int db_id, int table_id) const override;
7477

7578
const DictDescriptor* getDictMetadata(int dict_id, bool load_dict = true) override;

omniscidb/DataMgr/AbstractBufferMgr.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,11 @@ class AbstractBufferMgr {
8484
virtual void deleteBuffersWithPrefix(const ChunkKey& keyPrefix,
8585
const bool purge = true) = 0;
8686
virtual AbstractBuffer* getBuffer(const ChunkKey& key, const size_t numBytes = 0) = 0;
87+
// virtual AbstractBuffer* getZeroCopyColumnBuffer(const ColumnRef &col_ref) = 0;
8788
virtual std::unique_ptr<AbstractDataToken> getZeroCopyBufferMemory(const ChunkKey& key,
8889
size_t numBytes) = 0;
90+
virtual std::unique_ptr<AbstractDataToken> getZeroCopyColumnData(
91+
const ColumnRef& col_ref) = 0;
8992
virtual void fetchBuffer(const ChunkKey& key,
9093
AbstractBuffer* destBuffer,
9194
const size_t numBytes = 0) = 0;

omniscidb/DataMgr/AbstractDataProvider.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,13 @@ class AbstractDataProvider : public Data_Namespace::AbstractBufferMgr {
4242
std::unique_ptr<Data_Namespace::AbstractDataToken> getZeroCopyBufferMemory(
4343
const ChunkKey& key,
4444
size_t numBytes) override {
45+
UNREACHABLE();
46+
return nullptr;
47+
}
48+
49+
std::unique_ptr<Data_Namespace::AbstractDataToken> getZeroCopyColumnData(
50+
const ColumnRef& col_ref) override {
51+
UNREACHABLE();
4552
return nullptr;
4653
}
4754

omniscidb/DataMgr/BufferMgr/BufferMgr.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,20 @@ std::unique_ptr<AbstractDataToken> BufferMgr::getZeroCopyBufferMemory(const Chun
865865
return parent_mgr_->getZeroCopyBufferMemory(key, numBytes);
866866
}
867867

868+
std::unique_ptr<AbstractDataToken> BufferMgr::getZeroCopyColumnData(
869+
const ColumnRef& col_ref) {
870+
return parent_mgr_->getZeroCopyColumnData(col_ref);
871+
}
872+
873+
// AbstractBuffer* BufferMgr::getZeroCopyColumnBuffer(const ColumnRef& col_ref) {
874+
// AbstractBuffer* res = nullptr;
875+
876+
// if (auto token = getZeroCopyColumnData(col_ref)) {
877+
// res = createZeroCopyBuffer({}, std::move(token));
878+
// }
879+
// return res;
880+
// }
881+
868882
MemoryInfo BufferMgr::getMemoryInfo() {
869883
std::unique_lock<std::mutex> sized_segs_lock(sized_segs_mutex_);
870884
MemoryInfo mi;

omniscidb/DataMgr/BufferMgr/BufferMgr.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,12 @@ class BufferMgr : public AbstractBufferMgr { // implements
159159
/// Returns a pointer to the chunk with the specified key.
160160
AbstractBuffer* getBuffer(const ChunkKey& key, const size_t num_bytes = 0) override;
161161

162+
// AbstractBuffer* BufferMgr::getZeroCopyColumnBuffer(const ColumnRef &col_ref);
163+
162164
std::unique_ptr<AbstractDataToken> getZeroCopyBufferMemory(const ChunkKey& key,
163165
size_t numBytes) override;
166+
std::unique_ptr<AbstractDataToken> getZeroCopyColumnData(
167+
const ColumnRef& col_ref) override;
164168

165169
/**
166170
* @brief Puts the contents of d into the Buffer with ChunkKey key.

omniscidb/DataMgr/DataMgr.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,13 @@ AbstractBuffer* DataMgr::getChunkBuffer(const ChunkKey& key,
458458
return bufferMgrs_[level][deviceId]->getBuffer(key, numBytes);
459459
}
460460

461+
std::unique_ptr<AbstractDataToken> DataMgr::getZeroCopyColumnData(
462+
const ColumnRef& col_ref) {
463+
const auto level = static_cast<size_t>(Data_Namespace::CPU_LEVEL);
464+
CHECK_LT(level, levelSizes_.size()); // make sure we have a legit buffermgr
465+
return bufferMgrs_[level][0]->getZeroCopyColumnData(col_ref);
466+
}
467+
461468
void DataMgr::deleteChunksWithPrefix(const ChunkKey& keyPrefix) {
462469
int numLevels = bufferMgrs_.size();
463470
for (int level = numLevels - 1; level >= 0; --level) {

omniscidb/DataMgr/DataMgr.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ class DataMgr {
177177
const MemoryLevel memoryLevel,
178178
const int deviceId = 0,
179179
const size_t numBytes = 0);
180+
std::unique_ptr<AbstractDataToken> getZeroCopyColumnData(const ColumnRef& col_ref);
180181
void deleteChunksWithPrefix(const ChunkKey& keyPrefix);
181182
void deleteChunksWithPrefix(const ChunkKey& keyPrefix, const MemoryLevel memLevel);
182183
AbstractBuffer* alloc(const MemoryLevel memoryLevel,

omniscidb/DataMgr/DataMgrDataProvider.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,16 @@ std::shared_ptr<Chunk_NS::Chunk> DataMgrDataProvider::getChunk(
2929
return Chunk_NS::Chunk::getChunk(
3030
col_info, data_mgr_, key, memory_level, device_id, num_bytes, num_elems);
3131
}
32+
33+
std::unique_ptr<Data_Namespace::AbstractDataToken>
34+
DataMgrDataProvider::getZeroCopyColumnData(const ColumnRef& col_ref) {
35+
return data_mgr_->getZeroCopyColumnData(col_ref);
36+
}
37+
3238
/// Forwards the table metadata lookup for (db_id, table_id) to the underlying
/// DataMgr.
TableFragmentsInfo DataMgrDataProvider::getTableMetadata(int db_id, int table_id) const {
  auto fragments_info = data_mgr_->getTableMetadata(db_id, table_id);
  return fragments_info;
}
41+
3542
const DictDescriptor* DataMgrDataProvider::getDictMetadata(int dict_id,
3643
bool load_dict) const {
3744
return data_mgr_->getDictMetadata(dict_id, load_dict);

omniscidb/DataMgr/DataMgrDataProvider.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ class DataMgrDataProvider : public DataProvider {
3535
const size_t num_bytes,
3636
const size_t num_elems) override;
3737

38+
std::unique_ptr<Data_Namespace::AbstractDataToken> getZeroCopyColumnData(
39+
const ColumnRef& col_ref) override;
40+
3841
TableFragmentsInfo getTableMetadata(int db_id, int table_id) const override;
3942

4043
const DictDescriptor* getDictMetadata(int dict_id,

0 commit comments

Comments
 (0)