Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit b42d496

Browse files
committed
[Join] Zero-copy storage column check.
This commit adds a check for a valid pointer to the column buffer so that copying can be skipped. It is most effective with the `enable-non-lazy-data-import` option enabled. It also checks the number of chunks in storage. Partially resolves: #574 Signed-off-by: Dmitrii Makarenko <[email protected]>
1 parent e028550 commit b42d496

File tree

17 files changed

+144
-0
lines changed

17 files changed

+144
-0
lines changed

omniscidb/ArrowStorage/ArrowStorage.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,51 @@ std::unique_ptr<AbstractDataToken> ArrowStorage::getZeroCopyBufferMemory(
187187
return nullptr;
188188
}
189189

190+
// Attempts to expose the data of a whole fixed-width column without copying it.
//
// @param col_ref  Identifies the column through its db_id, table_id and column_id.
// @return A token built from the column's Arrow chunk when the column type is
//         not variable-length and its data consists of exactly one chunk;
//         nullptr otherwise (variable-length type, column index beyond
//         table.col_data, or a chunk count other than one).
//
// CHECK-fails when col_ref.db_id differs from db_id_, when the table is not
// present in tables_, when no dictionary descriptor is returned for a dictionary
// column, or when the stored Arrow type is not an arrow::FixedWidthType.
std::unique_ptr<AbstractDataToken> ArrowStorage::getZeroCopyColumnData(
191+
const ColumnRef& col_ref) {
192+
// The storage-wide shared lock is held only while the table is looked up; it
// is released below once the per-table shared lock has been acquired.
mapd_shared_lock<mapd_shared_mutex> data_lock(data_mutex_);
193+
CHECK_EQ(col_ref.db_id, db_id_);
194+
CHECK_EQ(tables_.count(col_ref.table_id), (size_t)1);
195+
auto& table = *tables_.at(col_ref.table_id);
196+
mapd_shared_lock<mapd_shared_mutex> table_lock(table.mutex);
197+
data_lock.unlock();
198+
199+
auto col_type = getColumnInfo(col_ref.db_id, col_ref.table_id, col_ref.column_id)->type;
200+
201+
// Dictionary-encoded columns: request the dictionary metadata up front and
// require that a descriptor comes back.
if (col_type->isExtDictionary()) {
202+
auto dict_id = col_type->as<hdk::ir::ExtDictionaryType>()->dictId();
203+
auto dict_descriptor = getDictMetadata(
204+
dict_id); // this will force materialize the dictionary. it is thread safe
205+
CHECK(dict_descriptor);
206+
}
207+
208+
// Only non-variable-length columns are considered; everything else falls
// through to the final `return nullptr`.
if (!col_type->isVarLen()) {
209+
size_t col_idx = columnIndex(col_ref.column_id);
210+
if (col_idx >= table.col_data.size()) {
211+
return nullptr;
212+
}
213+
size_t elem_size = col_type->size();
214+
const auto* fixed_type =
215+
dynamic_cast<const arrow::FixedWidthType*>(table.col_data[col_idx]->type().get());
216+
CHECK(fixed_type) << table.col_data[col_idx]->type()->ToString() << " (table "
217+
<< col_ref.table_id << ", column " << col_idx << ")";
218+
// NOTE(review): integer division; a type narrower than 8 bits would yield 0
// here and make the division on the next line divide by zero. Presumably
// such types are never stored in col_data -- confirm.
size_t arrow_elem_size = fixed_type->bit_width() / 8;
219+
// `elems` is used only by the sanity check that follows.
size_t elems = elem_size / arrow_elem_size;
220+
CHECK_GT(elems, (size_t)0);
221+
auto data_to_fetch = table.col_data[col_idx];
222+
// A pointer is handed out only when the column has exactly one chunk.
if (data_to_fetch->num_chunks() == 1) {
223+
auto chunk = data_to_fetch->chunk(0);
224+
// Buffer index 1 with the chunk's element offset converted to bytes (the
// pointer is typed int8_t). Presumably buffer 1 is the Arrow values buffer
// -- confirm against the arrow::ArrayData documentation.
const int8_t* ptr =
225+
chunk->data()->GetValues<int8_t>(1, chunk->data()->offset * arrow_elem_size);
226+
size_t chunk_size = chunk->length() * arrow_elem_size;
227+
// The chunk pointer is moved into the token together with the raw data
// pointer and its size in bytes.
return std::make_unique<ArrowChunkDataToken>(
228+
std::move(chunk), col_type, ptr, chunk_size);
229+
}
230+
}
231+
232+
return nullptr;
233+
}
234+
190235
void ArrowStorage::fetchFixedLenData(const TableData& table,
191236
size_t frag_idx,
192237
size_t col_idx,

omniscidb/ArrowStorage/ArrowStorage.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ class ArrowStorage : public SimpleSchemaProvider, public AbstractDataProvider {
7070
const ChunkKey& key,
7171
size_t num_bytes) override;
7272

73+
// Override that returns a data token for the column identified by col_ref,
// or nullptr when the definition in ArrowStorage.cpp does not hand out a pointer.
std::unique_ptr<Data_Namespace::AbstractDataToken> getZeroCopyColumnData(
74+
const ColumnRef& col_ref) override;
75+
7376
TableFragmentsInfo getTableMetadata(int db_id, int table_id) const override;
7477

7578
const DictDescriptor* getDictMetadata(int dict_id, bool load_dict = true) override;

omniscidb/DataMgr/AbstractBufferMgr.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ class AbstractBufferMgr {
8686
virtual AbstractBuffer* getBuffer(const ChunkKey& key, const size_t numBytes = 0) = 0;
8787
virtual std::unique_ptr<AbstractDataToken> getZeroCopyBufferMemory(const ChunkKey& key,
8888
size_t numBytes) = 0;
89+
// Pure virtual: returns a data token for the column identified by col_ref.
// Implementations shown alongside this declaration return nullptr when they
// do not provide such a token.
virtual std::unique_ptr<AbstractDataToken> getZeroCopyColumnData(
90+
const ColumnRef& col_ref) = 0;
8991
virtual void fetchBuffer(const ChunkKey& key,
9092
AbstractBuffer* destBuffer,
9193
const size_t numBytes = 0) = 0;

omniscidb/DataMgr/AbstractDataProvider.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,13 @@ class AbstractDataProvider : public Data_Namespace::AbstractBufferMgr {
4545
return nullptr;
4646
}
4747

48+
// Default implementation: always returns nullptr, i.e. no column data token
// is offered unless a derived class overrides this method.
// TODO(dmitriim) remove this method after enabling
49+
// the hashtable that takes frag_id and offset into account
50+
std::unique_ptr<Data_Namespace::AbstractDataToken> getZeroCopyColumnData(
51+
const ColumnRef& col_ref) override {
52+
return nullptr;
53+
}
54+
4855
void deleteBuffer(const ChunkKey& key, const bool purge = true) override {
4956
UNREACHABLE();
5057
}

omniscidb/DataMgr/BufferMgr/BufferMgr.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,11 @@ std::unique_ptr<AbstractDataToken> BufferMgr::getZeroCopyBufferMemory(const Chun
865865
return parent_mgr_->getZeroCopyBufferMemory(key, numBytes);
866866
}
867867

868+
// Forwards the request unchanged to parent_mgr_ and returns its result.
std::unique_ptr<AbstractDataToken> BufferMgr::getZeroCopyColumnData(
869+
const ColumnRef& col_ref) {
870+
return parent_mgr_->getZeroCopyColumnData(col_ref);
871+
}
872+
868873
MemoryInfo BufferMgr::getMemoryInfo() {
869874
std::unique_lock<std::mutex> sized_segs_lock(sized_segs_mutex_);
870875
MemoryInfo mi;

omniscidb/DataMgr/BufferMgr/BufferMgr.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,9 @@ class BufferMgr : public AbstractBufferMgr { // implements
162162
std::unique_ptr<AbstractDataToken> getZeroCopyBufferMemory(const ChunkKey& key,
163163
size_t numBytes) override;
164164

165+
// Override that returns a data token for the column identified by col_ref;
// the definition in BufferMgr.cpp delegates to the parent manager.
std::unique_ptr<AbstractDataToken> getZeroCopyColumnData(
166+
const ColumnRef& col_ref) override;
167+
165168
/**
166169
* @brief Puts the contents of d into the Buffer with ChunkKey key.
167170
* @param key - Unique identifier for a Chunk.

omniscidb/DataMgr/DataMgr.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,13 @@ AbstractBuffer* DataMgr::getChunkBuffer(const ChunkKey& key,
458458
return bufferMgrs_[level][deviceId]->getBuffer(key, numBytes);
459459
}
460460

461+
// Requests a column data token from the buffer manager at
// Data_Namespace::CPU_LEVEL, always using the manager at index 0 of that
// level, and returns whatever that manager returns.
std::unique_ptr<AbstractDataToken> DataMgr::getZeroCopyColumnData(
462+
const ColumnRef& col_ref) {
463+
const auto level = static_cast<size_t>(Data_Namespace::CPU_LEVEL);
464+
CHECK_LT(level, levelSizes_.size()); // make sure we have a legit buffermgr
465+
return bufferMgrs_[level][0]->getZeroCopyColumnData(col_ref);
466+
}
467+
461468
void DataMgr::deleteChunksWithPrefix(const ChunkKey& keyPrefix) {
462469
int numLevels = bufferMgrs_.size();
463470
for (int level = numLevels - 1; level >= 0; --level) {

omniscidb/DataMgr/DataMgr.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,8 @@ class DataMgr {
177177
const MemoryLevel memoryLevel,
178178
const int deviceId = 0,
179179
const size_t numBytes = 0);
180+
// Returns a data token for the column identified by col_ref, obtained from
// the CPU-level buffer manager (see the definition in DataMgr.cpp).
// TODO(dmitriim) remove this method after enabling of hashtable
181+
std::unique_ptr<AbstractDataToken> getZeroCopyColumnData(const ColumnRef& col_ref);
180182
void deleteChunksWithPrefix(const ChunkKey& keyPrefix);
181183
void deleteChunksWithPrefix(const ChunkKey& keyPrefix, const MemoryLevel memLevel);
182184
AbstractBuffer* alloc(const MemoryLevel memoryLevel,

omniscidb/DataMgr/DataMgrDataProvider.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,16 @@ std::shared_ptr<Chunk_NS::Chunk> DataMgrDataProvider::getChunk(
2929
return Chunk_NS::Chunk::getChunk(
3030
col_info, data_mgr_, key, memory_level, device_id, num_bytes, num_elems);
3131
}
32+
33+
// Forwards the request unchanged to data_mgr_ and returns its result.
std::unique_ptr<Data_Namespace::AbstractDataToken>
34+
DataMgrDataProvider::getZeroCopyColumnData(const ColumnRef& col_ref) {
35+
return data_mgr_->getZeroCopyColumnData(col_ref);
36+
}
37+
3238
TableFragmentsInfo DataMgrDataProvider::getTableMetadata(int db_id, int table_id) const {
3339
return data_mgr_->getTableMetadata(db_id, table_id);
3440
}
41+
3542
const DictDescriptor* DataMgrDataProvider::getDictMetadata(int dict_id,
3643
bool load_dict) const {
3744
return data_mgr_->getDictMetadata(dict_id, load_dict);

omniscidb/DataMgr/DataMgrDataProvider.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ class DataMgrDataProvider : public DataProvider {
3535
const size_t num_bytes,
3636
const size_t num_elems) override;
3737

38+
// Override that returns a data token for the column identified by col_ref;
// the definition in DataMgrDataProvider.cpp delegates to the data manager.
std::unique_ptr<Data_Namespace::AbstractDataToken> getZeroCopyColumnData(
39+
const ColumnRef& col_ref) override;
40+
3841
TableFragmentsInfo getTableMetadata(int db_id, int table_id) const override;
3942

4043
const DictDescriptor* getDictMetadata(int dict_id,

0 commit comments

Comments
 (0)