Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit fddc7ce

Browse files
committed
Use functions to map column indexes to IDs.
Signed-off-by: ienkovich <[email protected]>
1 parent 334db27 commit fddc7ce

File tree

11 files changed

+123
-87
lines changed

11 files changed

+123
-87
lines changed

omniscidb/ArrowStorage/ArrowStorage.cpp

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,20 @@ size_t computeTotalStringsLength(std::shared_ptr<arrow::ChunkedArray> arr,
6767
return total_bytes;
6868
}
6969

70+
/**
71+
* Get column ID by its 0-based index (position) in the table.
* The ID is the index shifted up by 1000 and then narrowed to int;
* columnIndex() applies the reverse shift.
72+
*/
73+
int columnId(size_t col_idx) {
74+
return static_cast<int>(col_idx + 1000);
75+
}
76+
77+
/**
78+
* Translate column ID to 0-based index (position) in the table.
* Removes the 1000 offset that columnId() adds. The subtraction is done in int
* before the cast, so an ID below 1000 converts to a very large size_t value
* rather than a small index.
79+
*/
80+
size_t columnIndex(int col_id) {
81+
return static_cast<size_t>(col_id - 1000);
82+
}
83+
7084
} // anonymous namespace
7185

7286
void ArrowStorage::fetchBuffer(const ChunkKey& key,
@@ -79,7 +93,7 @@ void ArrowStorage::fetchBuffer(const ChunkKey& key,
7993
mapd_shared_lock<mapd_shared_mutex> table_lock(table.mutex);
8094
data_lock.unlock();
8195

82-
size_t col_idx = static_cast<size_t>(key[CHUNK_KEY_COLUMN_IDX] - 1);
96+
size_t col_idx = columnIndex(key[CHUNK_KEY_COLUMN_IDX]);
8397
size_t frag_idx = static_cast<size_t>(key[CHUNK_KEY_FRAGMENT_IDX] - 1);
8498
CHECK_LT(frag_idx, table.fragments.size());
8599
CHECK_LT(col_idx, table.col_data.size());
@@ -134,7 +148,7 @@ std::unique_ptr<AbstractDataToken> ArrowStorage::getZeroCopyBufferMemory(
134148
->type;
135149

136150
if (!col_type->isVarLen()) {
137-
size_t col_idx = static_cast<size_t>(key[CHUNK_KEY_COLUMN_IDX] - 1);
151+
size_t col_idx = columnIndex(key[CHUNK_KEY_COLUMN_IDX]);
138152
size_t frag_idx = static_cast<size_t>(key[CHUNK_KEY_FRAGMENT_IDX] - 1);
139153
CHECK_EQ(key.size(), (size_t)4);
140154
size_t elem_size = col_type->size();
@@ -293,7 +307,7 @@ TableFragmentsInfo ArrowStorage::getTableMetadata(int db_id, int table_id) const
293307
frag_info.deviceIds.push_back(0); // Data_Namespace::CPU_LEVEL
294308
frag_info.deviceIds.push_back(0); // Data_Namespace::GPU_LEVEL
295309
for (size_t col_idx = 0; col_idx < frag.metadata.size(); ++col_idx) {
296-
frag_info.setChunkMetadata(static_cast<int>(col_idx + 1), frag.metadata[col_idx]);
310+
frag_info.setChunkMetadata(columnId(col_idx), frag.metadata[col_idx]);
297311
}
298312
}
299313
return res;
@@ -331,14 +345,14 @@ TableInfoPtr ArrowStorage::createTable(const std::string& table_name,
331345
TableInfoPtr res;
332346
int table_id;
333347
mapd_unique_lock<mapd_shared_mutex> data_lock(data_mutex_);
348+
size_t next_col_idx = 0;
334349
{
335350
mapd_unique_lock<mapd_shared_mutex> dict_lock(dict_mutex_);
336351
mapd_unique_lock<mapd_shared_mutex> schema_lock(schema_mutex_);
337352
table_id = next_table_id_++;
338353
checkNewTableParams(table_name, columns, options);
339354
res = addTableInfo(
340355
db_id_, table_id, table_name, false, Data_Namespace::MemoryLevel::CPU_LEVEL, 0);
341-
int next_col_id = 1;
342356
std::unordered_map<int, int> dict_ids;
343357
for (auto& col : columns) {
344358
auto type = col.type;
@@ -382,10 +396,10 @@ TableInfoPtr ArrowStorage::createTable(const std::string& table_name,
382396
type = elem_type;
383397
}
384398
}
385-
auto col_info =
386-
addColumnInfo(db_id_, table_id, next_col_id++, col.name, type, false);
399+
auto col_info = addColumnInfo(
400+
db_id_, table_id, columnId(next_col_idx++), col.name, type, false);
387401
}
388-
addRowidColumn(db_id_, table_id);
402+
addRowidColumn(db_id_, table_id, columnId(next_col_idx++));
389403
}
390404

391405
std::vector<std::shared_ptr<arrow::Field>> fields;
@@ -479,7 +493,7 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
479493
threading::parallel_for(
480494
threading::blocked_range(0, (int)at->columns().size()), [&](auto range) {
481495
for (auto col_idx = range.begin(); col_idx != range.end(); col_idx++) {
482-
auto col_info = getColumnInfo(db_id_, table_id, col_idx + 1);
496+
auto col_info = getColumnInfo(db_id_, table_id, columnId(col_idx));
483497
auto col_type = col_info->type;
484498
auto col_arr = at->column(col_idx);
485499

@@ -605,7 +619,7 @@ void ArrowStorage::appendArrowTable(std::shared_ptr<arrow::Table> at, int table_
605619
auto& first_frag = fragments.front();
606620
last_frag.row_count += first_frag.row_count;
607621
for (size_t col_idx = 0; col_idx < last_frag.metadata.size(); ++col_idx) {
608-
auto col_type = getColumnInfo(db_id_, table_id, col_idx + 1)->type;
622+
auto col_type = getColumnInfo(db_id_, table_id, columnId(col_idx))->type;
609623
size_t num_elems = last_frag.metadata[col_idx]->numElements() +
610624
first_frag.metadata[col_idx]->numElements();
611625
size_t num_bytes = last_frag.metadata[col_idx]->numBytes() +

omniscidb/QueryBuilder/QueryBuilder.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2269,9 +2269,10 @@ BuilderNode BuilderNode::sort(const std::vector<BuilderSortField>& fields,
22692269
auto base = node_;
22702270
if (node_->is<Scan>()) {
22712271
// Filter out rowid column if it's not used in the sort.
2272+
auto scan = node_->as<Scan>();
22722273
bool uses_rowid =
22732274
std::any_of(collation.begin(), collation.end(), [&](const SortField& field) {
2274-
return field.getField() == node_->size() - 1;
2275+
return scan->isVirtualCol(field.getField());
22752276
});
22762277
int cols_to_proj = uses_rowid ? node_->size() : node_->size() - 1;
22772278
std::vector<int> col_indices(cols_to_proj);

omniscidb/QueryEngine/WorkUnitBuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -349,7 +349,7 @@ void WorkUnitBuilder::process(const ir::Node* node) {
349349
col_var = ir::makeExpr<ir::ColumnVar>(node->getOutputMetainfo()[i].type(),
350350
token->dbId(),
351351
token->tableId(),
352-
i + 1,
352+
token->columnId(i),
353353
rte_idx,
354354
false);
355355
}

omniscidb/ResultSetRegistry/ResultSetMetadata.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
*/
77

88
#include "ResultSetMetadata.h"
9+
#include "ResultSetTableToken.h"
910

1011
#include "Shared/thread_count.h"
1112

@@ -80,7 +81,8 @@ ChunkMetadataMap synthesizeMetadata(const ResultSet* rows) {
8081
for (size_t i = 0; i < rows->colCount(); ++i) {
8182
decoders.emplace_back(Encoder::Create(nullptr, rows->colType(i)));
8283
const auto it_ok =
83-
metadata_map.emplace(i + 1, decoders.back()->getMetadata(rows->colType(i)));
84+
metadata_map.emplace(ResultSetTableToken::columnId(i),
85+
decoders.back()->getMetadata(rows->colType(i)));
8486
CHECK(it_ok.second);
8587
}
8688
return metadata_map;
@@ -212,7 +214,7 @@ ChunkMetadataMap synthesizeMetadata(const ResultSet* rows) {
212214
num_bytes,
213215
rows->rowCount(),
214216
dummy_encoders[0][i]->getMetadata(elem_type)->chunkStats());
215-
const auto it_ok = metadata_map.emplace(i + 1, meta);
217+
const auto it_ok = metadata_map.emplace(ResultSetTableToken::columnId(i), meta);
216218
CHECK(it_ok.second);
217219
}
218220
return metadata_map;

omniscidb/ResultSetRegistry/ResultSetRegistry.cpp

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,14 @@ TableFragmentsInfo getEmptyTableMetadata(int table_id) {
5151
return res;
5252
}
5353

54+
// Maps a 0-based column index to a column ID by forwarding to
// ResultSetTableToken::columnId, so this file uses the same mapping as the token.
int columnId(size_t col_idx) {
55+
return ResultSetTableToken::columnId(col_idx);
56+
}
57+
58+
// Maps a column ID back to a 0-based column index by forwarding to
// ResultSetTableToken::columnIndex.
size_t columnIndex(int col_id) {
59+
return ResultSetTableToken::columnIndex(col_id);
60+
}
61+
5462
} // namespace
5563

5664
ResultSetRegistry::ResultSetRegistry(ConfigPtr config)
@@ -111,14 +119,14 @@ ResultSetTableTokenPtr ResultSetRegistry::put(ResultSetTable table) {
111119
for (size_t col_idx = 0; col_idx < first_rs->colCount(); ++col_idx) {
112120
addColumnInfo(db_id_,
113121
table_id,
114-
static_cast<int>(col_idx + 1),
122+
columnId(col_idx),
115123
first_rs->colName(col_idx),
116124
first_rs->colType(col_idx),
117125
false);
118126
has_varlen = has_varlen || first_rs->colType(col_idx)->isVarLen();
119127
has_array = has_array || first_rs->colType(col_idx)->isArray();
120128
}
121-
addRowidColumn(db_id_, table_id);
129+
addRowidColumn(db_id_, table_id, columnId(first_rs->colCount()));
122130

123131
// TODO: lazily compute row count and try to avoid global write
124132
// locks for that
@@ -190,8 +198,8 @@ ChunkStats ResultSetRegistry::getChunkStats(int table_id,
190198
frag.meta = synthesizeMetadata(frag.rs.get());
191199
}
192200
}
193-
CHECK(frag.meta.count(col_idx + 1));
194-
return frag.meta.at(col_idx + 1)->chunkStats();
201+
CHECK(frag.meta.count(columnId(col_idx)));
202+
return frag.meta.at(columnId(col_idx))->chunkStats();
195203
}
196204

197205
void ResultSetRegistry::fetchBuffer(const ChunkKey& key,
@@ -204,7 +212,7 @@ void ResultSetRegistry::fetchBuffer(const ChunkKey& key,
204212
mapd_shared_lock<mapd_shared_mutex> table_lock(table.mutex);
205213
data_lock.unlock();
206214

207-
size_t col_idx = static_cast<size_t>(key[CHUNK_KEY_COLUMN_IDX] - 1);
215+
size_t col_idx = columnIndex(key[CHUNK_KEY_COLUMN_IDX]);
208216
size_t frag_idx = static_cast<size_t>(key[CHUNK_KEY_FRAGMENT_IDX] - 1);
209217
CHECK_LT(frag_idx, table.fragments.size());
210218
auto& rs = table.fragments[frag_idx].rs;
@@ -223,7 +231,7 @@ ResultSetRegistry::getZeroCopyBufferMemory(const ChunkKey& key, size_t num_bytes
223231
mapd_shared_lock<mapd_shared_mutex> table_lock(table.mutex);
224232
data_lock.unlock();
225233

226-
size_t col_idx = static_cast<size_t>(key[CHUNK_KEY_COLUMN_IDX] - 1);
234+
size_t col_idx = columnIndex(key[CHUNK_KEY_COLUMN_IDX]);
227235
size_t frag_idx = static_cast<size_t>(key[CHUNK_KEY_FRAGMENT_IDX] - 1);
228236
CHECK_LT(frag_idx, table.fragments.size());
229237
auto& frag = table.fragments[frag_idx];
@@ -317,7 +325,7 @@ TableFragmentsInfo ResultSetRegistry::getTableMetadata(int db_id, int table_id)
317325
[this, table_id, frag_idx, col_idx](ChunkStats& stats) {
318326
stats = this->getChunkStats(table_id, frag_idx, col_idx);
319327
});
320-
frag_info.setChunkMetadata(static_cast<int>(col_idx + 1), meta);
328+
frag_info.setChunkMetadata(columnId(col_idx), meta);
321329
}
322330
}
323331
} else {

omniscidb/ResultSetRegistry/ResultSetTableToken.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,11 @@ class ResultSetTableToken {
3737
int dbId() const { return tinfo_->db_id; }
3838
int tableId() const { return tinfo_->table_id; }
3939

40+
// Column ID <-> Index mapping. Use a significant offset to early catch
41+
// cases of indexes used as IDs and vice versa.
42+
// Converts a 0-based column index into a column ID: the index is shifted up by
// 2000 and the sum is narrowed to int.
static int columnId(size_t col_idx) {
  const size_t shifted_idx = col_idx + 2000;
  return static_cast<int>(shifted_idx);
}
43+
// Converts a column ID back into a 0-based column index by removing the 2000
// shift. The subtraction happens in int before the cast, so an ID below 2000
// converts to a very large size_t value.
static size_t columnIndex(int col_id) {
  const int unshifted_id = col_id - 2000;
  return static_cast<size_t>(unshifted_id);
}
44+
4045
size_t rowCount() const { return row_count_; }
4146

4247
size_t resultSetCount() const { return tinfo_->fragments; }

omniscidb/SchemaMgr/SimpleSchemaProvider.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -157,9 +157,8 @@ class SimpleSchemaProvider : public SchemaProvider {
157157
return addColumnInfo(std::make_shared<ColumnInfo>(args...));
158158
}
159159

160-
ColumnInfoPtr addRowidColumn(int db_id, int table_id) {
160+
ColumnInfoPtr addRowidColumn(int db_id, int table_id, int col_id) {
161161
CHECK_EQ(column_index_by_name_.count({db_id, table_id}), (size_t)1);
162-
int col_id = static_cast<int>(column_index_by_name_[{db_id, table_id}].size() + 1);
163162
return addColumnInfo(db_id, table_id, col_id, "rowid", ctx_.int64(), true);
164163
}
165164

0 commit comments

Comments
 (0)