|
12 | 12 |
|
13 | 13 | namespace arcticdb {
|
14 | 14 |
|
// N.B. this will not catch all the things that C++ considers to be narrowing conversions, because
// it doesn't take into account integral promotion, however we don't care about that for the
// purpose for which it is used in this file.
template <typename SourceType, typename TargetType>
constexpr bool is_narrowing_conversion() {
    // Shrinking the representation is always narrowing.
    constexpr bool loses_width = sizeof(TargetType) < sizeof(SourceType);
    // An equal-width unsigned -> signed integral conversion cannot represent the
    // top half of the source's value range, so treat it as narrowing too.
    constexpr bool loses_sign_range =
        sizeof(SourceType) == sizeof(TargetType) &&
        std::is_integral_v<TargetType> &&
        std::is_unsigned_v<SourceType> &&
        std::is_signed_v<TargetType>;
    return loses_width || loses_sign_range;
}
| 29 | + |
| 30 | +JiveTable create_jive_table(const std::vector<std::shared_ptr<Column>>& columns) { |
| 31 | + JiveTable output(columns[0]->row_count()); |
| 32 | + std::iota(std::begin(output.orig_pos_), std::end(output.orig_pos_), 0); |
| 33 | + |
| 34 | + // Calls to scalar_at are expensive, so we precompute them to speed up the sort compare function. |
| 35 | + for(auto it = std::rbegin(columns); it != std::rend(columns); ++it) { |
| 36 | + auto& column = *it; |
| 37 | + user_input::check<ErrorCode::E_SORT_ON_SPARSE>(!column->is_sparse(), "Can't sort on sparse column with type {}", column->type()); |
| 38 | + details::visit_type(column->type().data_type(), [&output, &column] (auto type_desc_tag) { |
| 39 | + using type_info = ScalarTypeInfo<decltype(type_desc_tag)>; |
| 40 | + auto column_data = column->data(); |
| 41 | + auto accessor = random_accessor<typename type_info::TDT>(&column_data); |
| 42 | + std::stable_sort(std::begin(output.orig_pos_), |
| 43 | + std::end(output.orig_pos_), |
| 44 | + [&](const auto &a, const auto &b) -> bool { |
| 45 | + return accessor.at(a) < accessor.at(b); |
| 46 | + }); |
| 47 | + }); |
| 48 | + // Obtain the sorted_pos_ by reversing the orig_pos_ permutation |
| 49 | + for (auto i = 0u; i < output.orig_pos_.size(); ++i) { |
| 50 | + output.sorted_pos_[output.orig_pos_[i]] = i; |
| 51 | + } |
| 52 | + } |
| 53 | + |
| 54 | + return output; |
| 55 | +} |
| 56 | + |
| 57 | +bool operator==(const ExtraBufferIndex& lhs, const ExtraBufferIndex& rhs) { |
| 58 | + return (lhs.offset_bytes_ == rhs.offset_bytes_) && (lhs.type_ == rhs.type_); |
| 59 | +} |
| 60 | + |
| 61 | +std::size_t ExtraBufferIndexHash::operator()(const ExtraBufferIndex& index) const { |
| 62 | + return folly::hash::hash_combine(index.offset_bytes_, index.type_); |
| 63 | +} |
| 64 | + |
| 65 | +ChunkedBuffer& ExtraBufferContainer::create_buffer(size_t offset, ExtraBufferType type, size_t size, AllocationType allocation_type) { |
| 66 | + std::lock_guard lock(mutex_); |
| 67 | + auto inserted = buffers_.try_emplace(ExtraBufferIndex{offset, type}, ChunkedBuffer{size, allocation_type}); |
| 68 | + util::check(inserted.second, "Failed to insert additional chunked buffer at position {}", offset); |
| 69 | + return inserted.first->second; |
| 70 | +} |
| 71 | + |
| 72 | +void ExtraBufferContainer::set_buffer(size_t offset, ExtraBufferType type, ChunkedBuffer&& buffer) { |
| 73 | + std::lock_guard lock(mutex_); |
| 74 | + buffers_.try_emplace(ExtraBufferIndex{offset, type}, std::move(buffer)); |
| 75 | +} |
| 76 | + |
// Look up the extra buffer registered under (offset, type); throws via util::check if absent.
// Returns a mutable reference from a const method: the container's constness protects the
// map structure (guarded by mutex_), not the contents of the individual buffers.
ChunkedBuffer& ExtraBufferContainer::get_buffer(size_t offset, ExtraBufferType type) const {
    std::lock_guard lock(mutex_);
    auto it = buffers_.find(ExtraBufferIndex{offset, type});
    util::check(it != buffers_.end(), "Failed to find additional chunked buffer at position {}", offset);
    // const_cast required because find() on the const map yields a const mapped value.
    return const_cast<ChunkedBuffer&>(it->second);
}
| 83 | + |
| 84 | +bool ExtraBufferContainer::has_buffer(size_t offset, ExtraBufferType type) const { |
| 85 | + std::lock_guard lock(mutex_); |
| 86 | + auto it = buffers_.find(ExtraBufferIndex{offset, type}); |
| 87 | + return it != buffers_.end(); |
| 88 | +} |
| 89 | + |
15 | 90 | void initialise_output_column(const Column& input_column, Column& output_column) {
|
16 | 91 | if (&input_column != &output_column) {
|
17 | 92 | size_t output_physical_rows;
|
@@ -113,6 +188,215 @@ bool operator!=(const Column& left, const Column& right) {
|
113 | 188 | return !(left == right);
|
114 | 189 | }
|
115 | 190 |
|
// Default-construct an untyped, empty column; the real type is expected to be
// assigned later (e.g. when used as a clone/copy target).
Column::Column() : type_(null_type_descriptor()) {}

// Empty, dynamically-allocated column of the given type; sparsity not permitted.
Column::Column(TypeDescriptor type) :
    Column(type, 0, AllocationType::DYNAMIC, Sparsity::NOT_PERMITTED) {
}

// Empty, dynamically-allocated column of the given type with explicit sparsity policy.
Column::Column(TypeDescriptor type, Sparsity allow_sparse) :
    Column(type, 0, AllocationType::DYNAMIC, allow_sparse) {
}

// Adopt an externally-built data buffer as the column's contents.
Column::Column(TypeDescriptor type, Sparsity allow_sparse, ChunkedBuffer&& buffer) :
    data_(std::move(buffer)),
    type_(type),
    allow_sparse_(allow_sparse) {
}

// Adopt externally-built data and shapes buffers (for multi-dimensional columns).
Column::Column(TypeDescriptor type, Sparsity allow_sparse, ChunkedBuffer&& buffer, Buffer&& shapes) :
    data_(std::move(buffer)),
    shapes_(std::move(shapes)),
    type_(type),
    allow_sparse_(allow_sparse) {
}

// Column sized for `expected_rows` using the type's internal (in-memory) element size;
// `presize` controls whether the buffer is allocated up front or grows dynamically.
Column::Column(
    TypeDescriptor type,
    size_t expected_rows,
    AllocationType presize,
    Sparsity allow_sparse) :
    data_(expected_rows * entity::internal_data_type_size(type), presize),
    type_(type),
    allow_sparse_(allow_sparse) {
    ARCTICDB_TRACE(log::inmem(), "Creating column with descriptor {}", type);
}

// As above, but the element size is taken for a specific output format / data type mode,
// which may differ from the internal representation size.
Column::Column(
    TypeDescriptor type,
    size_t expected_rows,
    AllocationType presize,
    Sparsity allow_sparse,
    OutputFormat output_format,
    DataTypeMode mode) :
    data_(expected_rows * entity::data_type_size(type, output_format, mode), presize),
    type_(type),
    allow_sparse_(allow_sparse) {
    ARCTICDB_TRACE(log::inmem(), "Creating column with descriptor {}", type);
}
| 237 | + |
| 238 | +void Column::set_statistics(FieldStatsImpl stats) { |
| 239 | + stats_ = stats; |
| 240 | +} |
| 241 | + |
| 242 | +bool Column::has_statistics() const { |
| 243 | + return stats_.set_; |
| 244 | +}; |
| 245 | + |
| 246 | +FieldStatsImpl Column::get_statistics() const { |
| 247 | + return stats_; |
| 248 | +} |
| 249 | + |
// Mark all rows up to and including `to_row` as present in the sparse map, creating the
// map if it does not exist yet. A negative to_row leaves the (possibly freshly created,
// empty) bitset untouched.
void Column::backfill_sparse_map(ssize_t to_row) {
    ARCTICDB_TRACE(log::version(), "Backfilling sparse map to position {}", to_row);
    // Initialise the optional to an empty bitset if it has not been created yet
    auto& bitset = sparse_map();
    if (to_row >= 0) {
        // NOTE(review): bv_size presumably converts the row index to the bitvector's bound
        // type; assumes set_range's upper bound is inclusive of that position -- confirm
        // against the util::BitSet implementation.
        bitset.set_range(0, bv_size(to_row), true);
    }
}
| 258 | + |
// Replace the column's data with an externally-built buffer plus the sparsity bitmap
// describing which logical rows are physically present.
void Column::set_sparse_block(ChunkedBuffer&& buffer, util::BitSet&& bitset) {
    data_.buffer() = std::move(buffer);
    sparse_map_ = std::move(bitset);
}

// As above, additionally replacing the shapes buffer (multi-dimensional columns).
void Column::set_sparse_block(ChunkedBuffer&& buffer, Buffer&& shapes, util::BitSet&& bitset) {
    data_.buffer() = std::move(buffer);
    shapes_.buffer() = std::move(shapes);
    sparse_map_ = std::move(bitset);
}

// Transfer ownership of the data buffer out of the column; the column retains a
// moved-from buffer and should not be read afterwards.
ChunkedBuffer&& Column::release_buffer() {
    return std::move(data_.buffer());
}

// Transfer ownership of the shapes buffer out of the column.
Buffer&& Column::release_shapes() {
    return std::move(shapes_.buffer());
}
| 277 | + |
| 278 | +std::optional<Column::StringArrayData> Column::string_array_at(position_t idx, const StringPool &string_pool) { |
| 279 | + util::check_arg(idx < row_count(), "String array index out of bounds in column"); |
| 280 | + util::check_arg(type_.dimension() == Dimension::Dim1, "String array should always be one dimensional"); |
| 281 | + if (!inflated_) |
| 282 | + inflate_string_arrays(string_pool); |
| 283 | + |
| 284 | + const shape_t *shape_ptr = shape_index(idx); |
| 285 | + auto num_strings = *shape_ptr; |
| 286 | + ssize_t string_size = offsets_[idx] / num_strings; |
| 287 | + return StringArrayData{num_strings, string_size, data_.ptr_cast<char>(bytes_offset(idx), num_strings * string_size)}; |
| 288 | +} |
| 289 | + |
// Iterator over the data buffer stepping by the scalar size of the column's data type.
// const_cast is needed because the iterator type takes a mutable buffer pointer even
// when used for read-only traversal.
ChunkedBuffer::Iterator Column::get_iterator() const {
    return {const_cast<ChunkedBuffer*>(&data_.buffer()), get_type_size(type_.data_type())};
}

// Total number of bytes currently held in the data buffer.
size_t Column::bytes() const {
    return data_.bytes();
}

// Non-owning view over the column's data, shapes, type and optional sparsity bitmap
// (nullptr when the column is dense).
ColumnData Column::data() const {
    return ColumnData(&data_.buffer(), &shapes_.buffer(), type_, sparse_map_ ? &*sparse_map_ : nullptr);
}

// Raw pointer to the start of the data buffer (const overload).
const uint8_t* Column::ptr() const {
    return data_.buffer().data();
}

// Raw pointer to the start of the data buffer.
uint8_t* Column::ptr() {
    return data_.buffer().data();
}
| 309 | + |
// The column's type descriptor.
TypeDescriptor Column::type() const { return type_; }

// Number of discontiguous blocks backing the data buffer.
size_t Column::num_blocks() const {
    return data_.buffer().num_blocks();
}

// Pointer to the start of the shapes array.
const shape_t* Column::shape_ptr() const {
    return shapes_.ptr_cast<shape_t>(0, num_shapes());
}

// Record the type the column had before a conversion so it can be queried later.
void Column::set_orig_type(const TypeDescriptor& desc) {
    orig_type_ = desc;
}

// True iff set_orig_type has been called on this column.
bool Column::has_orig_type() const {
    return static_cast<bool>(orig_type_);
}

// The pre-conversion type; throws std::bad_optional_access unless has_orig_type() is true.
const TypeDescriptor& Column::orig_type() const {
    return orig_type_.value();
}
| 331 | + |
// Coalesce the data buffer's blocks into fewer, larger allocations.
void Column::compact_blocks() {
    data_.compact_blocks();
}

// Ensure the shapes cursor has `bytes` writable bytes and return the write position.
shape_t* Column::allocate_shapes(std::size_t bytes) {
    shapes_.ensure_bytes(bytes);
    return reinterpret_cast<shape_t *>(shapes_.cursor());
}

// Ensure the data cursor has `bytes` writable bytes and return the write position.
// A zero-byte request is rejected as it indicates a caller bug.
uint8_t* Column::allocate_data(std::size_t bytes) {
    util::check(bytes != 0, "Allocate data called with zero size");
    data_.ensure_bytes(bytes);
    return data_.cursor();
}

// Commit `size` bytes written at the data cursor by advancing it.
void Column::advance_data(std::size_t size) {
    data_.advance(position_t(size));
}

// Commit `size` bytes written at the shapes cursor by advancing it.
void Column::advance_shapes(std::size_t size) {
    shapes_.advance(position_t(size));
}
| 354 | + |
// Direct mutable access to the underlying chunked data buffer.
[[nodiscard]] ChunkedBuffer& Column::buffer() {
    return data_.buffer();
}

// Pointer to `required` contiguous bytes starting at byte offset `bytes` in the data buffer.
uint8_t* Column::bytes_at(size_t bytes, size_t required) {
    ARCTICDB_TRACE(log::inmem(), "Column returning {} bytes at position {}", required, bytes);
    return data_.bytes_at(bytes, required);
}

// Const overload of the above.
const uint8_t* Column::bytes_at(size_t bytes, size_t required) const {
    return data_.bytes_at(bytes, required);
}

// Assert that the data buffer holds at least `bytes` bytes.
void Column::assert_size(size_t bytes) const {
    data_.buffer().assert_size(bytes);
}
| 371 | + |
// Lazily construct the extra-buffers container exactly once, even when called
// concurrently from multiple threads (std::call_once on the shared once_flag).
void Column::init_buffer() {
    std::call_once(*init_buffer_, [this] () {
        extra_buffers_ = std::make_unique<ExtraBufferContainer>();
    });
}
| 377 | + |
// Create a new extra buffer keyed by (offset, type), allocating the container on first use.
ChunkedBuffer& Column::create_extra_buffer(size_t offset, ExtraBufferType type, size_t size, AllocationType allocation_type) {
    init_buffer();
    return extra_buffers_->create_buffer(offset, type, size, allocation_type);
}

// Fetch an existing extra buffer; it is a logic error to call this before any buffer
// has been created (the container pointer would still be null).
ChunkedBuffer& Column::get_extra_buffer(size_t offset, ExtraBufferType type) const {
    util::check(static_cast<bool>(extra_buffers_), "Extra buffer {} requested but pointer is null", offset);
    return extra_buffers_->get_buffer(offset, type);
}

// Store an externally-built extra buffer, allocating the container on first use.
void Column::set_extra_buffer(size_t offset, ExtraBufferType type, ChunkedBuffer&& buffer) {
    init_buffer();
    extra_buffers_->set_buffer(offset, type, std::move(buffer));
}
| 392 | + |
| 393 | +bool Column::has_extra_buffer(size_t offset, ExtraBufferType type) const { |
| 394 | + if(!extra_buffers_) |
| 395 | + return false; |
| 396 | + |
| 397 | + return extra_buffers_->has_buffer(offset, type); |
| 398 | +} |
| 399 | + |
116 | 400 | // Column public methods
|
117 | 401 | Column Column::clone() const {
|
118 | 402 | Column output;
|
|
0 commit comments