|
21 | 21 | namespace facebook::nimble { |
22 | 22 | namespace detail { |
23 | 23 | inline bool shouldOmitDataStream( |
24 | | - const StreamData& streamData, |
| 24 | + uint64_t dataVectorSize, |
25 | 25 | uint64_t minChunkSize, |
| 26 | + bool allNulls, |
26 | 27 | bool isFirstChunk) { |
27 | | - if (streamData.data().size() > minChunkSize) { |
| 28 | + if (dataVectorSize > minChunkSize) { |
28 | 29 | return false; |
29 | 30 | } |
30 | 31 | // When all values are null, the values stream is omitted. |
31 | | - return isFirstChunk || streamData.nonNulls().empty(); |
| 32 | + return isFirstChunk || allNulls; |
32 | 33 | } |
33 | 34 |
|
34 | 35 | inline bool shouldOmitNullStream( |
@@ -184,7 +185,7 @@ class ContentStreamChunker final : public StreamChunker { |
184 | 185 | } |
185 | 186 |
|
186 | 187 | void compact() override { |
187 | | - // No changes made to stream data, nothing to compact. |
| 188 | + // No chunks consumed from stream data, we should not compact. |
188 | 189 | if (dataElementOffset_ == 0) { |
189 | 190 | return; |
190 | 191 | } |
@@ -300,7 +301,7 @@ class NullsStreamChunker final : public StreamChunker { |
300 | 301 |
|
301 | 302 | private: |
302 | 303 | void compact() override { |
303 | | - // No changes made to stream data, nothing to compact. |
| 304 | + // No chunks consumed from stream data, we should not compact. |
304 | 305 | if (nonNullsOffset_ == 0) { |
305 | 306 | return; |
306 | 307 | } |
@@ -357,8 +358,9 @@ class NullableContentStreamChunker final : public StreamChunker { |
357 | 358 | minChunkSize_{options.minChunkSize}, |
358 | 359 | maxChunkSize_{options.maxChunkSize}, |
359 | 360 | omitStream_{detail::shouldOmitDataStream( |
360 | | - streamData, |
| 361 | + streamData.data().size(), |
361 | 362 | options.minChunkSize, |
| 363 | + streamData.nonNulls().empty(), |
362 | 364 | options.isFirstChunk)}, |
363 | 365 | ensureFullChunks_{options.ensureFullChunks}, |
364 | 366 | extraMemory_{streamData_->extraMemory()} { |
@@ -448,7 +450,7 @@ class NullableContentStreamChunker final : public StreamChunker { |
448 | 450 | } |
449 | 451 |
|
450 | 452 | void compact() override { |
451 | | - // No changes made to stream data, nothing to compact. |
| 453 | + // No chunks consumed from stream data, we should not compact. |
452 | 454 | if (nonNullsOffset_ == 0) { |
453 | 455 | return; |
454 | 456 | } |
@@ -573,4 +575,201 @@ NullableContentStreamChunker<std::string_view>::nextChunkSize() { |
573 | 575 | } |
574 | 576 | return chunkSize; |
575 | 577 | } |
| 578 | + |
| 579 | +class NullableContentStringStreamChunker final : public StreamChunker { |
| 580 | + public: |
| 581 | + explicit NullableContentStringStreamChunker( |
| 582 | + NullableContentStringStreamData& streamData, |
| 583 | + const StreamChunkerOptions& options) |
| 584 | + : streamData_{&streamData}, |
| 585 | + minChunkSize_{options.minChunkSize}, |
| 586 | + maxChunkSize_{options.maxChunkSize}, |
| 587 | + omitStream_{detail::shouldOmitDataStream( |
| 588 | + streamData.bufferSize(), |
| 589 | + options.minChunkSize, |
| 590 | + streamData.nonNulls().empty(), |
| 591 | + options.isFirstChunk)}, |
| 592 | + ensureFullChunks_{options.ensureFullChunks} { |
| 593 | + static_assert(sizeof(bool) == 1); |
| 594 | + } |
| 595 | + |
| 596 | + std::optional<StreamDataView> next() override { |
| 597 | + if (omitStream_) { |
| 598 | + return std::nullopt; |
| 599 | + } |
| 600 | + const auto& chunkSize = nextChunkSize(); |
| 601 | + if (chunkSize.rollingChunkSize == 0) { |
| 602 | + return std::nullopt; |
| 603 | + } |
| 604 | + |
| 605 | + // Content |
| 606 | + auto& output = streamData_->mutableStringViews(); |
| 607 | + output.resize(0); |
| 608 | + output.reserve(chunkSize.dataElementCount); |
| 609 | + auto mutableData = streamData_->mutableData(); |
| 610 | + auto& mutableLengths = mutableData.lengths; |
| 611 | + auto& mutableBuffer = mutableData.buffer; |
| 612 | + auto currentBufferOffset = bufferOffset_; |
| 613 | + for (size_t i = 0; i < chunkSize.dataElementCount; ++i) { |
| 614 | + const auto currentLength = mutableLengths[lengthsOffset_ + i]; |
| 615 | + output.emplace_back( |
| 616 | + std::string_view{ |
| 617 | + mutableBuffer.data() + currentBufferOffset, currentLength}); |
| 618 | + currentBufferOffset += currentLength; |
| 619 | + } |
| 620 | + std::string_view dataChunk = { |
| 621 | + reinterpret_cast<const char*>(output.data()), |
| 622 | + chunkSize.dataElementCount * sizeof(std::string_view)}; |
| 623 | + |
| 624 | + // Nulls |
| 625 | + std::span<const bool> nonNullsChunk( |
| 626 | + streamData_->mutableNonNulls().data() + nonNullsOffset_, |
| 627 | + chunkSize.nullElementCount); |
| 628 | + |
| 629 | + lengthsOffset_ += chunkSize.dataElementCount; |
| 630 | + nonNullsOffset_ += chunkSize.nullElementCount; |
| 631 | + bufferOffset_ += chunkSize.extraMemory; |
| 632 | + |
| 633 | + if (chunkSize.nullElementCount > chunkSize.dataElementCount) { |
| 634 | + return StreamDataView{ |
| 635 | + streamData_->descriptor(), |
| 636 | + dataChunk, |
| 637 | + static_cast<uint32_t>(chunkSize.nullElementCount), |
| 638 | + nonNullsChunk}; |
| 639 | + } |
| 640 | + NIMBLE_DCHECK_EQ(chunkSize.dataElementCount, chunkSize.nullElementCount); |
| 641 | + return StreamDataView{ |
| 642 | + streamData_->descriptor(), |
| 643 | + dataChunk, |
| 644 | + static_cast<uint32_t>(chunkSize.dataElementCount)}; |
| 645 | + } |
| 646 | + |
| 647 | + private: |
| 648 | + ChunkSize nextChunkSize() { |
| 649 | + const auto& stringLengths = streamData_->mutableData().lengths; |
| 650 | + const auto& nonNulls = streamData_->mutableNonNulls(); |
| 651 | + ChunkSize chunkSize; |
| 652 | + bool fullChunk{false}; |
| 653 | + // Calculate how many entries we can fit in the chunk |
| 654 | + for (size_t idx = nonNullsOffset_; idx < nonNulls.size(); ++idx) { |
| 655 | + uint64_t currentTotalSize{sizeof(bool)}; |
| 656 | + uint32_t currentDataCount{0}; |
| 657 | + size_t currentExtraMemory{0}; |
| 658 | + if (nonNulls[idx]) { |
| 659 | + currentExtraMemory = |
| 660 | + stringLengths[lengthsOffset_ + chunkSize.dataElementCount]; |
| 661 | + currentTotalSize += currentExtraMemory + sizeof(uint64_t); |
| 662 | + ++currentDataCount; |
| 663 | + } |
| 664 | + |
| 665 | + if (chunkSize.rollingChunkSize == 0 && currentTotalSize > maxChunkSize_) { |
| 666 | + // Allow a single oversized string as its own chunk. |
| 667 | + fullChunk = true; |
| 668 | + chunkSize.extraMemory += currentExtraMemory; |
| 669 | + chunkSize.dataElementCount += currentDataCount; |
| 670 | + chunkSize.rollingChunkSize += currentTotalSize; |
| 671 | + ++chunkSize.nullElementCount; |
| 672 | + break; |
| 673 | + } |
| 674 | + |
| 675 | + if (chunkSize.rollingChunkSize + currentTotalSize > maxChunkSize_) { |
| 676 | + fullChunk = true; |
| 677 | + break; |
| 678 | + } |
| 679 | + |
| 680 | + chunkSize.extraMemory += currentExtraMemory; |
| 681 | + chunkSize.dataElementCount += currentDataCount; |
| 682 | + chunkSize.rollingChunkSize += currentTotalSize; |
| 683 | + ++chunkSize.nullElementCount; |
| 684 | + } |
| 685 | + |
| 686 | + fullChunk = fullChunk || (chunkSize.rollingChunkSize == maxChunkSize_); |
| 687 | + if ((ensureFullChunks_ && !fullChunk) || |
| 688 | + (chunkSize.rollingChunkSize < minChunkSize_)) { |
| 689 | + chunkSize = ChunkSize{}; |
| 690 | + } |
| 691 | + return chunkSize; |
| 692 | + } |
| 693 | + |
| 694 | + void compact() override { |
| 695 | + // Clear existing output vector before beginning compaction. |
| 696 | + streamData_->mutableStringViews().clear(); |
| 697 | + |
| 698 | + // No chunks consumed from stream data, we should not compact. |
| 699 | + if (nonNullsOffset_ == 0) { |
| 700 | + return; |
| 701 | + } |
| 702 | + |
| 703 | + const bool hasNulls = streamData_->hasNulls(); |
| 704 | + // Move and clear existing buffers |
| 705 | + auto tempBuffer = std::move(streamData_->mutableData().buffer); |
| 706 | + auto tempLengths = std::move(streamData_->mutableData().lengths); |
| 707 | + auto tempNonNulls = std::move(streamData_->mutableNonNulls()); |
| 708 | + streamData_->reset(); |
| 709 | + NIMBLE_DCHECK( |
| 710 | + streamData_->empty(), "StreamData should be empty after reset"); |
| 711 | + |
| 712 | + { |
| 713 | + const auto remainingDataCount = tempLengths.size() - lengthsOffset_; |
| 714 | + auto& mutableDataLength = streamData_->mutableData().lengths; |
| 715 | + mutableDataLength.resize(remainingDataCount); |
| 716 | + NIMBLE_DCHECK_EQ( |
| 717 | + mutableDataLength.size(), |
| 718 | + remainingDataCount, |
| 719 | + "Data length size should be equal to remaining data count"); |
| 720 | + |
| 721 | + std::copy_n( |
| 722 | + tempLengths.begin() + lengthsOffset_, |
| 723 | + remainingDataCount, |
| 724 | + mutableDataLength.begin()); |
| 725 | + } |
| 726 | + |
| 727 | + { |
| 728 | + const auto remainingDataBytes = tempBuffer.size() - bufferOffset_; |
| 729 | + auto& mutableDataBuffer = streamData_->mutableData().buffer; |
| 730 | + mutableDataBuffer.resize(remainingDataBytes); |
| 731 | + NIMBLE_DCHECK_EQ( |
| 732 | + mutableDataBuffer.size(), |
| 733 | + remainingDataBytes, |
| 734 | + "Data buffer size should be equal to remaining data bytes"); |
| 735 | + |
| 736 | + std::copy_n( |
| 737 | + tempBuffer.begin() + bufferOffset_, |
| 738 | + remainingDataBytes, |
| 739 | + mutableDataBuffer.begin()); |
| 740 | + } |
| 741 | + |
| 742 | + { |
| 743 | + auto& mutableNonNulls = streamData_->mutableNonNulls(); |
| 744 | + const auto remainingNonNullsCount = tempNonNulls.size() - nonNullsOffset_; |
| 745 | + streamData_->ensureAdditionalNullsCapacity( |
| 746 | + hasNulls, static_cast<uint32_t>(remainingNonNullsCount)); |
| 747 | + if (hasNulls) { |
| 748 | + mutableNonNulls.resize(remainingNonNullsCount); |
| 749 | + NIMBLE_DCHECK_EQ( |
| 750 | + mutableNonNulls.size(), |
| 751 | + remainingNonNullsCount, |
| 752 | + "NonNulls buffer size should be equal to remaining non-nulls count"); |
| 753 | + |
| 754 | + std::copy_n( |
| 755 | + tempNonNulls.begin() + nonNullsOffset_, |
| 756 | + remainingNonNullsCount, |
| 757 | + mutableNonNulls.begin()); |
| 758 | + } |
| 759 | + } |
| 760 | + lengthsOffset_ = 0; |
| 761 | + nonNullsOffset_ = 0; |
| 762 | + bufferOffset_ = 0; |
| 763 | + } |
| 764 | + |
| 765 | + NullableContentStringStreamData* const streamData_; |
| 766 | + const uint64_t minChunkSize_; |
| 767 | + const uint64_t maxChunkSize_; |
| 768 | + const bool omitStream_; |
| 769 | + const bool ensureFullChunks_; |
| 770 | + |
| 771 | + size_t lengthsOffset_{0}; |
| 772 | + size_t nonNullsOffset_{0}; |
| 773 | + uint64_t bufferOffset_{0}; |
| 774 | +}; |
576 | 775 | } // namespace facebook::nimble |
0 commit comments