@@ -41,6 +41,28 @@ inline bool shouldOmitNullStream(
4141 }
4242 return isFirstChunk || streamData.empty ();
4343}
44+
45+ template <typename T>
46+ inline void compactStringBuffer (
47+ velox::memory::MemoryPool* pool,
48+ StringBuffer& stringBuffer,
49+ Vector<T>& mutableData,
50+ size_t dataElementOffset,
51+ uint64_t extraMemory) {
52+ StringBuffer newStringBuffer (*pool);
53+ auto & newBuffer = newStringBuffer.mutableBuffer ();
54+ newBuffer.reserve (extraMemory);
55+
56+ auto & newLengths = newStringBuffer.mutableLengths ();
57+ newLengths.reserve (mutableData.size () - dataElementOffset);
58+
59+ for (auto i = dataElementOffset; i < mutableData.size (); ++i) {
60+ auto str = mutableData[i];
61+ newBuffer.insert (newBuffer.end (), str.begin (), str.end ());
62+ newLengths.push_back (str.size ());
63+ }
64+ stringBuffer = std::move (newStringBuffer);
65+ }
4466} // namespace detail
4567
4668/* *
@@ -106,6 +128,9 @@ class ContentStreamChunker final : public StreamChunker {
106128 maxChunkSize_,
107129 sizeof (T),
108130 " MaxChunkSize must be at least the size of a single data element." );
131+ if constexpr (std::is_same_v<T, std::string_view>) {
132+ streamData_->materializeStringBuffer ();
133+ }
109134 }
110135
111136 std::optional<StreamDataView> next () override {
@@ -194,27 +219,36 @@ class ContentStreamChunker final : public StreamChunker {
194219
195220 // Move and clear existing buffer
196221 auto tempData = std::move (currentData);
222+ auto tempBuffer = std::move (streamData_->mutableStringBuffer ());
197223 streamData_->reset ();
198224 NIMBLE_DCHECK (
199225 streamData_->empty (), " StreamData should be empty after reset" );
200226
201227 auto & mutableData = streamData_->mutableData ();
202- mutableData.reserve (remainingDataCount);
203- NIMBLE_DCHECK_GE (
204- mutableData.capacity (),
205- remainingDataCount,
206- " Data buffer capacity should be at least new capacity" );
207-
208- mutableData.resize (remainingDataCount);
209- NIMBLE_DCHECK_EQ (
210- mutableData.size (),
211- remainingDataCount,
212- " Data buffer size should be equal to remaining data count" );
213-
214- std::copy_n (
215- tempData.begin () + dataElementOffset_,
216- remainingDataCount,
217- mutableData.begin ());
228+
229+ bool compactedStringBuffer{false };
230+ if constexpr (std::is_same_v<T, std::string_view>) {
231+ if (!tempBuffer.empty ()) {
232+ detail::compactStringBuffer<T>(
233+ mutableData.memoryPool (),
234+ streamData_->mutableStringBuffer (),
235+ tempData,
236+ dataElementOffset_,
237+ extraMemory_);
238+ compactedStringBuffer = true ;
239+ }
240+ }
241+ if (!compactedStringBuffer) {
242+ mutableData.resize (remainingDataCount);
243+ NIMBLE_DCHECK_EQ (
244+ mutableData.size (),
245+ remainingDataCount,
246+ " Data buffer size should be equal to remaining data count" );
247+ std::copy_n (
248+ tempData.begin () + dataElementOffset_,
249+ remainingDataCount,
250+ mutableData.begin ());
251+ }
218252 dataElementOffset_ = 0 ;
219253 streamData_->extraMemory () = extraMemory_;
220254
@@ -314,16 +348,10 @@ class NullsStreamChunker final : public StreamChunker {
314348 NIMBLE_CHECK (
315349 streamData_->empty (), " StreamData should be empty after reset" );
316350
317- auto & mutableNonNulls = streamData_->mutableNonNulls ();
318- mutableNonNulls.reserve (remainingNonNullsCount);
319- NIMBLE_DCHECK_GE (
320- mutableNonNulls.capacity (),
321- remainingNonNullsCount,
322- " NonNulls buffer capacity should be at least new capacity" );
323-
324351 streamData_->ensureAdditionalNullsCapacity (
325352 hasNulls, static_cast <uint32_t >(remainingNonNullsCount));
326353 if (hasNulls) {
354+ auto & mutableNonNulls = streamData_->mutableNonNulls ();
327355 mutableNonNulls.resize (remainingNonNullsCount);
328356 NIMBLE_CHECK_EQ (
329357 mutableNonNulls.size (),
@@ -372,6 +400,9 @@ class NullableContentStreamChunker final : public StreamChunker {
372400 " MaxChunkSize must be at least the size of a single element." );
373401
374402 streamData.materialize ();
403+ if constexpr (std::is_same_v<T, std::string_view>) {
404+ streamData_->materializeStringBuffer ();
405+ }
375406 }
376407
377408 std::optional<StreamDataView> next () override {
@@ -461,39 +492,41 @@ class NullableContentStreamChunker final : public StreamChunker {
461492 // Move and clear existing buffers
462493 auto tempNonNulls = std::move (currentNonNulls);
463494 auto tempData = std::move (currentData);
495+ auto tempBuffer = std::move (streamData_->mutableStringBuffer ());
464496 const bool hasNulls = streamData_->hasNulls ();
465497 streamData_->reset ();
466498 NIMBLE_CHECK (
467499 streamData_->empty (), " StreamData should be empty after reset" );
468500
469501 auto & mutableData = streamData_->mutableData ();
470- mutableData. reserve (remainingDataCount) ;
471- NIMBLE_DCHECK_GE (
472- mutableData. capacity (),
473- remainingDataCount,
474- " Data buffer capacity should be at least new capacity " );
475-
476- mutableData. resize (remainingDataCount);
477- NIMBLE_CHECK_EQ (
478- mutableData. size (),
479- remainingDataCount,
480- " Data buffer size should be equal to remaining data count " );
481-
482- std::copy_n (
483- tempData. begin () + dataElementOffset_,
484- remainingDataCount,
485- mutableData.begin ());
486-
487- auto & mutableNonNulls = streamData_-> mutableNonNulls ( );
488- mutableNonNulls. reserve (remainingNonNullsCount);
489- NIMBLE_DCHECK_GE (
490- mutableNonNulls. capacity () ,
491- remainingNonNullsCount,
492- " NonNulls buffer capacity should be at least new capacity " );
502+ bool compactedStringBuffer{ false } ;
503+ if constexpr (std::is_same_v<T, std::string_view>) {
504+ if (!tempBuffer. empty ()) {
505+ detail::compactStringBuffer<T>(
506+ mutableData. memoryPool (),
507+ streamData_-> mutableStringBuffer (),
508+ tempData,
509+ dataElementOffset_,
510+ extraMemory_);
511+ compactedStringBuffer = true ;
512+ }
513+ }
514+ if (!compactedStringBuffer) {
515+ mutableData. resize (remainingDataCount);
516+ NIMBLE_DCHECK_EQ (
517+ mutableData.size (),
518+ remainingDataCount,
519+ " Data buffer size should be equal to remaining data count " );
520+ std::copy_n (
521+ tempData. begin () + dataElementOffset_,
522+ remainingDataCount ,
523+ mutableData. begin ());
524+ }
493525
494526 streamData_->ensureAdditionalNullsCapacity (
495527 hasNulls, static_cast <uint32_t >(remainingNonNullsCount));
496528 if (hasNulls) {
529+ auto & mutableNonNulls = streamData_->mutableNonNulls ();
497530 mutableNonNulls.resize (remainingNonNullsCount);
498531 NIMBLE_CHECK_EQ (
499532 mutableNonNulls.size (),
0 commit comments