@@ -338,6 +338,7 @@ class SimpleFieldWriter : public FieldWriter {
338338 nullCount = size - nonNullCount;
339339 }
340340
341+ // NOTE: This logic is wrong. Will be removed with new stats changes.
341342 columnStats_.logicalSize += nullCount +
342343 ((K == velox::TypeKind::VARCHAR || K == velox::TypeKind::VARBINARY)
343344 ? valuesStream_.extraMemory ()
@@ -355,6 +356,72 @@ class SimpleFieldWriter : public FieldWriter {
355356 ColumnStats& columnStats_;
356357};
357358
359+ template <velox::TypeKind K>
360+ class StringFieldWriter : public FieldWriter {
361+ public:
362+ explicit StringFieldWriter (FieldWriterContext& context)
363+ : FieldWriter(
364+ context,
365+ context.schemaBuilder().createScalarTypeBuilder(
366+ NimbleTypeTraits<K>::scalarKind)),
367+ valuesStream_{context.createNullableContentStringStreamData (
368+ typeBuilder_->asScalar ().scalarDescriptor ())},
369+ columnStats_{context.columnStats (valuesStream_.descriptor ().offset ())} {
370+ }
371+
372+ void write (
373+ const velox::VectorPtr& vector,
374+ const OrderedRanges& ranges,
375+ folly::Executor*) override {
376+ // Ensure string buffer capacity.
377+ auto size = ranges.size ();
378+ const uint64_t totalBytes = getRawSizeFromVector (vector, ranges);
379+ valuesStream_.ensureStringBufferCapacity (size, totalBytes);
380+
381+ // Append to string buffer.
382+ uint64_t memoryUsed = 0 ;
383+ auto stringBuffer = valuesStream_.mutableData ();
384+ auto appendToStringBuffer = [&](velox::StringView sv) {
385+ memoryUsed += sv.size ();
386+ auto & buffer = stringBuffer.buffer ;
387+ buffer.insert (buffer.end (), sv.begin (), sv.end ());
388+ auto & mutableLengths = stringBuffer.lengths ;
389+ mutableLengths.push_back (sv.size ());
390+ };
391+
392+ uint64_t nonNullCount = 0 ;
393+ if (auto flat = vector->asFlatVector <velox::StringView>()) {
394+ valuesStream_.ensureAdditionalNullsCapacity (flat->mayHaveNulls (), size);
395+ nonNullCount = iterateNonNullValues (
396+ ranges,
397+ valuesStream_.mutableNonNulls (),
398+ Flat<velox::StringView>{vector},
399+ appendToStringBuffer);
400+ } else {
401+ auto decodingContext = context_.decodingContext ();
402+ auto & decoded = decodingContext.decode (vector, ranges);
403+ valuesStream_.ensureAdditionalNullsCapacity (decoded.mayHaveNulls (), size);
404+ nonNullCount = iterateNonNullValues (
405+ ranges,
406+ valuesStream_.mutableNonNulls (),
407+ Decoded<velox::StringView>{decoded},
408+ appendToStringBuffer);
409+ }
410+ uint64_t nullCount = size - nonNullCount;
411+ columnStats_.logicalSize += nullCount + memoryUsed;
412+ columnStats_.nullCount += nullCount;
413+ columnStats_.valueCount += size;
414+ }
415+
416+ void reset () override {
417+ valuesStream_.reset ();
418+ }
419+
420+ private:
421+ NullableContentStringStreamData& valuesStream_;
422+ ColumnStats& columnStats_;
423+ };
424+
358425class TimestampFieldWriter : public FieldWriter {
359426 public:
360427 explicit TimestampFieldWriter (FieldWriterContext& context)
@@ -1964,15 +2031,25 @@ std::unique_ptr<FieldWriter> FieldWriter::create(
19642031 break ;
19652032 }
19662033 case velox::TypeKind::VARCHAR: {
1967- field = std::make_unique<
1968- SimpleFieldWriter<velox::TypeKind::VARCHAR, StringConverter>>(
1969- context);
2034+ if (context.disableSharedStringBuffers ()) {
2035+ field = std::make_unique<StringFieldWriter<velox::TypeKind::VARCHAR>>(
2036+ context);
2037+ } else {
2038+ field = std::make_unique<
2039+ SimpleFieldWriter<velox::TypeKind::VARCHAR, StringConverter>>(
2040+ context);
2041+ }
19702042 break ;
19712043 }
19722044 case velox::TypeKind::VARBINARY: {
1973- field = std::make_unique<
1974- SimpleFieldWriter<velox::TypeKind::VARBINARY, StringConverter>>(
1975- context);
2045+ if (context.disableSharedStringBuffers ()) {
2046+ field = std::make_unique<StringFieldWriter<velox::TypeKind::VARBINARY>>(
2047+ context);
2048+ } else {
2049+ field = std::make_unique<
2050+ SimpleFieldWriter<velox::TypeKind::VARBINARY, StringConverter>>(
2051+ context);
2052+ }
19762053 break ;
19772054 }
19782055 case velox::TypeKind::TIMESTAMP: {
0 commit comments