@@ -338,6 +338,7 @@ class SimpleFieldWriter : public FieldWriter {
338338 nullCount = size - nonNullCount;
339339 }
340340
341+ // NOTE: This logic is wrong. Will be removed with new stats changes.
341342 columnStats_.logicalSize += nullCount +
342343 ((K == velox::TypeKind::VARCHAR || K == velox::TypeKind::VARBINARY)
343344 ? valuesStream_.extraMemory ()
@@ -355,6 +356,76 @@ class SimpleFieldWriter : public FieldWriter {
355356 ColumnStats& columnStats_;
356357};
357358
359+ template <velox::TypeKind K>
360+ class StringFieldWriter : public FieldWriter {
361+ using SourceType = typename velox::TypeTraits<K>::NativeType;
362+
363+ public:
364+ explicit StringFieldWriter (FieldWriterContext& context)
365+ : FieldWriter(
366+ context,
367+ context.schemaBuilder().createScalarTypeBuilder(
368+ NimbleTypeTraits<K>::scalarKind)),
369+ valuesStream_{context.createNullableContentStringStreamData (
370+ typeBuilder_->asScalar ().scalarDescriptor ())},
371+ columnStats_{context.columnStats (valuesStream_.descriptor ().offset ())} {
372+ }
373+
374+ void write (
375+ const velox::VectorPtr& vector,
376+ const OrderedRanges& ranges,
377+ folly::Executor*) override {
378+ // Ensure string buffer capacity.
379+ auto size = ranges.size ();
380+ uint64_t totalBytes = getRawSizeFromVector (vector, ranges);
381+ valuesStream_.ensureStringBufferCapacity (size, totalBytes);
382+
383+ // Append to string buffer.
384+ uint64_t memoryUsed = 0 ;
385+ auto appendToStringBuffer = [&](SourceType sv) {
386+ memoryUsed += sv.size ();
387+ auto stringBuffer = valuesStream_.mutableData ();
388+ auto & buffer = stringBuffer.buffer ;
389+ buffer.insert (buffer.end (), sv.begin (), sv.end ());
390+ auto & mutableLengths = stringBuffer.lengths ;
391+ mutableLengths.push_back (sv.size ());
392+ };
393+
394+ uint64_t nonNullCount = 0 ;
395+ if (auto flat = vector->asFlatVector <SourceType>()) {
396+ valuesStream_.ensureAdditionalNullsCapacity (flat->mayHaveNulls (), size);
397+ nonNullCount = iterateNonNullValues (
398+ ranges,
399+ valuesStream_.mutableNonNulls (),
400+ Flat<SourceType>{vector},
401+ appendToStringBuffer);
402+ } else {
403+ auto decodingContext = context_.decodingContext ();
404+ auto & decoded = decodingContext.decode (vector, ranges);
405+ valuesStream_.ensureAdditionalNullsCapacity (decoded.mayHaveNulls (), size);
406+ nonNullCount = iterateNonNullValues (
407+ ranges,
408+ valuesStream_.mutableNonNulls (),
409+ Decoded<SourceType>{decoded},
410+ appendToStringBuffer);
411+ }
412+ uint64_t nullCount = size - nonNullCount;
413+
414+ // TODO: Validate that this logic is correct.
415+ columnStats_.logicalSize += nullCount + memoryUsed;
416+ columnStats_.nullCount += nullCount;
417+ columnStats_.valueCount += size;
418+ }
419+
420+ void reset () override {
421+ valuesStream_.reset ();
422+ }
423+
424+ private:
425+ NullableContentStringStreamData& valuesStream_;
426+ ColumnStats& columnStats_;
427+ };
428+
358429class TimestampFieldWriter : public FieldWriter {
359430 public:
360431 explicit TimestampFieldWriter (FieldWriterContext& context)
@@ -1964,15 +2035,25 @@ std::unique_ptr<FieldWriter> FieldWriter::create(
19642035 break ;
19652036 }
19662037 case velox::TypeKind::VARCHAR: {
1967- field = std::make_unique<
1968- SimpleFieldWriter<velox::TypeKind::VARCHAR, StringConverter>>(
1969- context);
2038+ if (context.disableSharedStringBuffers ()) {
2039+ field = std::make_unique<StringFieldWriter<velox::TypeKind::VARCHAR>>(
2040+ context);
2041+ } else {
2042+ field = std::make_unique<
2043+ SimpleFieldWriter<velox::TypeKind::VARCHAR, StringConverter>>(
2044+ context);
2045+ }
19702046 break ;
19712047 }
19722048 case velox::TypeKind::VARBINARY: {
1973- field = std::make_unique<
1974- SimpleFieldWriter<velox::TypeKind::VARBINARY, StringConverter>>(
1975- context);
2049+ if (context.disableSharedStringBuffers ()) {
2050+ field = std::make_unique<StringFieldWriter<velox::TypeKind::VARBINARY>>(
2051+ context);
2052+ } else {
2053+ field = std::make_unique<
2054+ SimpleFieldWriter<velox::TypeKind::VARBINARY, StringConverter>>(
2055+ context);
2056+ }
19762057 break ;
19772058 }
19782059 case velox::TypeKind::TIMESTAMP: {
0 commit comments