Skip to content

Commit 1b5871f

Browse files
committed
Add feature flag for direct Avro decoder
- Add the ReaderProperties::kAvroUseDirectDecoder config flag (default: true).
- The direct decoder is now the default, for better performance.
- The legacy GenericDatum-based decoder returns a NotImplemented error.
- Users can still set avro.use-direct-decoder=false, but doing so currently yields that NotImplemented error together with a deprecation message.

This addresses feedback from wgtmac to add a feature flag as a safety measure in case of bugs in the new direct decoder implementation.
1 parent e54929d commit 1b5871f

File tree

2 files changed

+14
-1
lines changed

2 files changed

+14
-1
lines changed

src/iceberg/avro/avro_reader.cc

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,13 @@ class AvroReader::Impl {
8282
}
8383

8484
batch_size_ = options.properties->Get(ReaderProperties::kBatchSize);
85+
use_direct_decoder_ =
86+
options.properties->Get(ReaderProperties::kAvroUseDirectDecoder);
87+
if (!use_direct_decoder_) {
88+
return NotImplemented(
89+
"GenericDatum-based Avro decoder is deprecated. "
90+
"Please use the direct decoder (avro.use-direct-decoder=true)");
91+
}
8592
read_schema_ = options.projection;
8693

8794
// Open the input stream and adapt to the avro interface.
@@ -146,7 +153,7 @@ class AvroReader::Impl {
146153
}
147154
reader_->decr();
148155

149-
// Use direct decoder instead of GenericDatum
156+
// Use direct decoder for better performance (default path)
150157
ICEBERG_RETURN_UNEXPECTED(
151158
DecodeAvroToBuilder(reader_->readerSchema().root(), reader_->decoder(),
152159
projection_, *read_schema_, context_->builder_.get()));
@@ -244,6 +251,8 @@ class AvroReader::Impl {
244251
private:
245252
// Max number of rows in the record batch to read.
246253
int64_t batch_size_{};
254+
// Whether to use the direct decoder (true) or the legacy GenericDatum-based decoder (false); false is currently rejected with NotImplemented.
255+
bool use_direct_decoder_{true};
247256
// The end of the split to read and used to terminate the reading.
248257
std::optional<int64_t> split_end_;
249258
// The schema to read.

src/iceberg/file_reader.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ class ReaderProperties : public ConfigBase<ReaderProperties> {
7676
/// \brief The batch size to read.
7777
inline static Entry<int64_t> kBatchSize{"read.batch-size", 4096};
7878

79+
/// \brief Use direct Avro decoder (true) or GenericDatum-based decoder (false).
80+
/// Default: true (use direct decoder for better performance).
81+
inline static Entry<bool> kAvroUseDirectDecoder{"avro.use-direct-decoder", true};
82+
7983
/// \brief Create a default ReaderProperties instance.
8084
static std::unique_ptr<ReaderProperties> default_properties();
8185

0 commit comments

Comments
 (0)