Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ if(ICEBERG_BUILD_BUNDLE)
avro/avro_data_util.cc
avro/avro_reader.cc
avro/avro_schema_util.cc
avro/avro_register.cc
avro/avro_stream_internal.cc)

# Libraries to link with exported libiceberg_bundle.{so,a}.
Expand Down
36 changes: 36 additions & 0 deletions src/iceberg/avro/avro_register.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "avro_register.h"

#include "iceberg/avro/avro_schema_util_internal.h"

namespace iceberg::avro {

/// \brief Register the custom "map" logical type with Avro.
///
/// The registration body runs at most once; later calls return immediately.
void RegisterLogicalTypes() {
  // A function-local static is initialised exactly once, so the lambda below
  // (and therefore the registration) executes only on the first call.
  static const bool kRegistered = [] {
    // Avro only recognises custom logical types that were added to its
    // registry beforehand. See https://github.com/apache/avro/pull/3326 for
    // details.
    auto& registry = ::avro::CustomLogicalTypeRegistry::instance();
    registry.registerType(
        "map", [](const std::string&) { return std::make_shared<MapLogicalType>(); });
    return true;
  }();
  static_cast<void>(kRegistered);
}

} // namespace iceberg::avro
28 changes: 28 additions & 0 deletions src/iceberg/avro/avro_register.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

#include "iceberg/iceberg_bundle_export.h"

namespace iceberg::avro {

/// \brief Register the custom Avro logical types used by this library.
///
/// Currently this registers the "map" logical type with Avro's custom logical
/// type registry. The function may be called repeatedly; only the first call
/// performs the registration.
ICEBERG_BUNDLE_EXPORT void RegisterLogicalTypes();

} // namespace iceberg::avro
14 changes: 2 additions & 12 deletions src/iceberg/avro/avro_schema_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@
#include <string_view>

#include <arrow/type.h>
#include <arrow/util/decimal.h>
#include <avro/CustomAttributes.hh>
#include <avro/LogicalType.hh>
#include <avro/NodeImpl.hh>
#include <avro/Schema.hh>
#include <avro/Types.hh>
#include <avro/ValidSchema.hh>

#include "iceberg/avro/avro_register.h"
#include "iceberg/avro/avro_schema_util_internal.h"
#include "iceberg/metadata_columns.h"
#include "iceberg/schema.h"
Expand All @@ -49,18 +49,8 @@ constexpr std::string_view kValueIdProp = "value-id";
constexpr std::string_view kElementIdProp = "element-id";
constexpr std::string_view kAdjustToUtcProp = "adjust-to-utc";

/// \brief Custom Avro logical type whose logical type name is "map".
// NOTE(review): a struct with the same name and body is also shown in
// avro_schema_util_internal.h — presumably this copy is the one being
// removed; confirm that only one definition remains.
struct MapLogicalType : public ::avro::CustomLogicalType {
MapLogicalType() : ::avro::CustomLogicalType("map") {}
};

/// \brief Create an Avro logical type backed by MapLogicalType.
///
/// Registration of the "map" custom logical type is delegated to
/// RegisterLogicalTypes(), so the registry is populated in a single place
/// instead of being duplicated here under a second once-flag.
///
/// \return A ::avro::LogicalType wrapping a newly created MapLogicalType.
::avro::LogicalType GetMapLogicalType() {
  // Ensure the "map" type is known to Avro before handing out an instance.
  RegisterLogicalTypes();
  return ::avro::LogicalType(std::make_shared<MapLogicalType>());
}

Expand Down
4 changes: 4 additions & 0 deletions src/iceberg/avro/avro_schema_util_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ class ValidSchema;

namespace iceberg::avro {

/// \brief Custom Avro logical type whose logical type name is "map".
struct MapLogicalType : public ::avro::CustomLogicalType {
MapLogicalType() : ::avro::CustomLogicalType("map") {}
};

/// \brief A visitor that converts an Iceberg type to an Avro node.
class ToAvroNodeVisitor {
public:
Expand Down
12 changes: 7 additions & 5 deletions src/iceberg/file_format.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "iceberg/iceberg_export.h"
#include "iceberg/result.h"
#include "iceberg/util/string_utils.h"

namespace iceberg {

Expand Down Expand Up @@ -54,12 +55,13 @@ ICEBERG_EXPORT inline std::string_view ToString(FileFormatType format_type) {
}

/// \brief Convert a string to a FileFormatType
///
/// The input is passed through StringUtils::ToLower before matching, so the
/// format name is recognised regardless of letter case (e.g. "PARQUET").
///
/// \param str Name of the file format: parquet, avro, orc or puffin.
/// \return The matching FileFormatType, or an InvalidArgument error that
/// echoes the original (un-lowered) input when no format matches.
// NOTE(review): the function is declared noexcept while StringUtils::ToLower
// presumably builds an owning string — confirm that terminating on an
// allocation failure is acceptable here.
ICEBERG_EXPORT inline Result<FileFormatType> FileFormatTypeFromString(
    std::string_view str) noexcept {
  const auto lower = StringUtils::ToLower(str);
  if (lower == "parquet") return FileFormatType::kParquet;
  if (lower == "avro") return FileFormatType::kAvro;
  if (lower == "orc") return FileFormatType::kOrc;
  if (lower == "puffin") return FileFormatType::kPuffin;
  return InvalidArgument("Invalid file format type: {}", str);
}

Expand Down
4 changes: 2 additions & 2 deletions src/iceberg/manifest_entry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ std::shared_ptr<StructType> DataFile::Type(std::shared_ptr<StructType> partition
kContent,
kFilePath,
kFileFormat,
SchemaField::MakeRequired(102, "partition", std::move(partition_type)),
SchemaField::MakeRequired(102, kPartitionField, std::move(partition_type)),
kRecordCount,
kFileSize,
kColumnSizes,
Expand Down Expand Up @@ -68,7 +68,7 @@ std::shared_ptr<StructType> ManifestEntry::TypeFromDataFileType(
std::shared_ptr<StructType> datafile_type) {
return std::make_shared<StructType>(std::vector<SchemaField>{
kStatus, kSnapshotId, kSequenceNumber, kFileSequenceNumber,
SchemaField::MakeRequired(2, "data_file", std::move(datafile_type))});
SchemaField::MakeRequired(2, kDataFileField, std::move(datafile_type))});
}

} // namespace iceberg
4 changes: 3 additions & 1 deletion src/iceberg/manifest_entry.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,8 @@ struct ICEBERG_EXPORT DataFile {
inline static const SchemaField kFilePath = SchemaField::MakeRequired(
100, "file_path", iceberg::string(), "Location URI with FS scheme");
// Field 101 carries the file format *name* (see the description below), so it
// is declared as a string column rather than an int32.
inline static const SchemaField kFileFormat = SchemaField::MakeRequired(
    101, "file_format", iceberg::string(), "File format name: avro, orc, or parquet");
/// Name of the partition struct field (field id 102) in the data file type.
inline static const std::string kPartitionField = "partition";
inline static const SchemaField kRecordCount = SchemaField::MakeRequired(
103, "record_count", iceberg::int64(), "Number of records in the file");
inline static const SchemaField kFileSize = SchemaField::MakeRequired(
Expand Down Expand Up @@ -299,6 +300,7 @@ struct ICEBERG_EXPORT ManifestEntry {
SchemaField::MakeOptional(3, "sequence_number", iceberg::int64());
inline static const SchemaField kFileSequenceNumber =
SchemaField::MakeOptional(4, "file_sequence_number", iceberg::int64());
/// Name of the nested data file struct field (field id 2) in a manifest entry.
inline static const std::string kDataFileField = "data_file";

bool operator==(const ManifestEntry& other) const;

Expand Down
Loading
Loading