Skip to content

Commit 3b59ef2

Browse files
authored
refactor: trivial improvements and minor fixes (#308)
1 parent abf1c1d commit 3b59ef2

File tree

8 files changed

+69
-78
lines changed

8 files changed

+69
-78
lines changed

src/iceberg/catalog/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
iceberg_install_all_headers(iceberg/catalog)
19-
2018
add_subdirectory(memory)
2119

2220
if(ICEBERG_BUILD_REST)

src/iceberg/manifest_reader.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ namespace iceberg {
3131

3232
Result<std::unique_ptr<ManifestReader>> ManifestReader::Make(
3333
const ManifestFile& manifest, std::shared_ptr<FileIO> file_io,
34-
std::shared_ptr<StructType> partition_schema) {
34+
std::shared_ptr<StructType> partition_type) {
3535
auto manifest_entry_schema =
36-
ManifestEntry::TypeFromPartitionType(std::move(partition_schema));
36+
ManifestEntry::TypeFromPartitionType(std::move(partition_type));
3737
std::shared_ptr<Schema> schema =
3838
FromStructType(std::move(*manifest_entry_schema), std::nullopt);
3939

@@ -53,9 +53,9 @@ Result<std::unique_ptr<ManifestReader>> ManifestReader::Make(
5353

5454
Result<std::unique_ptr<ManifestReader>> ManifestReader::Make(
5555
std::string_view manifest_location, std::shared_ptr<FileIO> file_io,
56-
std::shared_ptr<StructType> partition_schema) {
56+
std::shared_ptr<StructType> partition_type) {
5757
auto manifest_entry_schema =
58-
ManifestEntry::TypeFromPartitionType(std::move(partition_schema));
58+
ManifestEntry::TypeFromPartitionType(std::move(partition_type));
5959
auto fields_span = manifest_entry_schema->fields();
6060
std::vector<SchemaField> fields(fields_span.begin(), fields_span.end());
6161
auto schema = std::make_shared<Schema>(fields);

src/iceberg/manifest_reader.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,20 +41,20 @@ class ICEBERG_EXPORT ManifestReader {
4141
/// \brief Creates a reader for a manifest file.
4242
/// \param manifest A ManifestFile object containing metadata about the manifest.
4343
/// \param file_io File IO implementation to use.
44-
/// \param partition_schema Schema for the partition.
44+
/// \param partition_type Schema for the partition.
4545
/// \return A Result containing the reader or an error.
4646
static Result<std::unique_ptr<ManifestReader>> Make(
4747
const ManifestFile& manifest, std::shared_ptr<FileIO> file_io,
48-
std::shared_ptr<StructType> partition_schema);
48+
std::shared_ptr<StructType> partition_type);
4949

5050
/// \brief Creates a reader for a manifest file.
5151
/// \param manifest_location Path to the manifest file.
5252
/// \param file_io File IO implementation to use.
53-
/// \param partition_schema Schema for the partition.
53+
/// \param partition_type Schema for the partition.
5454
/// \return A Result containing the reader or an error.
5555
static Result<std::unique_ptr<ManifestReader>> Make(
5656
std::string_view manifest_location, std::shared_ptr<FileIO> file_io,
57-
std::shared_ptr<StructType> partition_schema);
57+
std::shared_ptr<StructType> partition_type);
5858
};
5959

6060
/// \brief Read manifest files from a manifest list file.

src/iceberg/sort_order.cc

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,12 @@ bool SortOrder::Equals(const SortOrder& other) const {
8888

8989
Status SortOrder::Validate(const Schema& schema) const {
9090
for (const auto& field : fields_) {
91-
auto schema_field = schema.FindFieldById(field.source_id());
92-
if (!schema_field.has_value() || schema_field.value() == std::nullopt) {
91+
ICEBERG_ASSIGN_OR_RAISE(auto schema_field, schema.FindFieldById(field.source_id()));
92+
if (!schema_field.has_value() || schema_field == std::nullopt) {
9393
return InvalidArgument("Cannot find source column for sort field: {}", field);
9494
}
9595

96-
const auto& source_type = schema_field.value().value().get().type();
96+
const auto& source_type = schema_field.value().get().type();
9797

9898
if (!field.transform()->CanTransform(*source_type)) {
9999
return InvalidArgument("Invalid source type {} for transform {}",
@@ -113,20 +113,9 @@ Result<std::unique_ptr<SortOrder>> SortOrder::Make(const Schema& schema, int32_t
113113
return InvalidArgument("Sort order must have at least one sort field");
114114
}
115115

116-
for (const auto& field : fields) {
117-
ICEBERG_ASSIGN_OR_RAISE(auto schema_field, schema.FindFieldById(field.source_id()));
118-
if (schema_field == std::nullopt) {
119-
return InvalidArgument("Cannot find source column for sort field: {}", field);
120-
}
121-
122-
const auto& source_type = schema_field.value().get().type();
123-
if (field.transform()->CanTransform(*source_type) == false) {
124-
return InvalidArgument("Invalid source type {} for transform {}",
125-
source_type->ToString(), field.transform()->ToString());
126-
}
127-
}
128-
129-
return std::make_unique<SortOrder>(sort_id, std::move(fields));
116+
auto sort_order = std::make_unique<SortOrder>(sort_id, std::move(fields));
117+
ICEBERG_RETURN_UNEXPECTED(sort_order->Validate(schema));
118+
return sort_order;
130119
}
131120

132121
Result<std::unique_ptr<SortOrder>> SortOrder::Make(int32_t sort_id,

src/iceberg/test/json_internal_test.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,8 @@ TEST(JsonPartitionTest, PartitionFieldFromJsonMissingField) {
141141

142142
TEST(JsonPartitionTest, PartitionSpec) {
143143
auto schema = std::make_shared<Schema>(
144-
std::vector<SchemaField>{SchemaField(3, "region", iceberg::string(), false),
145-
SchemaField(5, "ts", iceberg::int64(), false)},
144+
std::vector<SchemaField>{SchemaField(3, "region", string(), false),
145+
SchemaField(5, "ts", int64(), false)},
146146
/*schema_id=*/100);
147147
auto identity_transform = Transform::Identity();
148148
PartitionSpec spec(1, {PartitionField(3, 101, "region", identity_transform),

src/iceberg/test/manifest_reader_writer_test.cc

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737

3838
namespace iceberg {
3939

40-
class ManifestReaderTestBase : public TempFileTestBase {
40+
class ManifestReaderWriterTestBase : public TempFileTestBase {
4141
protected:
4242
static void SetUpTestSuite() { avro::RegisterAll(); }
4343

@@ -88,7 +88,7 @@ class ManifestReaderTestBase : public TempFileTestBase {
8888
std::shared_ptr<FileIO> file_io_;
8989
};
9090

91-
class ManifestReaderV1Test : public ManifestReaderTestBase {
91+
class ManifestV1Test : public ManifestReaderWriterTestBase {
9292
protected:
9393
std::vector<ManifestEntry> PreparePartitionedTestData() {
9494
std::vector<ManifestEntry> manifest_entries;
@@ -167,19 +167,19 @@ class ManifestReaderV1Test : public ManifestReaderTestBase {
167167
}
168168
};
169169

170-
TEST_F(ManifestReaderV1Test, PartitionedTest) {
170+
TEST_F(ManifestV1Test, ReadPartitionedTest) {
171171
// TODO(xiao.dong) we need to add more cases for different partition types
172-
iceberg::SchemaField partition_field(1000, "order_ts_hour", iceberg::int32(), true);
172+
SchemaField partition_field(1000, "order_ts_hour", int32(), true);
173173
auto partition_schema =
174174
std::make_shared<Schema>(std::vector<SchemaField>({partition_field}));
175175
auto expected_entries = PreparePartitionedTestData();
176176
TestManifestReading("56357cd7-391f-4df8-aa24-e7e667da8870-m4.avro", expected_entries,
177177
partition_schema);
178178
}
179179

180-
TEST_F(ManifestReaderV1Test, WritePartitionedTest) {
181-
iceberg::SchemaField table_field(1, "order_ts_hour_source", iceberg::int32(), true);
182-
iceberg::SchemaField partition_field(1000, "order_ts_hour", iceberg::int32(), true);
180+
TEST_F(ManifestV1Test, WritePartitionedTest) {
181+
SchemaField table_field(1, "order_ts_hour_source", int32(), true);
182+
SchemaField partition_field(1000, "order_ts_hour", int32(), true);
183183
auto table_schema = std::make_shared<Schema>(std::vector<SchemaField>({table_field}));
184184
auto partition_schema =
185185
std::make_shared<Schema>(std::vector<SchemaField>({partition_field}));
@@ -194,7 +194,7 @@ TEST_F(ManifestReaderV1Test, WritePartitionedTest) {
194194
TestManifestReadingByPath(write_manifest_path, expected_entries, partition_schema);
195195
}
196196

197-
class ManifestReaderV2Test : public ManifestReaderTestBase {
197+
class ManifestV2Test : public ManifestReaderWriterTestBase {
198198
protected:
199199
std::vector<ManifestEntry> CreateV2TestData(
200200
std::optional<int64_t> sequence_number = std::nullopt,
@@ -276,12 +276,12 @@ class ManifestReaderV2Test : public ManifestReaderTestBase {
276276
}
277277
};
278278

279-
TEST_F(ManifestReaderV2Test, NonPartitionedTest) {
279+
TEST_F(ManifestV2Test, ReadNonPartitionedTest) {
280280
auto expected_entries = PrepareNonPartitionedTestData();
281281
TestManifestReading("2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro", expected_entries);
282282
}
283283

284-
TEST_F(ManifestReaderV2Test, MetadataInheritanceTest) {
284+
TEST_F(ManifestV2Test, ReadMetadataInheritanceTest) {
285285
std::string path = GetResourcePath("2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro");
286286
ManifestFile manifest_file{
287287
.manifest_path = path,
@@ -295,9 +295,9 @@ TEST_F(ManifestReaderV2Test, MetadataInheritanceTest) {
295295
TestManifestReadingWithManifestFile(manifest_file, expected_entries);
296296
}
297297

298-
TEST_F(ManifestReaderV2Test, WriteNonPartitionedTest) {
299-
iceberg::SchemaField table_field(1, "order_ts_hour_source", int32(), true);
300-
iceberg::SchemaField partition_field(1000, "order_ts_hour", int32(), true);
298+
TEST_F(ManifestV2Test, WriteNonPartitionedTest) {
299+
SchemaField table_field(1, "order_ts_hour_source", int32(), true);
300+
SchemaField partition_field(1000, "order_ts_hour", int32(), true);
301301
auto table_schema = std::make_shared<Schema>(std::vector<SchemaField>({table_field}));
302302
auto expected_entries = PrepareNonPartitionedTestData();
303303
auto write_manifest_path = CreateNewTempFilePath();
@@ -306,9 +306,9 @@ TEST_F(ManifestReaderV2Test, WriteNonPartitionedTest) {
306306
TestManifestReadingByPath(write_manifest_path, expected_entries);
307307
}
308308

309-
TEST_F(ManifestReaderV2Test, WriteInheritancePartitionedTest) {
310-
iceberg::SchemaField table_field(1, "order_ts_hour_source", int32(), true);
311-
iceberg::SchemaField partition_field(1000, "order_ts_hour", int32(), true);
309+
TEST_F(ManifestV2Test, WriteInheritancePartitionedTest) {
310+
SchemaField table_field(1, "order_ts_hour_source", int32(), true);
311+
SchemaField partition_field(1000, "order_ts_hour", int32(), true);
312312
auto table_schema = std::make_shared<Schema>(std::vector<SchemaField>({table_field}));
313313
auto expected_entries = PrepareMetadataInheritanceTestData();
314314
auto write_manifest_path = CreateNewTempFilePath();

src/iceberg/test/partition_spec_test.cc

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -94,13 +94,13 @@ TEST(PartitionSpecTest, PartitionSchemaTest) {
9494
PartitionField pt_field2(7, 1001, "hour", identity_transform);
9595
PartitionSpec spec(100, {pt_field1, pt_field2});
9696

97-
auto partition_schema = spec.PartitionType(schema);
98-
ASSERT_TRUE(partition_schema.has_value());
99-
ASSERT_EQ(2, partition_schema.value()->fields().size());
100-
EXPECT_EQ(pt_field1.name(), partition_schema.value()->fields()[0].name());
101-
EXPECT_EQ(pt_field1.field_id(), partition_schema.value()->fields()[0].field_id());
102-
EXPECT_EQ(pt_field2.name(), partition_schema.value()->fields()[1].name());
103-
EXPECT_EQ(pt_field2.field_id(), partition_schema.value()->fields()[1].field_id());
97+
auto partition_type = spec.PartitionType(schema);
98+
ASSERT_TRUE(partition_type.has_value());
99+
ASSERT_EQ(2, partition_type.value()->fields().size());
100+
EXPECT_EQ(pt_field1.name(), partition_type.value()->fields()[0].name());
101+
EXPECT_EQ(pt_field1.field_id(), partition_type.value()->fields()[0].field_id());
102+
EXPECT_EQ(pt_field2.name(), partition_type.value()->fields()[1].name());
103+
EXPECT_EQ(pt_field2.field_id(), partition_type.value()->fields()[1].field_id());
104104
}
105105

106106
TEST(PartitionSpecTest, PartitionTypeTest) {
@@ -138,18 +138,18 @@ TEST(PartitionSpecTest, PartitionTypeTest) {
138138
auto parsed_spec_result = PartitionSpecFromJson(schema, json);
139139
ASSERT_TRUE(parsed_spec_result.has_value()) << parsed_spec_result.error().message;
140140

141-
auto partition_schema = parsed_spec_result.value()->PartitionType(*schema);
141+
auto partition_type = parsed_spec_result.value()->PartitionType(*schema);
142142

143143
SchemaField pt_field1(1000, "ts_day", date(), true);
144144
SchemaField pt_field2(1001, "id_bucket", int32(), true);
145145
SchemaField pt_field3(1002, "id_truncate", string(), true);
146146

147-
ASSERT_TRUE(partition_schema.has_value());
148-
ASSERT_EQ(3, partition_schema.value()->fields().size());
147+
ASSERT_TRUE(partition_type.has_value());
148+
ASSERT_EQ(3, partition_type.value()->fields().size());
149149

150-
EXPECT_EQ(pt_field1, partition_schema.value()->fields()[0]);
151-
EXPECT_EQ(pt_field2, partition_schema.value()->fields()[1]);
152-
EXPECT_EQ(pt_field3, partition_schema.value()->fields()[2]);
150+
EXPECT_EQ(pt_field1, partition_type.value()->fields()[0]);
151+
EXPECT_EQ(pt_field2, partition_type.value()->fields()[1]);
152+
EXPECT_EQ(pt_field3, partition_type.value()->fields()[2]);
153153
}
154154

155155
} // namespace iceberg

src/iceberg/test/schema_field_test.cc

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -27,45 +27,47 @@
2727
#include "iceberg/type.h"
2828
#include "iceberg/util/formatter.h" // IWYU pragma: keep
2929

30+
namespace iceberg {
31+
3032
TEST(SchemaFieldTest, Basics) {
3133
{
32-
iceberg::SchemaField field(1, "foo", iceberg::int32(), false);
34+
SchemaField field(1, "foo", int32(), false);
3335
EXPECT_EQ(1, field.field_id());
3436
EXPECT_EQ("foo", field.name());
35-
EXPECT_EQ(iceberg::TypeId::kInt, field.type()->type_id());
37+
EXPECT_EQ(TypeId::kInt, field.type()->type_id());
3638
EXPECT_FALSE(field.optional());
3739
EXPECT_EQ("foo (1): int (required)", field.ToString());
3840
EXPECT_EQ("foo (1): int (required)", std::format("{}", field));
3941
}
4042
{
41-
iceberg::SchemaField field = iceberg::SchemaField::MakeOptional(
42-
2, "foo bar", std::make_shared<iceberg::FixedType>(10));
43+
SchemaField field =
44+
SchemaField::MakeOptional(2, "foo bar", std::make_shared<FixedType>(10));
4345
EXPECT_EQ(2, field.field_id());
4446
EXPECT_EQ("foo bar", field.name());
45-
EXPECT_EQ(iceberg::FixedType(10), *field.type());
47+
EXPECT_EQ(FixedType(10), *field.type());
4648
EXPECT_TRUE(field.optional());
4749
EXPECT_EQ("foo bar (2): fixed(10) (optional)", field.ToString());
4850
EXPECT_EQ("foo bar (2): fixed(10) (optional)", std::format("{}", field));
4951
}
5052
{
51-
iceberg::SchemaField field = iceberg::SchemaField::MakeRequired(
52-
2, "foo bar", std::make_shared<iceberg::FixedType>(10));
53+
SchemaField field =
54+
SchemaField::MakeRequired(2, "foo bar", std::make_shared<FixedType>(10));
5355
EXPECT_EQ(2, field.field_id());
5456
EXPECT_EQ("foo bar", field.name());
55-
EXPECT_EQ(iceberg::FixedType(10), *field.type());
57+
EXPECT_EQ(FixedType(10), *field.type());
5658
EXPECT_FALSE(field.optional());
5759
EXPECT_EQ("foo bar (2): fixed(10) (required)", field.ToString());
5860
EXPECT_EQ("foo bar (2): fixed(10) (required)", std::format("{}", field));
5961
}
6062
}
6163

6264
TEST(SchemaFieldTest, Equality) {
63-
iceberg::SchemaField field1(1, "foo", iceberg::int32(), false);
64-
iceberg::SchemaField field2(2, "foo", iceberg::int32(), false);
65-
iceberg::SchemaField field3(1, "bar", iceberg::int32(), false);
66-
iceberg::SchemaField field4(1, "foo", iceberg::int64(), false);
67-
iceberg::SchemaField field5(1, "foo", iceberg::int32(), true);
68-
iceberg::SchemaField field6(1, "foo", iceberg::int32(), false);
65+
SchemaField field1(1, "foo", int32(), false);
66+
SchemaField field2(2, "foo", int32(), false);
67+
SchemaField field3(1, "bar", int32(), false);
68+
SchemaField field4(1, "foo", int64(), false);
69+
SchemaField field5(1, "foo", int32(), true);
70+
SchemaField field6(1, "foo", int32(), false);
6971

7072
ASSERT_EQ(field1, field1);
7173
ASSERT_NE(field1, field2);
@@ -82,25 +84,27 @@ TEST(SchemaFieldTest, Equality) {
8284

8385
TEST(SchemaFieldTest, WithDoc) {
8486
{
85-
iceberg::SchemaField field(/*field_id=*/1, /*name=*/"foo", iceberg::int32(),
86-
/*optional=*/false, /*doc=*/"Field documentation");
87+
SchemaField field(/*field_id=*/1, /*name=*/"foo", int32(),
88+
/*optional=*/false, /*doc=*/"Field documentation");
8789
EXPECT_EQ(1, field.field_id());
8890
EXPECT_EQ("foo", field.name());
89-
EXPECT_EQ(iceberg::TypeId::kInt, field.type()->type_id());
91+
EXPECT_EQ(TypeId::kInt, field.type()->type_id());
9092
EXPECT_FALSE(field.optional());
9193
EXPECT_EQ("Field documentation", field.doc());
9294
EXPECT_EQ("foo (1): int (required) - Field documentation", field.ToString());
9395
}
9496
{
95-
iceberg::SchemaField field = iceberg::SchemaField::MakeOptional(
97+
SchemaField field = SchemaField::MakeOptional(
9698
/*field_id=*/2, /*name=*/"bar",
97-
/*type=*/std::make_shared<iceberg::FixedType>(10),
99+
/*type=*/std::make_shared<FixedType>(10),
98100
/*doc=*/"Field with 10 bytes");
99101
EXPECT_EQ(2, field.field_id());
100102
EXPECT_EQ("bar", field.name());
101-
EXPECT_EQ(iceberg::FixedType(10), *field.type());
103+
EXPECT_EQ(FixedType(10), *field.type());
102104
EXPECT_TRUE(field.optional());
103105
EXPECT_EQ("Field with 10 bytes", field.doc());
104106
EXPECT_EQ("bar (2): fixed(10) (optional) - Field with 10 bytes", field.ToString());
105107
}
106108
}
109+
110+
} // namespace iceberg

0 commit comments

Comments
 (0)