Skip to content

Commit 4fb89f8

Browse files
authored
feat: add scaffolding work for parquet reader (#154)
1 parent 82a488f commit 4fb89f8

File tree

10 files changed

+515
-13
lines changed

10 files changed

+515
-13
lines changed

cmake_modules/IcebergThirdpartyToolchain.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,9 @@ function(resolve_arrow_dependency)
6565
set(ARROW_BUILD_STATIC
6666
ON
6767
CACHE BOOL "" FORCE)
68+
# Enable ARROW_IPC to work around the undefined symbol arrow::ipc::ReadSchema(arrow::io::InputStream*, arrow::ipc::DictionaryMemo*)
6869
set(ARROW_IPC
69-
OFF
70+
ON
7071
CACHE BOOL "" FORCE)
7172
set(ARROW_FILESYSTEM
7273
ON

src/iceberg/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,10 @@ if(ICEBERG_BUILD_BUNDLE)
109109
avro/avro_reader.cc
110110
avro/avro_schema_util.cc
111111
avro/avro_register.cc
112-
avro/avro_stream_internal.cc)
112+
avro/avro_stream_internal.cc
113+
parquet/parquet_data_util.cc
114+
parquet/parquet_reader.cc
115+
parquet/parquet_schema_util.cc)
113116

114117
# Libraries to link with exported libiceberg_bundle.{so,a}.
115118
set(ICEBERG_BUNDLE_STATIC_BUILD_INTERFACE_LIBS)
@@ -161,6 +164,7 @@ if(ICEBERG_BUILD_BUNDLE)
161164

162165
add_subdirectory(arrow)
163166
add_subdirectory(avro)
167+
add_subdirectory(parquet)
164168

165169
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/iceberg_bundle_export.h
166170
DESTINATION ${ICEBERG_INSTALL_INCLUDEDIR}/iceberg)

src/iceberg/arrow/arrow_error_transform_internal.h

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,18 @@ inline ErrorKind ToErrorKind(const ::arrow::Status& status) {
4343
} \
4444
lhs = std::move(result_name).ValueOrDie();
4545

46-
#define ICEBERG_ARROW_ASSIGN_OR_RETURN(lhs, rexpr) \
47-
ICEBERG_ARROW_ASSIGN_OR_RETURN_IMPL( \
48-
ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), lhs, rexpr, ToErrorKind)
49-
50-
#define ICEBERG_ARROW_RETURN_NOT_OK(expr) \
51-
do { \
52-
auto&& _status = (expr); \
53-
if (!_status.ok()) { \
54-
return std::unexpected<Error>{ \
55-
{.kind = ToErrorKind(_status), .message = _status.ToString()}}; \
56-
} \
46+
// Forwards `lhs` and `rexpr` to ICEBERG_ARROW_ASSIGN_OR_RETURN_IMPL, together with a
// helper name built from `_error_or_value` and `__COUNTER__` and the error-kind mapper
// ::iceberg::arrow::ToErrorKind.
// NOTE(review): the mapper is fully qualified, presumably so the macro still resolves
// when expanded outside namespace iceberg::arrow -- confirm against callers.
#define ICEBERG_ARROW_ASSIGN_OR_RETURN(lhs, rexpr) \
47+
ICEBERG_ARROW_ASSIGN_OR_RETURN_IMPL( \
48+
ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), lhs, rexpr, \
49+
::iceberg::arrow::ToErrorKind)
50+
51+
// Evaluates `expr` once and binds the result to `_status`. When `_status.ok()` is
// false, the enclosing function returns std::unexpected<Error> whose `kind` is
// ::iceberg::arrow::ToErrorKind(_status) and whose `message` is `_status.ToString()`;
// otherwise execution falls through. The do { ... } while (0) wrapper lets the
// expansion be used as a single statement.
#define ICEBERG_ARROW_RETURN_NOT_OK(expr) \
52+
do { \
53+
auto&& _status = (expr); \
54+
if (!_status.ok()) { \
55+
return std::unexpected<Error>{{.kind = ::iceberg::arrow::ToErrorKind(_status), \
56+
.message = _status.ToString()}}; \
57+
} \
5758
} while (0)
5859

5960
} // namespace iceberg::arrow

src/iceberg/parquet/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
iceberg_install_all_headers(iceberg/parquet)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/parquet/parquet_data_util_internal.h"
21+
22+
namespace iceberg::parquet {
23+
24+
// Scaffolding stub: none of the arguments are read yet; every call returns the result
// of NotImplemented("NYI").
Result<std::shared_ptr<::arrow::RecordBatch>> ProjectRecordBatch(
25+
std::shared_ptr<::arrow::RecordBatch> record_batch,
26+
const std::shared_ptr<::arrow::Schema>& output_arrow_schema,
27+
const Schema& projected_schema, const SchemaProjection& projection) {
28+
return NotImplemented("NYI");
29+
}
30+
31+
} // namespace iceberg::parquet
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
#include "iceberg/schema_util.h"
23+
24+
namespace arrow {
25+
class RecordBatch;
26+
class Schema;
27+
} // namespace arrow
28+
29+
namespace iceberg::parquet {
30+
31+
/// \brief Convert a record batch read from a Parquet file to the projected Iceberg schema.
32+
///
33+
/// \param record_batch The record batch to convert.
34+
/// \param output_arrow_schema The Arrow schema to convert to.
35+
/// \param projected_schema The projected Iceberg schema.
36+
/// \param projection The projection from the projected Iceberg schema to the record batch.
37+
/// \return The converted record batch.
38+
Result<std::shared_ptr<::arrow::RecordBatch>> ProjectRecordBatch(
39+
std::shared_ptr<::arrow::RecordBatch> record_batch,
40+
const std::shared_ptr<::arrow::Schema>& output_arrow_schema,
41+
const Schema& projected_schema, const SchemaProjection& projection);
42+
43+
} // namespace iceberg::parquet

0 commit comments

Comments
 (0)