Skip to content

Commit 7c63990

Browse files
committed
feat: add scaffolding work for parquet reader
1 parent 5bffdf6 commit 7c63990

File tree

10 files changed

+541
-2
lines changed

10 files changed

+541
-2
lines changed

cmake_modules/IcebergThirdpartyToolchain.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,9 @@ function(resolve_arrow_dependency)
6565
set(ARROW_BUILD_STATIC
6666
ON
6767
CACHE BOOL "" FORCE)
68+
# Work around undefined symbol: arrow::ipc::ReadSchema(arrow::io::InputStream*, arrow::ipc::DictionaryMemo*)
6869
set(ARROW_IPC
69-
OFF
70+
ON
7071
CACHE BOOL "" FORCE)
7172
set(ARROW_FILESYSTEM
7273
ON

src/iceberg/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,10 @@ if(ICEBERG_BUILD_BUNDLE)
108108
avro/avro_data_util.cc
109109
avro/avro_reader.cc
110110
avro/avro_schema_util.cc
111-
avro/avro_stream_internal.cc)
111+
avro/avro_stream_internal.cc
112+
parquet/parquet_data_util.cc
113+
parquet/parquet_reader.cc
114+
parquet/parquet_schema_util.cc)
112115

113116
# Libraries to link with exported libiceberg_bundle.{so,a}.
114117
set(ICEBERG_BUNDLE_STATIC_BUILD_INTERFACE_LIBS)
@@ -160,6 +163,7 @@ if(ICEBERG_BUILD_BUNDLE)
160163

161164
add_subdirectory(arrow)
162165
add_subdirectory(avro)
166+
add_subdirectory(parquet)
163167

164168
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/iceberg_bundle_export.h
165169
DESTINATION ${ICEBERG_INSTALL_INCLUDEDIR}/iceberg)

src/iceberg/parquet/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
iceberg_install_all_headers(iceberg/parquet)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/parquet/parquet_data_util_internal.h"
21+
22+
namespace iceberg::parquet {
23+
24+
Result<std::shared_ptr<::arrow::RecordBatch>> ConvertRecordBatch(
25+
std::shared_ptr<::arrow::RecordBatch> record_batch,
26+
const std::shared_ptr<::arrow::Schema>& output_arrow_schema,
27+
const Schema& projected_schema, const SchemaProjection& projection) {
28+
return NotImplemented("NYI");  // Scaffolding stub: arguments are unused; always yields NotImplemented("NYI").
29+
}
30+
31+
} // namespace iceberg::parquet
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
#include "iceberg/schema_util.h"
23+
24+
namespace arrow {
25+
class RecordBatch;
26+
class Schema;
27+
} // namespace arrow
28+
29+
namespace iceberg::parquet {
30+
31+
/// \brief Convert a record batch read from Parquet to the projected Iceberg schema.
32+
///
33+
/// \param record_batch The record batch to convert.
34+
/// \param output_arrow_schema The Arrow schema to convert to.
35+
/// \param projected_schema The projected Iceberg schema.
36+
/// \param projection Projection from the projected Iceberg schema to the record batch.
37+
/// \return The converted record batch.
38+
Result<std::shared_ptr<::arrow::RecordBatch>> ConvertRecordBatch(
39+
std::shared_ptr<::arrow::RecordBatch> record_batch,
40+
const std::shared_ptr<::arrow::Schema>& output_arrow_schema,
41+
const Schema& projected_schema, const SchemaProjection& projection);
42+
43+
} // namespace iceberg::parquet

0 commit comments

Comments
 (0)