Skip to content

Commit 1b005f3

Browse files
authored
DRILL-8542: Support Paimon Format Plugin (#3035)
1 parent bcb4386 commit 1b005f3

26 files changed

+3601
-0
lines changed

contrib/format-paimon/README.md

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Apache Paimon format plugin
2+
3+
This format plugin enables Drill to query Apache Paimon tables.
4+
5+
Unlike the single files handled by regular format plugins, a Paimon table is a folder with data and metadata files, so Drill checks the presence
6+
of the `snapshot` directory and `schema` directory to ensure that the table is a Paimon one.
7+
8+
Drill supports reading all formats of Paimon tables currently supported via Paimon Java API: Parquet and ORC.
9+
There is no need to specify the actual table format; it is discovered automatically.
10+
11+
For details related to Apache Paimon table format, please refer to [official docs](https://paimon.apache.org/).
12+
13+
## Supported optimizations and features
14+
15+
### Project pushdown
16+
17+
This format plugin supports project pushdown optimization.
18+
19+
For the case of project pushdown, only columns specified in the query will be read. In conjunction with
20+
column-oriented formats like Parquet or ORC, it allows improving reading performance significantly.
21+
22+
### Filter pushdown
23+
24+
This format plugin supports filter pushdown optimization.
25+
26+
For the case of filter pushdown, expressions supported by Paimon API will be pushed down, so only data that matches
27+
the filter expression will be read.
28+
29+
### Limit pushdown
30+
31+
This format plugin supports limit pushdown optimization.
32+
33+
The limit is pushed down to Paimon scan planning to reduce the amount of data read.
34+
35+
### Querying table metadata
36+
37+
Apache Drill provides the ability to query table metadata exposed by Paimon.
38+
39+
At this point, Apache Paimon has the following metadata kinds:
40+
41+
* SNAPSHOTS
42+
* SCHEMAS
43+
* FILES
44+
* MANIFESTS
45+
46+
To query specific metadata, just add the `#metadata_name` suffix to the table location, like in the following example:
47+
48+
```sql
49+
SELECT *
50+
FROM dfs.tmp.`testTable#snapshots`;
51+
```
52+
53+
### Querying specific table versions (time travel)
54+
55+
Apache Paimon can track table modifications and read a specific table version before or after modifications
56+
or the modifications themselves.
57+
58+
This format plugin embraces this ability and provides an easy-to-use way of triggering it.
59+
60+
The following ways of specifying table version are supported:
61+
62+
- `snapshotId` - id of the specific snapshot
63+
- `snapshotAsOfTime` - the most recent snapshot as of the given time, expressed in milliseconds since the Unix epoch
64+
65+
A table function can be used to specify one of the above options in the following way:
66+
67+
```sql
68+
SELECT *
69+
FROM table(dfs.tmp.testTable(type => 'paimon', snapshotId => 1));
70+
71+
SELECT *
72+
FROM table(dfs.tmp.testTable(type => 'paimon', snapshotAsOfTime => 1736345510000));
73+
```
74+
75+
Note: `snapshotId` and `snapshotAsOfTime` are mutually exclusive and cannot be specified at the same time.
76+
77+
## Configuration
78+
79+
The only required configuration option is:
80+
81+
- `type` - format plugin type, should be `'paimon'`
82+
83+
Note: `snapshotId` and `snapshotAsOfTime` for time travel queries are specified at query time using the `table()` function.
84+
85+
### Format config example:
86+
87+
```json
88+
{
89+
"type": "file",
90+
"formats": {
91+
"paimon": {
92+
"type": "paimon"
93+
}
94+
}
95+
}
96+
```

contrib/format-paimon/pom.xml

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
<?xml version="1.0"?>
2+
<!--
3+
4+
Licensed to the Apache Software Foundation (ASF) under one
5+
or more contributor license agreements. See the NOTICE file
6+
distributed with this work for additional information
7+
regarding copyright ownership. The ASF licenses this file
8+
to you under the Apache License, Version 2.0 (the
9+
"License"); you may not use this file except in compliance
10+
with the License. You may obtain a copy of the License at
11+
12+
http://www.apache.org/licenses/LICENSE-2.0
13+
14+
Unless required by applicable law or agreed to in writing, software
15+
distributed under the License is distributed on an "AS IS" BASIS,
16+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
See the License for the specific language governing permissions and
18+
limitations under the License.
19+
20+
-->
21+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
22+
<modelVersion>4.0.0</modelVersion>
23+
<parent>
24+
<artifactId>drill-contrib-parent</artifactId>
25+
<groupId>org.apache.drill.contrib</groupId>
26+
<version>1.23.0-SNAPSHOT</version>
27+
</parent>
28+
29+
<artifactId>drill-paimon-format</artifactId>
30+
31+
<name>Drill : Contrib : Format : Paimon</name>
32+
33+
<dependencies>
34+
<dependency>
35+
<groupId>org.apache.drill.exec</groupId>
36+
<artifactId>drill-java-exec</artifactId>
37+
<version>${project.version}</version>
38+
</dependency>
39+
40+
<!-- Apache Paimon modules. Every entry takes its version from the ${paimon.version} property,
     which is not defined in this file (presumably inherited from a parent POM; confirm). -->
<dependency>
41+
<groupId>org.apache.paimon</groupId>
42+
<artifactId>paimon-core</artifactId>
43+
<version>${paimon.version}</version>
44+
</dependency>
45+
<dependency>
46+
<groupId>org.apache.paimon</groupId>
47+
<artifactId>paimon-common</artifactId>
48+
<version>${paimon.version}</version>
49+
</dependency>
50+
<dependency>
51+
<groupId>org.apache.paimon</groupId>
52+
<artifactId>paimon-api</artifactId>
53+
<version>${paimon.version}</version>
54+
</dependency>
55+
<dependency>
56+
<groupId>org.apache.paimon</groupId>
57+
<artifactId>paimon-codegen-loader</artifactId>
58+
<version>${paimon.version}</version>
59+
</dependency>
60+
<dependency>
61+
<groupId>org.apache.paimon</groupId>
62+
<artifactId>paimon-format</artifactId>
63+
<version>${paimon.version}</version>
64+
</dependency>
65+
<!-- Paimon shaded third-party bundles (Jackson, Guava, Caffeine, Netty). Their versions are
     written out literally here and do not follow ${paimon.version}.
     NOTE(review): presumably these must be kept compatible with the Paimon release in use; confirm when upgrading. -->
<dependency>
66+
<groupId>org.apache.paimon</groupId>
67+
<artifactId>paimon-shade-jackson-2</artifactId>
68+
<version>2.14.2-0.8.0</version>
69+
</dependency>
70+
<dependency>
71+
<groupId>org.apache.paimon</groupId>
72+
<artifactId>paimon-shade-guava-30</artifactId>
73+
<version>30.1.1-jre-0.8.0</version>
74+
</dependency>
75+
<dependency>
76+
<groupId>org.apache.paimon</groupId>
77+
<artifactId>paimon-shade-caffeine-2</artifactId>
78+
<version>2.9.3-0.8.0</version>
79+
</dependency>
80+
<dependency>
81+
<groupId>org.apache.paimon</groupId>
82+
<artifactId>paimon-shade-netty-4</artifactId>
83+
<version>4.1.100.Final-0.8.0</version>
84+
</dependency>
85+
<!-- Compression libraries (aircompressor, lz4-java, zstd-jni, snappy-java) with literally pinned versions.
     NOTE(review): presumably required at runtime by Paimon's file readers; confirm before removing any of them. -->
<dependency>
86+
<groupId>io.airlift</groupId>
87+
<artifactId>aircompressor</artifactId>
88+
<version>0.27</version>
89+
</dependency>
90+
<dependency>
91+
<groupId>org.lz4</groupId>
92+
<artifactId>lz4-java</artifactId>
93+
<version>1.8.0</version>
94+
</dependency>
95+
<dependency>
96+
<groupId>com.github.luben</groupId>
97+
<artifactId>zstd-jni</artifactId>
98+
<version>1.5.5-11</version>
99+
</dependency>
100+
<dependency>
101+
<groupId>org.xerial.snappy</groupId>
102+
<artifactId>snappy-java</artifactId>
103+
<version>1.1.8.4</version>
104+
</dependency>
105+
106+
<!-- Test dependency -->
107+
<dependency>
108+
<groupId>org.apache.drill.exec</groupId>
109+
<artifactId>drill-java-exec</artifactId>
110+
<classifier>tests</classifier>
111+
<version>${project.version}</version>
112+
<scope>test</scope>
113+
</dependency>
114+
<dependency>
115+
<groupId>org.apache.drill</groupId>
116+
<artifactId>drill-common</artifactId>
117+
<classifier>tests</classifier>
118+
<version>${project.version}</version>
119+
<scope>test</scope>
120+
</dependency>
121+
</dependencies>
122+
123+
</project>
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.drill.exec.store.paimon;
19+
20+
import org.apache.drill.exec.store.schedule.CompleteWork;
21+
import org.apache.drill.exec.store.schedule.EndpointByteMap;
22+
import org.apache.paimon.table.source.Split;
23+
24+
public class PaimonCompleteWork implements CompleteWork {
25+
private final EndpointByteMap byteMap;
26+
27+
private final Split split;
28+
29+
private final long totalBytes;
30+
31+
public PaimonCompleteWork(EndpointByteMap byteMap, Split split) {
32+
this.byteMap = byteMap;
33+
this.split = split;
34+
long rowCount = split.rowCount();
35+
this.totalBytes = rowCount > 0 ? rowCount : 1;
36+
}
37+
38+
public Split getSplit() {
39+
return split;
40+
}
41+
42+
public long getRowCount() {
43+
return split.rowCount();
44+
}
45+
46+
@Override
47+
public long getTotalBytes() {
48+
return totalBytes;
49+
}
50+
51+
@Override
52+
public EndpointByteMap getByteMap() {
53+
return byteMap;
54+
}
55+
56+
@Override
57+
public int compareTo(CompleteWork o) {
58+
return 0;
59+
}
60+
}

0 commit comments

Comments
 (0)