Skip to content

Commit aabcd06

Browse files
authored
[fs] Add filesystem support for Microsoft Azure Storage (#5379)
1 parent 9e478df commit aabcd06

File tree

16 files changed

+1366
-0
lines changed

16 files changed

+1366
-0
lines changed

docs/content/maintenance/filesystems.md

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ SELECT * FROM test_table;
242242
SELECT COUNT(1) FROM test_table;
243243
```
244244

245+
245246
{{< /tab >}}
246247
{{< tab "Trino" >}}
247248

@@ -420,3 +421,96 @@ CREATE CATALOG my_catalog WITH (
420421
{{< /tab >}}
421422

422423
{{< /tabs >}}
424+
425+
## Google Cloud Storage
426+
427+
{{< stable >}}
428+
429+
Download [paimon-gs-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-gs/{{< version >}}/paimon-gs-{{< version >}}.jar).
430+
431+
{{< /stable >}}
432+
433+
{{< unstable >}}
434+
435+
Download [paimon-gs-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-gs/{{< version >}}/).
436+
437+
{{< /unstable >}}
438+
439+
{{< tabs "gs" >}}
440+
441+
{{< tab "Flink" >}}
442+
443+
{{< hint info >}}
444+
If you have already configured [GCS access through Flink](https://nightlies.apache.org/flink/flink-docs-release-2.0/docs/deployment/filesystems/gcs/) (Via Flink FileSystem),
445+
here you can skip the following configuration.
446+
{{< /hint >}}
447+
448+
Put `paimon-gs-{{< version >}}.jar` into `lib` directory of your Flink home, and create catalog:
449+
450+
```sql
451+
CREATE CATALOG my_catalog WITH (
452+
'type' = 'paimon',
453+
'warehouse' = 'gs://<bucket>/<path>',
454+
'fs.gs.auth.type' = 'SERVICE_ACCOUNT_JSON_KEYFILE',
455+
'fs.gs.auth.service.account.json.keyfile' = '/path/to/service-account.json'
456+
);
457+
```
458+
459+
{{< /tab >}}
460+
461+
{{< /tabs >}}
462+
463+
464+
## Microsoft Azure Storage
465+
466+
{{< stable >}}
467+
468+
Download [paimon-azure-{{< version >}}.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-azure/{{< version >}}/paimon-azure-{{< version >}}.jar).
469+
470+
{{< /stable >}}
471+
472+
{{< unstable >}}
473+
474+
Download [paimon-azure-{{< version >}}.jar](https://repository.apache.org/snapshots/org/apache/paimon/paimon-azure/{{< version >}}/).
475+
476+
{{< /unstable >}}
477+
478+
{{< tabs "azure" >}}
479+
480+
{{< tab "Flink" >}}
481+
482+
{{< hint info >}}
483+
If you have already configured [Azure access through Flink](https://nightlies.apache.org/flink/flink-docs-release-2.0/docs/deployment/filesystems/azure/) (Via Flink FileSystem),
484+
here you can skip the following configuration.
485+
{{< /hint >}}
486+
487+
Put `paimon-azure-{{< version >}}.jar` into `lib` directory of your Flink home, and create catalog:
488+
489+
```sql
490+
CREATE CATALOG my_catalog WITH (
491+
'type' = 'paimon',
492+
'warehouse' = 'wasb://<container>@<account>.blob.core.windows.net/<path>',
493+
'fs.azure.account.key.<account>.blob.core.windows.net' = 'yyy'
494+
);
495+
```
496+
497+
{{< /tab >}}
498+
499+
{{< tab "Spark" >}}
500+
501+
{{< hint info >}}
502+
If you have already configured Azure access through Spark (Via Hadoop FileSystem), here you can skip the following configuration.
503+
{{< /hint >}}
504+
505+
Place `paimon-azure-{{< version >}}.jar` together with `paimon-spark-{{< version >}}.jar` under Spark's jars directory, and start `spark-sql` like this:
506+
507+
```shell
508+
spark-sql \
509+
--conf spark.sql.catalog.paimon=org.apache.paimon.spark.SparkCatalog \
510+
--conf spark.sql.catalog.paimon.warehouse=wasb://<container>@<account>.blob.core.windows.net/<path> \
511+
--conf fs.azure.account.key.<account>.blob.core.windows.net=yyy
512+
```
513+
514+
{{< /tab >}}
515+
516+
{{< /tabs >}}
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
~ Licensed to the Apache Software Foundation (ASF) under one
4+
~ or more contributor license agreements. See the NOTICE file
5+
~ distributed with this work for additional information
6+
~ regarding copyright ownership. The ASF licenses this file
7+
~ to you under the Apache License, Version 2.0 (the
8+
~ "License"); you may not use this file except in compliance
9+
~ with the License. You may obtain a copy of the License at
10+
~
11+
~ http://www.apache.org/licenses/LICENSE-2.0
12+
~
13+
~ Unless required by applicable law or agreed to in writing, software
14+
~ distributed under the License is distributed on an "AS IS" BASIS,
15+
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
~ See the License for the specific language governing permissions and
17+
~ limitations under the License.
18+
-->
19+
<project xmlns="http://maven.apache.org/POM/4.0.0"
20+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
21+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
22+
<modelVersion>4.0.0</modelVersion>
23+
<parent>
24+
<artifactId>paimon-filesystems</artifactId>
25+
<groupId>org.apache.paimon</groupId>
26+
<version>1.1-SNAPSHOT</version>
27+
</parent>
28+
29+
<artifactId>paimon-azure-impl</artifactId>
30+
<name>Paimon : FileSystems : AZURE : Impl</name>
31+
<packaging>jar</packaging>
32+
33+
<properties>
34+
<fs.azure.sdk.version>3.3.4</fs.azure.sdk.version>
35+
<fs.azure.api.version>1.16.0</fs.azure.api.version>
36+
</properties>
37+
<dependencies>
38+
<dependency>
39+
<groupId>org.apache.paimon</groupId>
40+
<artifactId>paimon-hadoop-shaded</artifactId>
41+
<version>${project.version}</version>
42+
43+
</dependency>
44+
<dependency>
45+
<groupId>org.apache.paimon</groupId>
46+
<artifactId>paimon-common</artifactId>
47+
<version>${project.version}</version>
48+
<scope>provided</scope>
49+
</dependency>
50+
51+
<dependency>
52+
<groupId>org.apache.hadoop</groupId>
53+
<artifactId>hadoop-azure</artifactId>
54+
<version>${fs.hadoopshaded.version}</version>
55+
<exclusions>
56+
<exclusion>
57+
<groupId>com.microsoft.azure</groupId>
58+
<artifactId>azure</artifactId>
59+
</exclusion>
60+
<exclusion>
61+
<!-- provided by paimon-hadoop-shaded -->
62+
<groupId>org.apache.hadoop</groupId>
63+
<artifactId>hadoop-common</artifactId>
64+
</exclusion>
65+
<exclusion>
66+
<groupId>ch.qos.reload4j</groupId>
67+
<artifactId>reload4j</artifactId>
68+
</exclusion>
69+
<exclusion>
70+
<groupId>org.slf4j</groupId>
71+
<artifactId>slf4j-reload4j</artifactId>
72+
</exclusion>
73+
74+
</exclusions>
75+
</dependency>
76+
<dependency>
77+
<groupId>com.microsoft.azure</groupId>
78+
<artifactId>azure</artifactId>
79+
<version>${fs.azure.api.version}</version>
80+
<scope>test</scope>
81+
</dependency>
82+
<dependency>
83+
<!-- Hadoop requires jaxb-api for javax.xml.bind.JAXBException -->
84+
<groupId>javax.xml.bind</groupId>
85+
<artifactId>jaxb-api</artifactId>
86+
<version>${jaxb.api.version}</version>
87+
<!-- packaged as an optional dependency that is only accessible on Java 11+ -->
88+
<scope>provided</scope>
89+
</dependency>
90+
</dependencies>
91+
<build>
92+
<plugins>
93+
<plugin>
94+
<groupId>org.apache.maven.plugins</groupId>
95+
<artifactId>maven-jar-plugin</artifactId>
96+
<configuration>
97+
<archive>
98+
<manifestEntries>
99+
<!-- jaxb-api is packaged as an optional dependency that is only accessible on Java 11+ -->
100+
<Multi-Release>true</Multi-Release>
101+
</manifestEntries>
102+
</archive>
103+
</configuration>
104+
</plugin>
105+
106+
<plugin>
107+
<groupId>org.apache.maven.plugins</groupId>
108+
<artifactId>maven-dependency-plugin</artifactId>
109+
<executions>
110+
<execution>
111+
<id>copy-javax-jars</id>
112+
<phase>process-resources</phase>
113+
<goals>
114+
<goal>copy</goal>
115+
</goals>
116+
</execution>
117+
</executions>
118+
<configuration>
119+
<artifactItems>
120+
<artifactItem>
121+
<groupId>javax.xml.bind</groupId>
122+
<artifactId>jaxb-api</artifactId>
123+
<version>${jaxb.api.version}</version>
124+
<type>jar</type>
125+
<overWrite>true</overWrite>
126+
</artifactItem>
127+
</artifactItems>
128+
<outputDirectory>${project.build.directory}/temporary</outputDirectory>
129+
</configuration>
130+
</plugin>
131+
132+
<plugin>
133+
<groupId>org.apache.maven.plugins</groupId>
134+
<artifactId>maven-antrun-plugin</artifactId>
135+
<executions>
136+
<execution>
137+
<id>unpack-javax-libraries</id>
138+
<phase>process-resources</phase>
139+
<goals>
140+
<goal>run</goal>
141+
</goals>
142+
<configuration>
143+
<target>
144+
<echo message="unpacking javax jars"/>
145+
<unzip dest="${project.build.directory}/classes/META-INF/versions/11">
146+
<fileset dir="${project.build.directory}/temporary">
147+
<include name="*"/>
148+
</fileset>
149+
</unzip>
150+
</target>
151+
</configuration>
152+
</execution>
153+
</executions>
154+
</plugin>
155+
156+
<plugin>
157+
<groupId>org.apache.maven.plugins</groupId>
158+
<artifactId>maven-shade-plugin</artifactId>
159+
<executions>
160+
<execution>
161+
<id>shade-paimon</id>
162+
<phase>package</phase>
163+
<goals>
164+
<goal>shade</goal>
165+
</goals>
166+
<configuration>
167+
<artifactSet>
168+
<includes>
169+
<include>*:*</include>
170+
</includes>
171+
</artifactSet>
172+
<filters>
173+
<filter>
174+
<artifact>*</artifact>
175+
<excludes>
176+
<exclude>.gitkeep</exclude>
177+
<exclude>mime.types</exclude>
178+
<exclude>mozilla/**</exclude>
179+
<exclude>META-INF/maven/**</exclude>
180+
<exclude>META-INF/versions/11/META-INF/maven/**</exclude>
181+
<exclude>META-INF/LICENSE.txt</exclude>
182+
</excludes>
183+
</filter>
184+
<filter>
185+
<artifact>*</artifact>
186+
<excludes>
187+
<exclude>properties.dtd</exclude>
188+
<exclude>PropertyList-1.0.dtd</exclude>
189+
<exclude>META-INF/services/javax.xml.stream.*</exclude>
190+
<exclude>META-INF/LICENSE.txt</exclude>
191+
</excludes>
192+
</filter>
193+
</filters>
194+
</configuration>
195+
</execution>
196+
</executions>
197+
</plugin>
198+
</plugins>
199+
</build>
200+
</project>

0 commit comments

Comments
 (0)