Skip to content

Commit cbdab7c

Browse files
xuchen-pluszenghuadmetasoul01Ceng23333
authored
[Rust] Implement Arrow Flight SQL Service (lakesoul-io#580)
* stash fligh_sql_poc Signed-off-by: zenghua <huazeng@dmetasoul.com> * update flight sql server Signed-off-by: zenghua <huazeng@dmetasoul.com> * update flight server Signed-off-by: zenghua <huazeng@dmetasoul.com> * fix compile error Signed-off-by: zenghua <huazeng@dmetasoul.com> * add flight_server_wrapper Signed-off-by: zenghua <huazeng@dmetasoul.com> * add flight_server_wrapper Signed-off-by: zenghua <huazeng@dmetasoul.com> * bump version to datafusion=43.0.0 Signed-off-by: zenghua <huazeng@dmetasoul.com> * add jwt and rbac Signed-off-by: chenxu <chenxu@dmetasoul.com> * update jwt and rbac Signed-off-by: chenxu <chenxu@dmetasoul.com> * add rbac and jwt test Signed-off-by: chenxu <chenxu@dmetasoul.com> * fix meta data client domain Signed-off-by: chenxu <chenxu@dmetasoul.com> * implement CommandStatementIngest Signed-off-by: zenghua <huazeng@dmetasoul.com> * fix build Signed-off-by: chenxu <chenxu@dmetasoul.com> * update fligth server implementation Signed-off-by: zenghua <huazeng@dmetasoul.com> * fix hdfs Signed-off-by: chenxu <chenxu@dmetasoul.com> * update hdfs impl Signed-off-by: chenxu <chenxu@dmetasoul.com> * impl jwt verification Signed-off-by: chenxu <chenxu@dmetasoul.com> * fix rbac test Signed-off-by: chenxu <chenxu@dmetasoul.com> * add flow monitor metrics Signed-off-by: zenghua <huazeng@dmetasoul.com> * add s3 support Signed-off-by: zenghua <huazeng@dmetasoul.com> * add k8s deployment files Signed-off-by: chenxu <chenxu@dmetasoul.com> * fix control_throughput Signed-off-by: zenghua <huazeng@dmetasoul.com> * add arg worker_threads Signed-off-by: zenghua <huazeng@dmetasoul.com> * update k8s config Signed-off-by: Ceng23333 <441651826@qq.com> * update hdfs support Signed-off-by: zenghua <huazeng@dmetasoul.com> * update k8s config file Signed-off-by: Ceng23333 <441651826@qq.com> * update initialization Signed-off-by: zenghua <huazeng@dmetasoul.com> * update k8s config Signed-off-by: Ceng23333 <441651826@qq.com> * fix config Signed-off-by: Ceng23333 <441651826@qq.com> * upgrade datafusion to 45.0.0 Signed-off-by: chenxu <chenxu@dmetasoul.com> * fix rust tests Signed-off-by: chenxu <chenxu@dmetasoul.com> * fix filter Signed-off-by: chenxu <chenxu@dmetasoul.com> * fix listing table Signed-off-by: chenxu <chenxu@dmetasoul.com> * fix repartition pushdown Signed-off-by: zenghua <huazeng@dmetasoul.com> * fix timestamp literal type for substrait Signed-off-by: chenxu <chenxu@dmetasoul.com> * fix clippy Signed-off-by: zenghua <huazeng@dmetasoul.com> * fix clippy Signed-off-by: zenghua <huazeng@dmetasoul.com> * fix clippy Signed-off-by: zenghua <huazeng@dmetasoul.com> * fix rust ci Signed-off-by: zenghua <huazeng@dmetasoul.com> * add ci test info Signed-off-by: zenghua <huazeng@dmetasoul.com> * fix tpch ci config Signed-off-by: zenghua <huazeng@dmetasoul.com> * fix tpch ci config Signed-off-by: zenghua <huazeng@dmetasoul.com> * fix tpch ci config Signed-off-by: zenghua <huazeng@dmetasoul.com> --------- Signed-off-by: zenghua <huazeng@dmetasoul.com> Signed-off-by: chenxu <chenxu@dmetasoul.com> Signed-off-by: Ceng23333 <441651826@qq.com> Co-authored-by: zenghua <huazeng@dmetasoul.com> Co-authored-by: chenxu <chenxu@dmetasoul.com> Co-authored-by: Ceng23333 <441651826@qq.com>
1 parent 1274ecc commit cbdab7c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+6594
-2419
lines changed

k8s/build.Dockerfile

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
FROM ubuntu:22.04
2+
3+
ENV HADOOP_VERSION=3.3.6
4+
ENV HADOOP_HOME=/opt/hadoop
5+
6+
RUN sed -i "s@http://.*archive.ubuntu.com@http://mirrors.huaweicloud.com@g" /etc/apt/sources.list && \
7+
sed -i "s@http://.*security.ubuntu.com@http://mirrors.huaweicloud.com@g" /etc/apt/sources.list
8+
9+
ENV DEBIAN_FRONTEND=noninteractive
10+
RUN apt-get update -y && \
11+
apt-get install net-tools procps telnet vim openjdk-11-jre-headless curl libjemalloc2 -y
12+
13+
RUN curl -L -o /opt/hadoop-${HADOOP_VERSION}.tar.gz https://mirrors.huaweicloud.com/apache/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz && \
14+
HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION} && \
15+
tar -xzf /opt/${HADOOP_TAR_NAME}.tar.gz -C /opt && \
16+
ln -s /opt/hadoop-${HADOOP_VERSION} ${HADOOP_HOME} && \
17+
rm /opt/${HADOOP_TAR_NAME}.tar.gz
18+
19+
ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
20+
ENV LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${LD_LIBRARY_PATH}
21+
22+
ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so.2
23+
24+
ENV LAKESOUL_USER=stackable
25+
ENV LAKESOUL_UID=1000
26+
ENV HOME=/home/$LAKESOUL_USER
27+
ENV SHELL=/bin/bash
28+
ENV LANG=en_US.UTF-8
29+
ENV LANGUAGE=en_US.UTF-8
30+
ENV TZ=Asia/Shanghai
31+
32+
RUN groupadd -r -g ${LAKESOUL_UID} ${LAKESOUL_USER} \
33+
&& useradd -M -s /bin/bash -N -u ${LAKESOUL_UID} -g ${LAKESOUL_UID} ${LAKESOUL_USER} \
34+
&& mkdir -p ${HOME} \
35+
&& chown -R ${LAKESOUL_USER}:users ${HOME} \
36+
&& passwd -d ${LAKESOUL_USER} \
37+
&& usermod -aG sudo ${LAKESOUL_USER} \
38+
&& echo ${LAKESOUL_USER}' ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers \
39+
&& mkdir -p /app
40+
41+
COPY rust/target/release/flight_sql_server /app/flight_sql_server
42+
RUN chown -R $LAKESOUL_USER:$LAKESOUL_USER /app \
43+
&& chmod +x /app/flight_sql_server
44+
45+
USER ${LAKESOUL_USER}
46+
WORKDIR ${HOME}
47+
ENV RUST_LOG=info
48+
ENV RUST_BACKTRACE=full
49+
ENV RUST_LOG_FORMAT="%Y-%m-%dT%H:%M:%S%:z %l [%f:%L] %m"

k8s/flight_server.yaml

Lines changed: 111 additions & 0 deletions
Large diffs are not rendered by default.

k8s/lakesoul_flight_service.yaml

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
labels:
5+
app: lakesoul-flight-server
6+
name: lakesoul-flight-server
7+
namespace: lakesoul-dashboard
8+
spec:
9+
replicas: 1
10+
selector:
11+
matchLabels:
12+
app: lakesoul-flight-server
13+
template:
14+
metadata:
15+
labels:
16+
app: lakesoul-flight-server
17+
spec:
18+
containers:
19+
- image: swr.cn-southwest-2.myhuaweicloud.com/dmetasoul-repo/lakesoul-flight-server:v1.0.0
20+
command: ["/app/flight_sql_server"]
21+
resources:
22+
limits:
23+
cpu: 2000m
24+
memory: 100Mi
25+
requests:
26+
cpu: 1000m
27+
memory: 50Mi
28+
name: lakesoul-flight-server
29+
ports:
30+
- containerPort: 50051
31+
env:
32+
- name: HADOOP_CONF_DIR
33+
value: /etc/hadoop/conf
34+
- name: LAKESOUL_PG_DRIVER
35+
value: com.lakesoul.shaded.org.postgresql.Driver
36+
- name: LAKESOUL_PG_URL
37+
value: jdbc:postgresql://pgbouncer.default.svc.cluster.local:6432/postgres?stringtype=unspecified
38+
- name: LAKESOUL_PG_USERNAME
39+
value: postgres
40+
- name: LAKESOUL_PG_PASSWORD
41+
value: vlfs5gvt
42+
volumeMounts:
43+
- mountPath: /etc/hadoop/conf
44+
name: hadoop-config-volume
45+
volumes:
46+
- configMap:
47+
defaultMode: 420
48+
items:
49+
- key: hdfs-site.xml
50+
path: hdfs-site.xml
51+
- key: core-site.xml
52+
path: core-site.xml
53+
name: dashboard-config
54+
name: hadoop-config-volume
55+
---
56+
apiVersion: v1
57+
kind: Service
58+
metadata:
59+
labels:
60+
app: lakesoul-flight-server
61+
name: lakesoul-flight-server
62+
namespace: lakesoul-dashboard
63+
spec:
64+
clusterIP: None
65+
ports:
66+
- port: 50051
67+
protocol: TCP
68+
targetPort: 50051
69+
selector:
70+
app: lakesoul-flight-server
71+
type: ClusterIP
72+
---
73+
apiVersion: networking.k8s.io/v1
74+
kind: Ingress
75+
metadata:
76+
annotations:
77+
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
78+
name: lakesoul-flight-server
79+
namespace: lakesoul-dashboard
80+
spec:
81+
ingressClassName: nginx-1
82+
rules:
83+
- host: dapm-api.dmetasoul.com
84+
http:
85+
paths:
86+
- path: /
87+
pathType: Prefix
88+
backend:
89+
service:
90+
name: lakesoul-flight-server
91+
port:
92+
number: 50051
93+
tls:
94+
- hosts:
95+
- "*.dmetasoul.com"
96+
secretName: csair-secret

lakesoul-spark/src/test/scala/org/apache/spark/sql/lakesoul/benchmark/ConsistencyCI.scala

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.apache.spark.sql.lakesoul.benchmark
22

33
import org.apache.spark.sql.SparkSession
4+
import org.apache.spark.sql.functions.column
45
import org.apache.spark.sql.internal.SQLConf
56
import org.apache.spark.sql.lakesoul.catalog.LakeSoulCatalog
67
import org.apache.spark.sql.types._
@@ -102,12 +103,13 @@ object ConsistencyCI {
102103
StructField("l_shipmode", StringType, nullable = false),
103104
StructField("l_comment", StringType, nullable = false),
104105
)),
105-
"l_orderkey, l_partkey", Option.empty),
106+
"l_orderkey, l_partkey, l_suppkey, l_linenumber", Option.empty),
106107
)
107108

108109
def load_data(spark: SparkSession): Unit = {
109110

110111
val tpchPath = System.getenv("TPCH_DATA")
112+
// val tpchPath = "/Users/ceng/Documents/GitHub/LakeSoul/test_files/tpch/data"
111113
val lakeSoulPath = "/tmp/lakesoul/tpch"
112114
tpchTable.foreach(tup => {
113115
val (name, schema, hashPartitions, rangePartitions) = tup
@@ -167,10 +169,14 @@ object ConsistencyCI {
167169
val diff2 = rustDF.rdd.subtract(sparkDF.rdd)
168170
val result = diff1.count() == 0 && diff2.count() == 0
169171
if (!result) {
170-
println("sparkDF: ")
171-
println(sparkDF.collectAsList())
172-
println("rustDF: ")
173-
println(rustDF.collectAsList())
172+
println("diff1: ")
173+
spark.createDataFrame(diff1, sparkDF.schema)
174+
.orderBy(tup._3.split(",").map(_.trim).map(column): _*)
175+
.show(100)
176+
println("diff2: ")
177+
spark.createDataFrame(diff2, sparkDF.schema)
178+
.orderBy(tup._3.split(",").map(_.trim).map(column): _*)
179+
.show(100)
174180
System.exit(1)
175181
}
176182
})

0 commit comments

Comments
 (0)