Commit 52c7bfc

feat: add MinIO user secrets, deployment, and client configuration for Spark Connect

1 parent 0fbe335
6 files changed: +184 −8 lines changed
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: minio-users
+type: Opaque
+stringData:
+  username1: |
+    username=spark
+    password=sparkspark
+    disabled=false
+    policies=readwrite,consoleAdmin,diagnostics
+    setPolicies=false
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: s3-credentials
+stringData:
+  accessKey: spark
+  secretKey: sparkspark
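
A quick way to sanity-check these credentials (a minimal sketch, not part of the commit; it assumes the boto3 package and the in-cluster minio service on port 9000):

import boto3

# Sketch only: the values mirror the s3-credentials Secret above.
s3 = boto3.client(
    "s3",
    endpoint_url="http://minio:9000",  # assumed in-cluster MinIO endpoint
    aws_access_key_id="spark",
    aws_secret_access_key="sparkspark",
)
# Once provisioning has run, "mybucket" should be listed.
print([b["Name"] for b in s3.list_buckets()["Buckets"]])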
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+timeout: 900
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: minio
+status:
+  readyReplicas: 1
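
The same readiness condition can also be checked outside kuttl with the kubernetes Python client (a sketch, assuming the client is installed and a kubeconfig is reachable; the namespace below is a placeholder, since kuttl creates one per test):

from kubernetes import client, config

config.load_kube_config()  # use load_incluster_config() inside a pod
apps = client.AppsV1Api()
# "default" is a placeholder namespace.
status = apps.read_namespaced_deployment("minio", "default").status
assert status.ready_replicas == 1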
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+commands:
+  - script: >-
+      helm install minio
+      --namespace $NAMESPACE
+      --version 14.6.16
+      -f helm-bitnami-minio-values.yaml
+      --repo https://charts.bitnami.com/bitnami minio
+    timeout: 240

tests/templates/kuttl/spark-connect/10-deploy-spark-connect.yaml.j2

Lines changed: 21 additions & 7 deletions
@@ -15,7 +15,7 @@ data:
    appender.CONSOLE.filter.threshold.type = ThresholdFilter
    appender.CONSOLE.filter.threshold.level = DEBUG

-   rootLogger.level=INFO
+   rootLogger.level=DEBUG
    rootLogger.appenderRefs = CONSOLE
    rootLogger.appenderRef.CONSOLE.ref = CONSOLE
---
@@ -36,7 +36,10 @@ spec:
  vectorAggregatorConfigMapName: vector-aggregator-discovery
{% endif %}
  args:
-    - --packages org.apache.iceberg:iceberg-spark-runtime-{{ ".".join(test_scenario['values']['spark-connect'].split('.')[:2]) }}_2.12:1.8.1
+    - --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider
+    - --conf spark.hadoop.fs.s3a.path.style.access=true
+    - --conf spark.hadoop.fs.s3a.endpoint=http://minio:9000
+    - --conf spark.hadoop.fs.s3a.region=us-east-1
  server:
    podOverrides:
      spec:
@@ -45,6 +48,17 @@ spec:
        env:
          - name: DEMO_GREETING
            value: "Hello from the overlords"
+          - name: AWS_ACCESS_KEY_ID
+            valueFrom:
+              secretKeyRef:
+                name: s3-credentials
+                key: accessKey
+          - name: AWS_SECRET_ACCESS_KEY
+            valueFrom:
+              secretKeyRef:
+                name: s3-credentials
+                key: secretKey
+
    jvmArgumentOverrides:
      add:
        - -Dmy.custom.jvm.arg=customValue
@@ -59,11 +73,11 @@ spec:
    configOverrides:
      spark-defaults.conf:
        spark.jars.ivy: /tmp/ivy2
-        spark.sql.extensions: org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
-        spark.sql.catalog.local: org.apache.iceberg.spark.SparkCatalog
-        spark.sql.catalog.local.type: hadoop
-        spark.sql.catalog.local.warehouse: /tmp/warehouse
-        spark.sql.defaultCatalog: local
+        # spark.sql.extensions: org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
+        # spark.sql.catalog.local: org.apache.iceberg.spark.SparkCatalog
+        # spark.sql.catalog.local.type: hadoop
+        # spark.sql.catalog.local.warehouse: /tmp/warehouse
+        # spark.sql.defaultCatalog: local
  executor:
    configOverrides:
      spark-defaults.conf:
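
One note on the credentials wiring (a sketch from me, not from the commit): Hadoop's SimpleAWSCredentialsProvider reads fs.s3a.access.key and fs.s3a.secret.key from the Hadoop configuration rather than from the environment, so a session that only has the injected AWS_* variables available can bridge them explicitly:

import os
from pyspark.sql import SparkSession

# Hypothetical bridge: copy the injected env vars into the config keys
# that SimpleAWSCredentialsProvider actually reads.
spark = (
    SparkSession.builder.appName("s3a-env-bridge")
    .config("spark.hadoop.fs.s3a.access.key", os.environ["AWS_ACCESS_KEY_ID"])
    .config("spark.hadoop.fs.s3a.secret.key", os.environ["AWS_SECRET_ACCESS_KEY"])
    .getOrCreate()
)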

tests/templates/kuttl/spark-connect/20-run-connect-client.yaml.j2

Lines changed: 71 additions & 1 deletion
@@ -1,4 +1,56 @@
---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: spark-connect-client
+data:
+  example.py: |-
+    import sys
+
+    from pyspark.sql import SparkSession
+    import pyspark.sql.functions as fn
+
+    if __name__ == "__main__":
+        remote: str = sys.argv[1]
+
+        print(f"Connecting to Spark Connect server at {remote}")
+
+        spark = (
+            SparkSession.builder.appName("SimpleSparkConnectApp")
+            .remote(remote)
+            .config("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider")
+            .config("spark.hadoop.fs.s3a.path.style.access", "true")
+            .config("spark.hadoop.fs.s3a.endpoint", "http://minio:9000")
+            .config("spark.hadoop.fs.s3a.region", "us-east-1")
+            .config("spark.hadoop.fs.s3a.access.key", "spark")
+            .config("spark.hadoop.fs.s3a.secret.key", "sparkspark")
+            .getOrCreate()
+        )
+
+        # See https://issues.apache.org/jira/browse/SPARK-46032
+        # spark.addArtifacts("/stackable/spark/connect/spark-connect_2.12-3.5.5.jar")
+
+        logFile = "/stackable/spark/README.md"
+
+        print(f"Reading log file: {logFile}")
+        logData = spark.read.text(logFile).cache()
+
+        print("Counting words in log file")
+        wc = (
+            logData.select(
+                fn.explode(fn.split(logData["value"], r"\s+"))
+                .alias("words"))
+            .groupBy("words").count()
+        )
+
+        wc.show()
+
+        dest = "s3a://mybucket/wordcount"
+        print(f"Writing word count to S3 {dest}")
+        wc.write.mode("overwrite").parquet(dest)
+
+        spark.stop()
+---
apiVersion: batch/v1
kind: Job
metadata:
@@ -21,7 +73,7 @@ spec:
          command:
            [
              "/usr/bin/python",
-              "/stackable/spark-connect-examples/python/simple-connect-app.py",
+              "/app/example.py",
              "sc://spark-connect-server-default",
            ]
          resources:
@@ -31,3 +83,21 @@ spec:
            requests:
              cpu: 200m
              memory: 128Mi
+          env:
+            - name: AWS_ACCESS_KEY_ID
+              valueFrom:
+                secretKeyRef:
+                  name: s3-credentials
+                  key: accessKey
+            - name: AWS_SECRET_ACCESS_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: s3-credentials
+                  key: secretKey
+          volumeMounts:
+            - name: spark-connect-client
+              mountPath: /app
+      volumes:
+        - name: spark-connect-client
+          configMap:
+            name: spark-connect-client
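
A natural follow-up check (not included in the commit) is to read the word counts back through the same session, confirming the S3 round trip; this reuses the spark session and bucket from example.py above:

# Sketch: verify the parquet write by reading it back from MinIO.
df = spark.read.parquet("s3a://mybucket/wordcount")
df.orderBy(df["count"].desc()).show(10)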
Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
+---
+mode: standalone
+disableWebUI: false
+extraEnvVars:
+  - name: BITNAMI_DEBUG
+    value: "true"
+  - name: MINIO_LOG_LEVEL
+    value: DEBUG
+
+provisioning:
+  enabled: true
+  buckets:
+    - name: mybucket
+  usersExistingSecrets:
+    - minio-users
+  resources:
+    requests:
+      memory: 1Gi
+      cpu: "512m"
+    limits:
+      memory: "1Gi"
+      cpu: "1"
+  podSecurityContext:
+    enabled: false
+  containerSecurityContext:
+    enabled: false
+
+volumePermissions:
+  enabled: false
+
+podSecurityContext:
+  enabled: false
+
+containerSecurityContext:
+  enabled: false
+
+persistence:
+  enabled: false
+
+resources:
+  requests:
+    memory: 1Gi
+    cpu: "512m"
+  limits:
+    memory: "1Gi"
+    cpu: "1"
+
+service:
+  type: NodePort
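
Since persistence is disabled, this MinIO is throwaway test infrastructure; a quick liveness check against it could look like the following sketch (assuming the requests package and the in-cluster minio:9000 address; MinIO's unauthenticated liveness endpoint is /minio/health/live):

import requests

# MinIO answers 200 on its liveness endpoint when the server is up.
resp = requests.get("http://minio:9000/minio/health/live", timeout=5)
assert resp.status_code == 200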
