@@ -37,3 +37,76 @@ services:
3737 depends_on :
3838 - spark_master
3939 ```
40+
41+ ## Python library
42+
43+ See [MinIO](../../dev_ops/services/minio.md), which is used here as the local S3 service.
44+
45+ ### Apache Iceberg integration
46+
47+ ```python
from pyspark.sql import SparkSession

# Build a Spark session that talks to the standalone master on localhost and
# registers an Iceberg catalog named `my_catalog`:
#   * catalog metadata is stored through JDBC in the PostgreSQL database below
#   * table data is written through S3FileIO to the `data-lakehouse` bucket
spark = (
    SparkSession.builder.master('spark://localhost:7077')
    .config(
        'spark.jars.packages',
        # Comma-separated Maven coordinates; adjacent literals are joined by
        # Python, so there must be no whitespace around the commas.
        'org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.7.1,'
        'org.apache.iceberg:iceberg-aws-bundle:1.7.1,'
        'org.postgresql:postgresql:42.7.4',
    )
    .config('spark.sql.extensions', 'org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions')
    .config('spark.sql.catalog.my_catalog', 'org.apache.iceberg.spark.SparkCatalog')
    # The catalog type is set exactly once: a second value for the same key
    # would simply replace the first.
    .config('spark.sql.catalog.my_catalog.type', 'jdbc')
    .config('spark.sql.catalog.my_catalog.uri', 'jdbc:postgresql://localhost:5500/postgres')
    # Local development credentials only.
    .config('spark.sql.catalog.my_catalog.jdbc.user', 'postgres')
    .config('spark.sql.catalog.my_catalog.jdbc.password', 'postgres')
    .config('spark.sql.catalog.my_catalog.io-impl', 'org.apache.iceberg.aws.s3.S3FileIO')
    .config('spark.sql.catalog.my_catalog.warehouse', 's3://data-lakehouse')
    .config('spark.sql.catalog.my_catalog.s3.region', 'us-east-1')
    # YOUR_IP_ADDRESS is a placeholder; replace it with the address of the
    # S3-compatible service before running.
    .config('spark.sql.catalog.my_catalog.s3.endpoint', 'http://YOUR_IP_ADDRESS:5561')
    .config('spark.sql.catalog.my_catalog.s3.access-key-id', 'admin')
    .config('spark.sql.catalog.my_catalog.s3.secret-access-key', 'password')
    # Address buckets as http://host:port/bucket rather than bucket.host,
    # which cannot resolve when the endpoint is a bare IP address.
    .config('spark.sql.catalog.my_catalog.s3.path-style-access', 'true')
    .getOrCreate()
)

# Smoke test: create an Iceberg table in the catalog and insert a few rows.
spark.sql('CREATE TABLE my_catalog.table (name string) USING iceberg;')
spark.sql("INSERT INTO my_catalog.table VALUES ('Alex'), ('Dipankar'), ('Jason')")
74+ ```
75+
76+ ### Apache Iceberg + Sedona
77+
78+ ``` python
from sedona.spark import SedonaContext

# Build a Spark session through Sedona's builder with both the Sedona and the
# Iceberg jars on the classpath, and register an Iceberg catalog named
# `my_catalog`:
#   * catalog metadata is stored through JDBC in the PostgreSQL database below
#   * table data is written through S3FileIO to the `data-lakehouse` bucket
# NOTE(review): Sedona's SQL functions are usually registered with
# SedonaContext.create(spark) after the session is built - confirm whether
# that call is needed for this example.
spark = (
    SedonaContext.builder()
    .master('spark://localhost:7077')
    .config(
        'spark.jars.packages',
        # Comma-separated Maven coordinates; adjacent literals are joined by
        # Python, so there must be no whitespace around the commas.
        # sedona
        'org.apache.sedona:sedona-spark-3.5_2.12:1.7.0,'
        'org.datasyslab:geotools-wrapper:1.7.0-28.5,'
        # iceberg
        'org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.7.1,'
        'org.apache.iceberg:iceberg-aws-bundle:1.7.1,'
        'org.postgresql:postgresql:42.7.4',
    )
    .config('spark.sql.extensions', 'org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions')
    .config('spark.sql.catalog.my_catalog', 'org.apache.iceberg.spark.SparkCatalog')
    .config('spark.sql.catalog.my_catalog.type', 'jdbc')
    .config('spark.sql.catalog.my_catalog.uri', 'jdbc:postgresql://localhost:5500/postgres')
    # Local development credentials only.
    .config('spark.sql.catalog.my_catalog.jdbc.user', 'postgres')
    .config('spark.sql.catalog.my_catalog.jdbc.password', 'postgres')
    .config('spark.sql.catalog.my_catalog.io-impl', 'org.apache.iceberg.aws.s3.S3FileIO')
    .config('spark.sql.catalog.my_catalog.warehouse', 's3://data-lakehouse')
    .config('spark.sql.catalog.my_catalog.s3.region', 'us-east-1')
    # YOUR_IP_ADDRESS is a placeholder; replace it with the address of the
    # S3-compatible service before running.
    .config('spark.sql.catalog.my_catalog.s3.endpoint', 'http://YOUR_IP_ADDRESS:5561')
    .config('spark.sql.catalog.my_catalog.s3.access-key-id', 'admin')
    .config('spark.sql.catalog.my_catalog.s3.secret-access-key', 'password')
    # Address buckets as http://host:port/bucket rather than bucket.host,
    # which cannot resolve when the endpoint is a bare IP address.
    .config('spark.sql.catalog.my_catalog.s3.path-style-access', 'true')
    .getOrCreate()
)

# Smoke test: create an Iceberg table in the catalog and insert a few rows.
spark.sql('CREATE TABLE my_catalog.table8 (name string) USING iceberg;')
spark.sql("INSERT INTO my_catalog.table8 VALUES ('Alex'), ('Dipankar'), ('Jason')")
112+ ```
0 commit comments