Commit 7adc1cc

Do more stuff: nested namespaces for Iceberg, make everything tables, and add replace logic where none could be found by default. Why are there no sane defaults for Spark???
1 parent 3014165 commit 7adc1cc

6 files changed (+58, −33 lines)

nessie-stack/jaffle-shop-classic-spark/dbt_project.yml

Lines changed: 13 additions & 1 deletion
@@ -1,5 +1,6 @@
 name: 'jaffle_shop'
 
+
 config-version: 2
 version: '0.1'
 
@@ -21,7 +22,18 @@ require-dbt-version: [">=1.0.0", "<2.0.0"]
 
 models:
   jaffle_shop:
+    +on_table_exists: replace
     +table_format: iceberg
     materialized: table
     staging:
-      materialized: view
+      +schema: stg
+      materialized: table
+
+seeds:
+  jaffle_shop:
+    +quote_columns: false
+    +pre_hook: "DROP TABLE IF EXISTS {{ this }}"
+    +schema: raw
+    +file_format: iceberg
+    +materialized: seed
+
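The `+on_table_exists: replace` model config is the "replace logic" the commit message refers to: it asks the adapter to replace an existing table rather than fail on a name collision, which with Iceberg maps naturally onto Spark's CREATE OR REPLACE TABLE. The seed pre_hook handles the same problem for seeds by dropping any stale table before the CSV is reloaded. A minimal PySpark sketch of the SQL these settings should boil down to (catalog, namespace, and table names are illustrative assumptions, not taken from the diff):

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Roughly what a table model with table_format: iceberg and
# on_table_exists: replace should compile to (names are hypothetical):
spark.sql("""
    CREATE OR REPLACE TABLE nessie.dbt.customers
    USING iceberg
    AS SELECT * FROM nessie.dbt.stg.stg_customers
""")

# Roughly what the seed pre_hook does before dbt reloads the CSV:
spark.sql("DROP TABLE IF EXISTS nessie.dbt.raw.raw_customers")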
New file (a generate_schema_name macro override). Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+{% macro generate_schema_name(custom_schema_name, node) -%}
+
+    {%- set default_schema = target.schema -%}
+    {%- if custom_schema_name is none -%}
+
+        {{ default_schema }}
+
+    {%- else -%}
+
+        {{ default_schema }}.{{ custom_schema_name | trim }}
+
+    {%- endif -%}
+
+{%- endmacro %}
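This generate_schema_name override is what produces the nested namespaces mentioned in the commit message: dbt's built-in version concatenates with an underscore (target_schema_custom_schema), while this one emits a dotted child namespace that Spark resolves as a nested Iceberg/Nessie namespace. A pure-Python sketch of the macro's behavior (the target schema name "dbt" is an assumption for illustration):

def generate_schema_name(custom_schema_name, target_schema="dbt"):
    # dbt's built-in default would return f"{target_schema}_{custom_schema_name}";
    # this override nests with a dot so Spark sees a child namespace instead.
    if custom_schema_name is None:
        return target_schema
    return f"{target_schema}.{custom_schema_name.strip()}"

print(generate_schema_name(None))     # dbt
print(generate_schema_name(" stg "))  # dbt.stg  <- staging models land here
print(generate_schema_name("raw"))    # dbt.raw  <- seeds land here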

nessie-stack/jaffle-shop-classic-spark/models/employees2.sql

Lines changed: 0 additions & 10 deletions
This file was deleted.

nessie-stack/jaffle-shop-classic-spark/models/sources.yml

Lines changed: 0 additions & 9 deletions
This file was deleted.

nessie-stack/jaffle-shop-classic-spark/models/staging/stg_customers.sql

Lines changed: 7 additions & 0 deletions
@@ -1,3 +1,10 @@
+{{
+    config(
+        materialized='table',
+        file_format='iceberg'
+    )
+}}
+
 with source as (
 
     {#-

nessie-stack/notebooks/test.ipynb

Lines changed: 24 additions & 13 deletions
@@ -9,7 +9,7 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"25/03/21 22:59:45 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n"
+"25/03/22 00:13:18 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n"
 ]
 }
 ],
@@ -28,13 +28,14 @@
 "data": {
 "text/plain": [
 "[('spark.sql.catalog.nessie', 'org.apache.iceberg.spark.SparkCatalog'),\n",
+" ('spark.app.submitTime', '1742601784570'),\n",
 " ('spark.sql.catalog.nessie.uri', 'http://nessie:19120/api/v1'),\n",
 " ('spark.hadoop.fs.s3a.path.style.access', 'true'),\n",
-" ('spark.app.id', 'local-1742597984957'),\n",
 " ('spark.sql.catalog.nessie.ref', 'main'),\n",
+" ('spark.app.startTime', '1742601784818'),\n",
+" ('spark.driver.port', '39441'),\n",
 " ('spark.sql.warehouse.dir',\n",
 " 'file:/home/iceberg/notebooks/notebooks/spark-warehouse'),\n",
-" ('spark.driver.port', '40237'),\n",
 " ('spark.hadoop.fs.s3a.access.key', 'minioadmin'),\n",
 " ('spark.sql.catalog.nessie.s3.path-style-access', 'true'),\n",
 " ('spark.serializer.objectStreamReset', '100'),\n",
@@ -49,31 +50,30 @@
 " 'file:///root/.ivy2/jars/org.apache.iceberg_iceberg-spark-runtime-3.5_2.12-1.8.1.jar,file:///root/.ivy2/jars/org.projectnessie.nessie-integrations_nessie-spark-extensions-3.5_2.12-0.103.2.jar'),\n",
 " ('spark.driver.extraJavaOptions',\n",
 " '-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false'),\n",
-" ('spark.driver.host', '41c5f4f73c86'),\n",
+" ('spark.app.id', 'local-1742601785395'),\n",
 " ('spark.sql.catalog.nessie.s3.endpoint', 'http://minio:9000'),\n",
 " ('spark.repl.local.jars',\n",
 " 'file:///root/.ivy2/jars/org.apache.iceberg_iceberg-spark-runtime-3.5_2.12-1.8.1.jar,file:///root/.ivy2/jars/org.projectnessie.nessie-integrations_nessie-spark-extensions-3.5_2.12-0.103.2.jar'),\n",
 " ('spark.executor.id', 'driver'),\n",
-" ('spark.app.submitTime', '1742597984280'),\n",
+" ('spark.driver.host', '876c347e2cee'),\n",
 " ('spark.app.name', 'PySparkShell'),\n",
 " ('spark.hadoop.fs.s3a.impl', 'org.apache.hadoop.fs.s3a.S3AFileSystem'),\n",
 " ('spark.sql.catalogImplementation', 'hive'),\n",
 " ('spark.sql.catalog.nessie.io-impl', 'org.apache.iceberg.aws.s3.S3FileIO'),\n",
 " ('spark.submit.pyFiles',\n",
 " '/root/.ivy2/jars/org.apache.iceberg_iceberg-spark-runtime-3.5_2.12-1.8.1.jar,/root/.ivy2/jars/org.projectnessie.nessie-integrations_nessie-spark-extensions-3.5_2.12-0.103.2.jar'),\n",
 " ('spark.rdd.compress', 'True'),\n",
-" ('spark.app.initial.jar.urls',\n",
-" 'spark://41c5f4f73c86:40237/jars/org.projectnessie.nessie-integrations_nessie-spark-extensions-3.5_2.12-0.103.2.jar,spark://41c5f4f73c86:40237/jars/org.apache.iceberg_iceberg-spark-runtime-3.5_2.12-1.8.1.jar'),\n",
 " ('spark.executor.extraJavaOptions',\n",
 " '-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false'),\n",
 " ('spark.sql.catalog.nessie.warehouse', 's3a://warehouse'),\n",
 " ('spark.jars',\n",
 " 'file:///root/.ivy2/jars/org.apache.iceberg_iceberg-spark-runtime-3.5_2.12-1.8.1.jar,file:///root/.ivy2/jars/org.projectnessie.nessie-integrations_nessie-spark-extensions-3.5_2.12-0.103.2.jar'),\n",
-" ('spark.app.startTime', '1742597984489'),\n",
 " ('spark.hadoop.fs.s3a.endpoint', 'http://minio:9000'),\n",
 " ('spark.files',\n",
 " 'file:///root/.ivy2/jars/org.apache.iceberg_iceberg-spark-runtime-3.5_2.12-1.8.1.jar,file:///root/.ivy2/jars/org.projectnessie.nessie-integrations_nessie-spark-extensions-3.5_2.12-0.103.2.jar'),\n",
 " ('spark.sql.catalog.nessie.type', 'nessie'),\n",
+" ('spark.app.initial.jar.urls',\n",
+" 'spark://876c347e2cee:39441/jars/org.apache.iceberg_iceberg-spark-runtime-3.5_2.12-1.8.1.jar,spark://876c347e2cee:39441/jars/org.projectnessie.nessie-integrations_nessie-spark-extensions-3.5_2.12-0.103.2.jar'),\n",
 " ('spark.ui.showConsoleProgress', 'true')]"
 ]
 },
@@ -88,16 +88,27 @@
 },
 {
 "cell_type": "code",
-"execution_count": 4,
+"execution_count": 3,
 "metadata": {},
 "outputs": [
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"25/03/22 00:13:22 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist\n",
+"25/03/22 00:13:22 WARN HiveConf: HiveConf of name hive.stats.retries.wait does not exist\n",
+"25/03/22 00:13:22 WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 2.3.0\n",
+"25/03/22 00:13:22 WARN ObjectStore: setMetaStoreSchemaVersion called but recording version is disabled: version = 2.3.0, comment = Set by MetaStore [email protected]\n",
+"25/03/22 00:13:23 WARN ObjectStore: Failed to get database global_temp, returning NoSuchObjectException\n"
+]
+},
 {
 "data": {
 "text/plain": [
 "DataFrame[]"
 ]
 },
-"execution_count": 4,
+"execution_count": 3,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -108,7 +119,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 11,
+"execution_count": 4,
 "metadata": {},
 "outputs": [
 {
@@ -118,7 +129,7 @@
 "traceback": [
 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 "\u001b[0;31mAnalysisException\u001b[0m Traceback (most recent call last)",
-"Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mspark\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcreate namespace default\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
+"Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mspark\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcreate namespace default\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
 "File \u001b[0;32m/opt/spark/python/pyspark/sql/session.py:1631\u001b[0m, in \u001b[0;36mSparkSession.sql\u001b[0;34m(self, sqlQuery, args, **kwargs)\u001b[0m\n\u001b[1;32m 1627\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jvm \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1628\u001b[0m litArgs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jvm\u001b[38;5;241m.\u001b[39mPythonUtils\u001b[38;5;241m.\u001b[39mtoArray(\n\u001b[1;32m 1629\u001b[0m [_to_java_column(lit(v)) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m (args \u001b[38;5;129;01mor\u001b[39;00m [])]\n\u001b[1;32m 1630\u001b[0m )\n\u001b[0;32m-> 1631\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m DataFrame(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_jsparkSession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[43msqlQuery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlitArgs\u001b[49m\u001b[43m)\u001b[49m, \u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 1632\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 1633\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(kwargs) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
 "File \u001b[0;32m/opt/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1322\u001b[0m, in \u001b[0;36mJavaMember.__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1316\u001b[0m command \u001b[38;5;241m=\u001b[39m proto\u001b[38;5;241m.\u001b[39mCALL_COMMAND_NAME \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1317\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcommand_header \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1318\u001b[0m args_command \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1319\u001b[0m proto\u001b[38;5;241m.\u001b[39mEND_COMMAND_PART\n\u001b[1;32m 1321\u001b[0m answer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgateway_client\u001b[38;5;241m.\u001b[39msend_command(command)\n\u001b[0;32m-> 1322\u001b[0m return_value \u001b[38;5;241m=\u001b[39m \u001b[43mget_return_value\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1323\u001b[0m \u001b[43m \u001b[49m\u001b[43manswer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgateway_client\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtarget_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1325\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m temp_arg \u001b[38;5;129;01min\u001b[39;00m temp_args:\n\u001b[1;32m 1326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(temp_arg, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_detach\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n",
 "File \u001b[0;32m/opt/spark/python/pyspark/errors/exceptions/captured.py:185\u001b[0m, in \u001b[0;36mcapture_sql_exception.<locals>.deco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 181\u001b[0m converted \u001b[38;5;241m=\u001b[39m convert_exception(e\u001b[38;5;241m.\u001b[39mjava_exception)\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(converted, UnknownException):\n\u001b[1;32m 183\u001b[0m \u001b[38;5;66;03m# Hide where the exception came from that shows a non-Pythonic\u001b[39;00m\n\u001b[1;32m 184\u001b[0m \u001b[38;5;66;03m# JVM exception message.\u001b[39;00m\n\u001b[0;32m--> 185\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m converted \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 186\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 187\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n",
@@ -153,7 +164,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 13,
+"execution_count": 5,
 "metadata": {},
 "outputs": [
 {
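The AnalysisException in the diff above comes from the bare spark.sql("create namespace default") cell. A more forgiving pattern, in line with the nested-namespace direction of this commit, is to qualify the catalog explicitly and guard with IF NOT EXISTS so a pre-existing namespace does not raise (the namespace names below are assumptions, not taken from the diff):

# Create the namespaces the dbt project expects inside the `nessie` catalog;
# IF NOT EXISTS makes the cell idempotent across notebook re-runs. Assumes
# the `spark` session from the notebook, already wired to the Nessie catalog.
spark.sql("CREATE NAMESPACE IF NOT EXISTS nessie.dbt")
spark.sql("CREATE NAMESPACE IF NOT EXISTS nessie.dbt.stg")  # nested namespace
spark.sql("CREATE NAMESPACE IF NOT EXISTS nessie.dbt.raw")
spark.sql("SHOW NAMESPACES IN nessie").show(truncate=False)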
