From 644b3ed1f4e40a5b9b1dc3b964b4141f885b7696 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Borja=20V=C3=A1zquez-Barreiros?= Date: Tue, 15 Oct 2024 19:03:10 +0100 Subject: [PATCH 1/5] Update adapters.sql --- dbt/include/bigquery/macros/adapters.sql | 37 ++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/dbt/include/bigquery/macros/adapters.sql b/dbt/include/bigquery/macros/adapters.sql index f166e5d05..1ef92fe16 100644 --- a/dbt/include/bigquery/macros/adapters.sql +++ b/dbt/include/bigquery/macros/adapters.sql @@ -3,7 +3,7 @@ {%- set raw_partition_by = config.get('partition_by', none) -%} {%- set raw_cluster_by = config.get('cluster_by', none) -%} {%- set sql_header = config.get('sql_header', none) -%} - + {%- set table_format = config.get('table_format', 'default') -%} {%- set partition_config = adapter.parse_partition_by(raw_partition_by) -%} {%- if partition_config.time_ingestion_partitioning -%} {%- set columns = get_columns_with_types_in_query_sql(sql) -%} @@ -23,10 +23,41 @@ {#-- cannot do contracts at the same time as time ingestion partitioning -#} {{ columns }} {% endif %} - {{ partition_by(partition_config) }} + {%- if table_format == "iceberg" and partition_config is not none-%} + {% do exceptions.raise_compiler_error("Partition by not yet available in iceberg tables, use cluster by instead") %} + {#-- PARTITION BY cannot be used in iceberg-#} + {%- else -%} + {{ partition_by(partition_config) }} + {% endif %} + {{ cluster_by(raw_cluster_by) }} - {{ bigquery_table_options(config, model, temporary) }} + {% if table_format == "iceberg" %} + + {% set base_location = config.get('base_location') %} + {%- if not base_location-%} + {% do exceptions.raise_compiler_error("base_location not found") %} + {% endif %} + {% set connection = config.get('connection') %} + {%- if not connection-%} + {% do exceptions.raise_compiler_error("Bq connection not found") %} + {% endif %} + {% set sub_path = relation.identifier %} + {% set connection = "WITH CONNECTION `"~connection~"`" %} + {#-- pass this through {{ bigquery_table_options(config, model, temporary) }}-#} + {% set options %} + OPTIONS( + file_format = 'PARQUET', + table_format = 'ICEBERG', + storage_uri = '{{base_location}}/{{sub_path}}' + ) + {%- endset -%} + + {{ connection }} + {{ options }} + + {% endif %} + {#-- PARTITION BY cannot be used with the AS query_statement clause. https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#partition_expression From 048e3b22ccd9b7c589c92b1a151c809f4abbc6c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Borja=20V=C3=A1zquez-Barreiros?= Date: Tue, 26 Nov 2024 21:54:39 +0000 Subject: [PATCH 2/5] Create Features-20241126-215421.yaml --- .changes/unreleased/Features-20241126-215421.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Features-20241126-215421.yaml diff --git a/.changes/unreleased/Features-20241126-215421.yaml b/.changes/unreleased/Features-20241126-215421.yaml new file mode 100644 index 000000000..00020fb27 --- /dev/null +++ b/.changes/unreleased/Features-20241126-215421.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Adds Iceberg support as a new table format configuration +time: 2024-11-26T21:54:21.990317Z +custom: + Author: borjavb + Issue: "1370" From 5a95b443c818ad6edde0a8f12090107ace067ac3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Borja=20V=C3=A1zquez-Barreiros?= Date: Wed, 27 Nov 2024 23:31:26 +0000 Subject: [PATCH 3/5] Update adapters.sql --- dbt/include/bigquery/macros/adapters.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/include/bigquery/macros/adapters.sql b/dbt/include/bigquery/macros/adapters.sql index 1ef92fe16..913ebc824 100644 --- a/dbt/include/bigquery/macros/adapters.sql +++ b/dbt/include/bigquery/macros/adapters.sql @@ -38,13 +38,13 @@ {%- if not base_location-%} {% do exceptions.raise_compiler_error("base_location not found") %} {% endif %} - {% set connection = config.get('connection') %} + {% set connection = config.get('connection') %} {%- if not connection-%} {% do exceptions.raise_compiler_error("Bq connection not found") %} {% endif %} {% set sub_path = relation.identifier %} {% set connection = "WITH CONNECTION `"~connection~"`" %} - {#-- pass this through {{ bigquery_table_options(config, model, temporary) }}-#} + {#-- pass this through {{ bigquery_table_options() }}-#} {% set options %} OPTIONS( file_format = 'PARQUET', From 6bcf9fab621d72871f95dc293a997c15422c9f1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Borja=20V=C3=A1zquez-Barreiros?= Date: Sat, 30 Nov 2024 22:44:22 +0000 Subject: [PATCH 4/5] reuse options macro --- dbt/include/bigquery/macros/adapters.sql | 28 ++----------------- .../macros/relations/table/options.sql | 22 +++++++++++++++ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/dbt/include/bigquery/macros/adapters.sql b/dbt/include/bigquery/macros/adapters.sql index 913ebc824..690f613c6 100644 --- a/dbt/include/bigquery/macros/adapters.sql +++ b/dbt/include/bigquery/macros/adapters.sql @@ -24,8 +24,8 @@ {{ columns }} {% endif %} {%- if table_format == "iceberg" and partition_config is not none-%} + {#-- Nov 2024. Limitations: PARTITION BY cannot be used in iceberg-#} {% do exceptions.raise_compiler_error("Partition by not yet available in iceberg tables, use cluster by instead") %} - {#-- PARTITION BY cannot be used in iceberg-#} {%- else -%} {{ partition_by(partition_config) }} {% endif %} @@ -33,32 +33,10 @@ {{ cluster_by(raw_cluster_by) }} {% if table_format == "iceberg" %} - - {% set base_location = config.get('base_location') %} - {%- if not base_location-%} - {% do exceptions.raise_compiler_error("base_location not found") %} - {% endif %} - {% set connection = config.get('connection') %} - {%- if not connection-%} - {% do exceptions.raise_compiler_error("Bq connection not found") %} - {% endif %} - {% set sub_path = relation.identifier %} - {% set connection = "WITH CONNECTION `"~connection~"`" %} - {#-- pass this through {{ bigquery_table_options() }}-#} - {% set options %} - OPTIONS( - file_format = 'PARQUET', - table_format = 'ICEBERG', - storage_uri = '{{base_location}}/{{sub_path}}' - ) - {%- endset -%} - - {{ connection }} - {{ options }} - + {{ bigquery_iceberg_table_options(config, relation) }} + {{ bigquery_iceberg_connection(config) }} {% endif %} - {#-- PARTITION BY cannot be used with the AS query_statement clause. https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#partition_expression -#} diff --git a/dbt/include/bigquery/macros/relations/table/options.sql b/dbt/include/bigquery/macros/relations/table/options.sql index 9f9b6b6d1..697ad2195 100644 --- a/dbt/include/bigquery/macros/relations/table/options.sql +++ b/dbt/include/bigquery/macros/relations/table/options.sql @@ -2,3 +2,25 @@ {% set opts = adapter.get_table_options(config, node, temporary) %} {%- do return(bigquery_options(opts)) -%} {%- endmacro -%} + +{% macro bigquery_iceberg_table_options(config, relation) %} + {% set base_location = config.get('base_location') %} + {%- if not base_location-%} + {% do exceptions.raise_compiler_error("base_location not found") %} + {% endif %} + {% set sub_path = relation.identifier %} + {% set storage_uri = '{{base_location}}/{{sub_path}}' %} + {% set opts = {'file_format': 'parquet', + 'table_format':'iceberg', + 'storage_uri':storage_uri } + %} + {%- do return(bigquery_options(opts)) -%} +{%- endmacro -%} + +{% macro bigquery_iceberg_connection(config) %} + {% set connection = config.get('connection') %} + {%- if not connection-%} + {% do exceptions.raise_compiler_error("BigLake connection not found") %} + {% endif %} + {%- return("WITH CONNECTION `"~connection~"`") %} +{%- endmacro -%} From 2cae0445aad184208c1ae68d4dfde0e17f3211cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Borja=20V=C3=A1zquez-Barreiros?= Date: Sun, 1 Dec 2024 17:56:28 +0000 Subject: [PATCH 5/5] minor fixes --- dbt/include/bigquery/macros/adapters.sql | 4 ++-- .../bigquery/macros/relations/table/options.sql | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dbt/include/bigquery/macros/adapters.sql b/dbt/include/bigquery/macros/adapters.sql index 690f613c6..949887052 100644 --- a/dbt/include/bigquery/macros/adapters.sql +++ b/dbt/include/bigquery/macros/adapters.sql @@ -33,8 +33,8 @@ {{ cluster_by(raw_cluster_by) }} {% if table_format == "iceberg" %} - {{ bigquery_iceberg_table_options(config, relation) }} - {{ bigquery_iceberg_connection(config) }} + {{ bigquery_iceberg_connection(config) }} + {{ bigquery_iceberg_table_options(config, relation) }} {% endif %} {#-- PARTITION BY cannot be used with the AS query_statement clause. diff --git a/dbt/include/bigquery/macros/relations/table/options.sql b/dbt/include/bigquery/macros/relations/table/options.sql index 697ad2195..f458cff23 100644 --- a/dbt/include/bigquery/macros/relations/table/options.sql +++ b/dbt/include/bigquery/macros/relations/table/options.sql @@ -9,18 +9,18 @@ {% do exceptions.raise_compiler_error("base_location not found") %} {% endif %} {% set sub_path = relation.identifier %} - {% set storage_uri = '{{base_location}}/{{sub_path}}' %} - {% set opts = {'file_format': 'parquet', - 'table_format':'iceberg', - 'storage_uri':storage_uri } + {% set storage_uri = base_location~'/'~sub_path %} + {% set opts = {'file_format':'"parquet"', + 'table_format':'"iceberg"', + 'storage_uri':'"'~storage_uri~'"' } %} {%- do return(bigquery_options(opts)) -%} {%- endmacro -%} {% macro bigquery_iceberg_connection(config) %} - {% set connection = config.get('connection') %} + {% set connection = config.get('bl_connection') %} {%- if not connection-%} {% do exceptions.raise_compiler_error("BigLake connection not found") %} {% endif %} - {%- return("WITH CONNECTION `"~connection~"`") %} + {%- do return("WITH CONNECTION `"~connection~"`") %} {%- endmacro -%}