From 2e0d71e5ed547fb82f79b9e0fde3bd6e93973361 Mon Sep 17 00:00:00 2001 From: Magnus Benediktsson Date: Tue, 10 Dec 2024 09:05:59 +0100 Subject: [PATCH 1/6] Handle BigQuery repeated fields data_types --- integration_tests/models/model_repeated.sql | 13 +++++ .../test_generate_model_repeated_yaml.sql | 48 +++++++++++++++++++ .../tests/test_helper_get_models.sql | 2 +- macros/vendored/dbt_core/format_column.sql | 12 +++++ 4 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 integration_tests/models/model_repeated.sql create mode 100644 integration_tests/tests/test_generate_model_repeated_yaml.sql diff --git a/integration_tests/models/model_repeated.sql b/integration_tests/models/model_repeated.sql new file mode 100644 index 0000000..253c4ba --- /dev/null +++ b/integration_tests/models/model_repeated.sql @@ -0,0 +1,13 @@ +{% if target.type == "bigquery" %} + + {#--- This exists to test the BigQuery-specific behavior requested in #190 -#} + select + [1,2,3,4] as repeated_int, + [ + STRUCT(1 as int_field), + STRUCT(2 as int_field) + ] as repeated_struct + +{% else %} + select 1 as int_field +{% endif %} diff --git a/integration_tests/tests/test_generate_model_repeated_yaml.sql b/integration_tests/tests/test_generate_model_repeated_yaml.sql new file mode 100644 index 0000000..0ac7185 --- /dev/null +++ b/integration_tests/tests/test_generate_model_repeated_yaml.sql @@ -0,0 +1,48 @@ +{% set raw_schema = generate_schema_name('raw_data') %} + +{% set actual_source_yaml = codegen.generate_model_yaml( + model_names=['model_repeated'] + ) +%} + +{% if target.type == "bigquery" %} + +{% set expected_source_yaml %} +version: 2 + +models: + - name: model_repeated + description: "" + columns: + - name: repeated_int + data_type: array<{{ integer_type_value() }}> + description: "" + + - name: repeated_struct + data_type: array> + description: "" + + - name: repeated_struct.int_field + data_type: {{ integer_type_value() }} + description: "" + +{% endset %} + +{% else %} 
+ +{% set expected_source_yaml %} +version: 2 + +models: + - name: model_repeated + description: "" + columns: + - name: int_field + data_type: {{ integer_type_value() }} + description: "" + +{% endset %} + +{% endif %} + +{{ assert_equal (actual_source_yaml | trim, expected_source_yaml | trim) }} diff --git a/integration_tests/tests/test_helper_get_models.sql b/integration_tests/tests/test_helper_get_models.sql index d9393a3..95c4d0c 100644 --- a/integration_tests/tests/test_helper_get_models.sql +++ b/integration_tests/tests/test_helper_get_models.sql @@ -7,6 +7,6 @@ {% set actual_list = codegen.get_models(prefix='model_')|sort %} {% endif %} -{% set expected_list = ['model_data_a', 'model_from_source', 'model_struct', 'model_without_any_ctes', 'model_without_import_ctes'] %} +{% set expected_list = ['model_data_a', 'model_from_source', 'model_repeated', 'model_struct', 'model_without_any_ctes', 'model_without_import_ctes'] %} {{ assert_equal (actual_list, expected_list) }} diff --git a/macros/vendored/dbt_core/format_column.sql b/macros/vendored/dbt_core/format_column.sql index a7a6669..8addb6d 100644 --- a/macros/vendored/dbt_core/format_column.sql +++ b/macros/vendored/dbt_core/format_column.sql @@ -1,5 +1,17 @@ {% macro format_column(column) -%} + {{ return(adapter.dispatch('format_column', 'codegen')(column)) }} +{%- endmacro %} + +{# Vendored from: https://github.com/dbt-labs/dbt-adapters/blob/c7b12aee533184bad391a657d1753539d1dd496a/dbt/include/global_project/macros/relations/column/columns_spec_ddl.sql#L85-L89 #} +{% macro default__format_column(column) -%} {% set data_type = column.dtype %} {% set formatted = column.column.lower() ~ " " ~ data_type %} {{ return({'name': column.name, 'data_type': data_type, 'formatted': formatted}) }} {%- endmacro -%} + +{# Vendored from: https://github.com/dbt-labs/dbt-bigquery/blob/4d255b2f854d21d5d8871bdaa8d7ab47e7e863a3/dbt/include/bigquery/macros/utils/get_columns_spec_ddl.sql#L1-L5 #} +{% macro 
bigquery__format_column(column) -%} + {% set data_type = column.data_type %} + {% set formatted = column.column.lower() ~ " " ~ data_type %} + {{ return({'name': column.name, 'data_type': data_type, 'formatted': formatted}) }} +{%- endmacro -%} \ No newline at end of file From 57200f4f277016177d6de8e2b61585c389d80a15 Mon Sep 17 00:00:00 2001 From: Magnus Benediktsson Date: Wed, 11 Dec 2024 13:07:54 +0100 Subject: [PATCH 2/6] include nested repeated structs --- integration_tests/models/model_repeated.sql | 12 ++++++------ .../tests/test_generate_model_repeated_yaml.sql | 16 ++++++++++++---- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/integration_tests/models/model_repeated.sql b/integration_tests/models/model_repeated.sql index 253c4ba..2c08a2f 100644 --- a/integration_tests/models/model_repeated.sql +++ b/integration_tests/models/model_repeated.sql @@ -1,12 +1,12 @@ {% if target.type == "bigquery" %} {#--- This exists to test the BigQuery-specific behavior requested in #190 -#} - select - [1,2,3,4] as repeated_int, - [ - STRUCT(1 as int_field), - STRUCT(2 as int_field) - ] as repeated_struct +select + [1, 2] AS repeated_int, + [ + STRUCT(1 as nested_int_field, [STRUCT("a" as string_field)] as nested_repeated_struct), + STRUCT(2 AS nested_int_field, [STRUCT("a" as string_field)] as nested_repeated_struct) + ] as repeated_struct {% else %} select 1 as int_field diff --git a/integration_tests/tests/test_generate_model_repeated_yaml.sql b/integration_tests/tests/test_generate_model_repeated_yaml.sql index 0ac7185..abba9c5 100644 --- a/integration_tests/tests/test_generate_model_repeated_yaml.sql +++ b/integration_tests/tests/test_generate_model_repeated_yaml.sql @@ -15,15 +15,23 @@ models: description: "" columns: - name: repeated_int - data_type: array<{{ integer_type_value() }}> + data_type: array description: "" - name: repeated_struct - data_type: array> + data_type: array>>> description: "" - - name: repeated_struct.int_field - data_type: {{ 
integer_type_value() }} + - name: repeated_struct.nested_int_field + data_type: int64 + description: "" + + - name: repeated_struct.nested_repeated_struct + data_type: array> + description: "" + + - name: repeated_struct.nested_repeated_struct.string_field + data_type: string description: "" {% endset %} From 370eac54b7dd481b19e2d39a429bade5f1360d05 Mon Sep 17 00:00:00 2001 From: Magnus Benediktsson Date: Thu, 12 Dec 2024 16:15:02 +0100 Subject: [PATCH 3/6] override repeated struct data_type with array --- integration_tests/tests/test_generate_model_repeated_yaml.sql | 4 ++-- macros/vendored/dbt_core/format_column.sql | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/integration_tests/tests/test_generate_model_repeated_yaml.sql b/integration_tests/tests/test_generate_model_repeated_yaml.sql index abba9c5..401cbe7 100644 --- a/integration_tests/tests/test_generate_model_repeated_yaml.sql +++ b/integration_tests/tests/test_generate_model_repeated_yaml.sql @@ -19,7 +19,7 @@ models: description: "" - name: repeated_struct - data_type: array>>> + data_type: array description: "" - name: repeated_struct.nested_int_field @@ -27,7 +27,7 @@ models: description: "" - name: repeated_struct.nested_repeated_struct - data_type: array> + data_type: array description: "" - name: repeated_struct.nested_repeated_struct.string_field diff --git a/macros/vendored/dbt_core/format_column.sql b/macros/vendored/dbt_core/format_column.sql index 8addb6d..3688612 100644 --- a/macros/vendored/dbt_core/format_column.sql +++ b/macros/vendored/dbt_core/format_column.sql @@ -10,8 +10,12 @@ {%- endmacro -%} {# Vendored from: https://github.com/dbt-labs/dbt-bigquery/blob/4d255b2f854d21d5d8871bdaa8d7ab47e7e863a3/dbt/include/bigquery/macros/utils/get_columns_spec_ddl.sql#L1-L5 #} +{# But modified to handle https://github.com/dbt-labs/dbt-codegen/issues/190 #} {% macro bigquery__format_column(column) -%} {% set data_type = column.data_type %} + {% if column.mode.lower() == 
"repeated" and column.dtype.lower() == "record" %} + {% set data_type = "array" %} + {% endif %} {% set formatted = column.column.lower() ~ " " ~ data_type %} {{ return({'name': column.name, 'data_type': data_type, 'formatted': formatted}) }} {%- endmacro -%} \ No newline at end of file From 69aa25da70c418b4255ec0149faceab36cc21f11 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Thu, 12 Dec 2024 15:00:39 -0700 Subject: [PATCH 4/6] Add trailing newline --- macros/vendored/dbt_core/format_column.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/vendored/dbt_core/format_column.sql b/macros/vendored/dbt_core/format_column.sql index 3688612..2365638 100644 --- a/macros/vendored/dbt_core/format_column.sql +++ b/macros/vendored/dbt_core/format_column.sql @@ -18,4 +18,4 @@ {% endif %} {% set formatted = column.column.lower() ~ " " ~ data_type %} {{ return({'name': column.name, 'data_type': data_type, 'formatted': formatted}) }} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} From 8adbda7459b00da63910dcb1686d6f7bd735eca6 Mon Sep 17 00:00:00 2001 From: Magnus Benediktsson Date: Fri, 13 Dec 2024 09:39:29 +0100 Subject: [PATCH 5/6] update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30e34a2..e60486e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +### Fixes + +- fix data_type for BigQuery repeated fields + # dbt-codegen v0.13.1 ## What's Changed From 8a82439507ecf00443e5034c92d9d528201f3a89 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:34:18 -0700 Subject: [PATCH 6/6] Update CHANGELOG.md --- CHANGELOG.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e60486e..30e34a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,3 @@ -### Fixes - -- fix data_type for BigQuery repeated fields - # dbt-codegen 
v0.13.1 ## What's Changed