Skip to content

Commit 2f13a51

Browse files
authored
Merge pull request #74 from dbt-labs/feature/generate_model_ctes
Feature/generate_model_import_ctes
2 parents c00701b + df54f84 commit 2f13a51

11 files changed

+665
-6
lines changed

README.md

Lines changed: 107 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,22 @@
33
Macros that generate dbt code, and log it to the command line.
44

55
# Contents
6-
* [generate_source](#generate_source-source)
7-
* [generate_base_model](#generate_base_model-source)
8-
* [generate_model_yaml](#generate_model_yaml-source)
6+
- [dbt-codegen](#dbt-codegen)
7+
- [Contents](#contents)
8+
- [Installation instructions](#installation-instructions)
9+
- [Macros](#macros)
10+
- [generate_source (source)](#generate_source-source)
11+
- [Arguments](#arguments)
12+
- [Usage:](#usage)
13+
- [generate_base_model (source)](#generate_base_model-source)
14+
- [Arguments:](#arguments-1)
15+
- [Usage:](#usage-1)
16+
- [generate_model_yaml (source)](#generate_model_yaml-source)
17+
- [Arguments:](#arguments-2)
18+
- [Usage:](#usage-2)
19+
- [generate_model_import_ctes (source)](#generate_model_import_ctes-source)
20+
- [Arguments:](#arguments-3)
21+
- [Usage:](#usage-3)
922

1023
# Installation instructions
1124
New to dbt packages? Read more about them [here](https://docs.getdbt.com/docs/building-a-dbt-project/package-management/).
@@ -164,3 +177,94 @@ models:
164177
```
165178

166179
4. Paste the output into a schema.yml file, and refactor as required.
180+
181+
## generate_model_import_ctes ([source](macros/generate_model_import_ctes.sql))
182+
This macro generates the SQL for a given model with all references pulled up into import CTEs, which you can then paste back into the model.
183+
184+
### Arguments:
185+
* `model_name` (required): The model you wish to generate SQL with import CTEs for.
186+
* `leading_commas` (optional, default = false): Whether you want your commas to be leading (vs trailing).
187+
188+
### Usage:
189+
1. Create a model with your original SQL query
190+
2. Copy the macro call below into a statement tab in the dbt Cloud IDE, or into an analysis file, and compile your code
191+
192+
```
193+
{{ codegen.generate_model_import_ctes(
194+
model_name = 'my_dbt_model'
195+
) }}
196+
```
197+
198+
Alternatively, call the macro as an [operation](https://docs.getdbt.com/docs/using-operations):
199+
200+
```
201+
$ dbt run-operation generate_model_import_ctes --args '{"model_name": "my_dbt_model"}'
202+
```
203+
204+
3. The new SQL - with all references pulled up into import CTEs - will be logged to the command line
205+
206+
```
207+
with customers as (
208+
209+
select * from {{ ref('stg_customers') }}
210+
211+
),
212+
213+
orders as (
214+
215+
select * from {{ ref('stg_orders') }}
216+
217+
),
218+
219+
payments as (
220+
221+
select * from {{ ref('stg_payments') }}
222+
223+
),
224+
225+
customer_orders as (
226+
227+
select
228+
customer_id,
229+
min(order_date) as first_order,
230+
max(order_date) as most_recent_order,
231+
count(order_id) as number_of_orders
232+
from orders
233+
group by customer_id
234+
235+
),
236+
237+
customer_payments as (
238+
239+
select
240+
orders.customer_id,
241+
sum(amount) as total_amount
242+
from payments
243+
left join orders on
244+
payments.order_id = orders.order_id
245+
group by orders.customer_id
246+
247+
),
248+
249+
final as (
250+
251+
select
252+
customers.customer_id,
253+
customers.first_name,
254+
customers.last_name,
255+
customer_orders.first_order,
256+
customer_orders.most_recent_order,
257+
customer_orders.number_of_orders,
258+
customer_payments.total_amount as customer_lifetime_value
259+
from customers
260+
left join customer_orders
261+
on customers.customer_id = customer_orders.customer_id
262+
left join customer_payments
263+
on customers.customer_id = customer_payments.customer_id
264+
265+
)
266+
267+
select * from final
268+
```
269+
270+
4. Replace the contents of the model's current SQL file with the compiled or logged code

integration_tests/dbt_project.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,6 @@ clean-targets:
1818
seeds:
1919
+schema: raw_data
2020
+quote_columns: false
21+
22+
vars:
23+
my_table_reference: table_c
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
select *, 2 as col2
2+
from {{ ref('model_without_import_ctes') }} as m
3+
left join (select 2 as col_a from {{ ref('data__a_relation') }}) as a on a.col_a = m.id
4+
where id = 1
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*
2+
This is my model!
3+
*/
4+
5+
{{ config(
6+
materialized='table',
7+
) }}
8+
9+
-- I love this cte
10+
with my_first_cte as (
11+
select
12+
a.col_a,
13+
b.col_b
14+
from {{ ref('data__a_relation') }} as a
15+
left join {{ ref("data__b_relation") }} as b
16+
on a.col_a = b.col_a
17+
left join {{ ref('data__a_relation') }} as aa
18+
on a.col_a = aa.col_a
19+
),
20+
my_second_cte as (
21+
select
22+
1 as id
23+
from codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table
24+
union all
25+
select
26+
2 as id
27+
from {{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table') }}
28+
-- union all
29+
-- select
30+
-- 3 as id
31+
-- from development.codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table
32+
-- union all
33+
-- select
34+
-- 4 as id
35+
-- from {{ var("my_table_reference") }}
36+
-- union all
37+
-- select
38+
-- 5 as id
39+
-- from {{ var("my_other_table_reference", "table_d") }}
40+
)
41+
-- my_third_cte as (
42+
-- select
43+
-- a.col_a,
44+
-- b.col_b
45+
-- from `raw_relation_1` as a
46+
-- left join "raw_relation_2" as b
47+
-- on a.col_a = b.col_b
48+
-- left join [raw_relation_3] as aa
49+
-- on a.col_a = aa.col_b
50+
-- left join 'raw_relation_4' as ab
51+
-- on a.col_a = ab.col_b
52+
-- left join 'my_schema'.'raw_relation_5' as ac
53+
-- on a.col_a = ac.col_b
54+
-- )
55+
select * from my_second_cte
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
{% set actual_model_with_import_ctes = codegen.generate_model_import_ctes(
2+
model_name = 'model_without_import_ctes'
3+
)
4+
%}
5+
6+
{% set expected_model_with_import_ctes %}
7+
/*
8+
This is my model!
9+
*/
10+
11+
{% raw %}{{ config(
12+
materialized='table',
13+
) }}{% endraw %}
14+
15+
with codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table as (
16+
17+
select * from codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table
18+
-- CAUTION: It's best practice to use the ref or source function instead of a direct reference
19+
20+
),
21+
22+
data__a_relation as (
23+
24+
select * from {% raw %}{{ ref('data__a_relation') }}{% endraw %}
25+
26+
),
27+
28+
data__b_relation as (
29+
30+
select * from {% raw %}{{ ref("data__b_relation") }}{% endraw %}
31+
32+
),
33+
34+
development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table as (
35+
36+
select * from development.codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table
37+
-- CAUTION: It's best practice to use the ref or source function instead of a direct reference
38+
39+
),
40+
41+
my_other_table_reference as (
42+
43+
select * from {% raw %}{{ var("my_other_table_reference", "table_d") }}{% endraw %}
44+
-- CAUTION: It's best practice to use the ref or source function instead of a var
45+
46+
),
47+
48+
my_schema_raw_relation_5 as (
49+
50+
select * from 'my_schema'.'raw_relation_5'
51+
-- CAUTION: It's best practice to use the ref or source function instead of a direct reference
52+
53+
),
54+
55+
my_table_reference as (
56+
57+
select * from {% raw %}{{ var("my_table_reference") }}{% endraw %}
58+
-- CAUTION: It's best practice to use the ref or source function instead of a var
59+
60+
),
61+
62+
raw_relation_1 as (
63+
64+
select * from `raw_relation_1`
65+
-- CAUTION: It's best practice to use the ref or source function instead of a direct reference
66+
67+
),
68+
69+
raw_relation_2 as (
70+
71+
select * from "raw_relation_2"
72+
-- CAUTION: It's best practice to use the ref or source function instead of a direct reference
73+
74+
),
75+
76+
raw_relation_3 as (
77+
78+
select * from [raw_relation_3]
79+
-- CAUTION: It's best practice to use the ref or source function instead of a direct reference
80+
81+
),
82+
83+
raw_relation_4 as (
84+
85+
select * from 'raw_relation_4'
86+
-- CAUTION: It's best practice to use the ref or source function instead of a direct reference
87+
88+
),
89+
90+
source_codegen_integration_tests__data_source_table as (
91+
92+
select * from {% raw %}{{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table') }}{% endraw %}
93+
-- CAUTION: It's best practice to create staging layer for raw sources
94+
95+
),
96+
97+
-- I love this cte
98+
my_first_cte as (
99+
select
100+
a.col_a,
101+
b.col_b
102+
from data__a_relation as a
103+
left join data__b_relation as b
104+
on a.col_a = b.col_a
105+
left join data__a_relation as aa
106+
on a.col_a = aa.col_a
107+
),
108+
my_second_cte as (
109+
select
110+
1 as id
111+
from codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table
112+
union all
113+
select
114+
2 as id
115+
from source_codegen_integration_tests__data_source_table
116+
-- union all
117+
-- select
118+
-- 3 as id
119+
-- from development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table
120+
-- union all
121+
-- select
122+
-- 4 as id
123+
-- from my_table_reference
124+
-- union all
125+
-- select
126+
-- 5 as id
127+
-- from my_other_table_reference
128+
)
129+
-- my_third_cte as (
130+
-- select
131+
-- a.col_a,
132+
-- b.col_b
133+
-- from raw_relation_1 as a
134+
-- left join raw_relation_2 as b
135+
-- on a.col_a = b.col_b
136+
-- left join raw_relation_3 as aa
137+
-- on a.col_a = aa.col_b
138+
-- left join raw_relation_4 as ab
139+
-- on a.col_a = ab.col_b
140+
-- left join my_schema_raw_relation_5 as ac
141+
-- on a.col_a = ac.col_b
142+
-- )
143+
select * from my_second_cte
144+
{% endset %}
145+
146+
{{ assert_equal (actual_model_with_import_ctes | trim, expected_model_with_import_ctes | trim) }}

0 commit comments

Comments
 (0)