Skip to content

Commit ab01b0a

Browse files
authored
fix: add license header and correct issues in dbt sample (#1931)
1 parent e5ff8f7 commit ab01b0a

File tree

4 files changed, with 72 additions and 11 deletions.

samples/dbt/.dbt.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
115
dbt_sample_project:
216
outputs:
317
dev: # The target environment name (e.g., dev, prod)

samples/dbt/dbt_sample_project/dbt_project.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
114

215
# Name your project! Project names should contain only lowercase characters
316
# and underscores. A good package name should reflect your organization's

samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_1.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
115
# This example demonstrates one of the most general usages of transforming raw
216
# BigQuery data into a processed table using a dbt Python model with BigFrames.
317
# See more from: https://cloud.google.com/bigquery/docs/dataframes-dbt.
@@ -32,7 +46,13 @@ def model(dbt, session):
3246
table = "bigquery-public-data.epa_historical_air_quality.temperature_hourly_summary"
3347

3448
# Define the specific columns to select from the BigQuery table.
35-
columns = ["state_name", "county_name", "date_local", "time_local", "sample_measurement"]
49+
columns = [
50+
"state_name",
51+
"county_name",
52+
"date_local",
53+
"time_local",
54+
"sample_measurement",
55+
]
3656

3757
# Read data from the specified BigQuery table into a BigFrames DataFrame.
3858
df = session.read_gbq(table, columns=columns)
@@ -44,14 +64,16 @@ def model(dbt, session):
4464
# Group the DataFrame by 'state_name', 'county_name', and 'date_local'. For
4565
# each group, calculate the minimum and maximum of the 'sample_measurement'
4666
# column. The result will be a BigFrames DataFrame with a MultiIndex.
47-
result = df.groupby(["state_name", "county_name", "date_local"])["sample_measurement"]\
48-
.agg(["min", "max"])
67+
result = df.groupby(["state_name", "county_name", "date_local"])[
68+
"sample_measurement"
69+
].agg(["min", "max"])
4970

5071
# Rename some columns and convert the MultiIndex of the 'result' DataFrame
5172
# into regular columns. This flattens the DataFrame so 'state_name',
5273
# 'county_name', and 'date_local' become regular columns again.
53-
result = result.rename(columns={'min': 'min_temperature', 'max': 'max_temperature'})\
54-
.reset_index()
74+
result = result.rename(
75+
columns={"min": "min_temperature", "max": "max_temperature"}
76+
).reset_index()
5577

5678
# Return the processed BigFrames DataFrame.
5779
# In a dbt Python model, this DataFrame will be materialized as a table

samples/dbt/dbt_sample_project/models/example/dbt_bigframes_code_sample_2.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
115
# This example demonstrates how to build an **incremental dbt Python model**
216
# using BigFrames.
3-
#
17+
#
418
# Incremental models are essential for efficiently processing large datasets by
519
# only transforming new or changed data, rather than reprocessing the entire
620
# dataset every time. If the target table already exists, dbt will perform a
@@ -13,8 +27,6 @@
1327
# directly within BigQuery, leveraging BigQuery's scalability.
1428

1529

16-
import bigframes.pandas as bpd
17-
1830
def model(dbt, session):
1931
# Optional: override settings from dbt_project.yml.
2032
# When both are set, dbt.config takes precedence over dbt_project.yml.
@@ -24,9 +36,9 @@ def model(dbt, session):
2436
submission_method="bigframes",
2537
# Materialize this model as an 'incremental' table. This tells dbt to
2638
# only process new or updated data on subsequent runs.
27-
materialized='incremental',
39+
materialized="incremental",
2840
# Use MERGE strategy to update rows during incremental runs.
29-
incremental_strategy='merge',
41+
incremental_strategy="merge",
3042
# Define the composite key that uniquely identifies a row in the
3143
# target table. This key is used by the 'merge' strategy to match
3244
# existing rows for updates during incremental runs.
@@ -41,7 +53,7 @@ def model(dbt, session):
4153
# Define a BigFrames UDF to generate a temperature description.
4254
# BigFrames UDFs allow you to define custom Python logic that executes
4355
# directly within BigQuery. This is powerful for complex transformations.
44-
@bpd.udf(dataset='dbt_sample_dataset', name='describe_udf')
56+
@session.udf(dataset="dbt_sample_dataset", name="describe_udf")
4557
def describe(
4658
max_temperature: float,
4759
min_temperature: float,

0 commit comments