1
+ # Copyright 2019 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
# This example demonstrates one of the most general usages of transforming raw
2
16
# BigQuery data into a processed table using a dbt Python model with BigFrames.
3
17
# For more information, see https://cloud.google.com/bigquery/docs/dataframes-dbt.
@@ -32,7 +46,13 @@ def model(dbt, session):
32
46
table = "bigquery-public-data.epa_historical_air_quality.temperature_hourly_summary"
33
47
34
48
# Define the specific columns to select from the BigQuery table.
35
- columns = ["state_name" , "county_name" , "date_local" , "time_local" , "sample_measurement" ]
49
+ columns = [
50
+ "state_name" ,
51
+ "county_name" ,
52
+ "date_local" ,
53
+ "time_local" ,
54
+ "sample_measurement" ,
55
+ ]
36
56
37
57
# Read data from the specified BigQuery table into a BigFrames DataFrame.
38
58
df = session .read_gbq (table , columns = columns )
@@ -44,14 +64,16 @@ def model(dbt, session):
44
64
# Group the DataFrame by 'state_name', 'county_name', and 'date_local'. For
45
65
# each group, calculate the minimum and maximum of the 'sample_measurement'
46
66
# column. The result will be a BigFrames DataFrame with a MultiIndex.
47
- result = df .groupby (["state_name" , "county_name" , "date_local" ])["sample_measurement" ]\
48
- .agg (["min" , "max" ])
67
+ result = df .groupby (["state_name" , "county_name" , "date_local" ])[
68
+ "sample_measurement"
69
+ ].agg (["min" , "max" ])
49
70
50
71
# Rename some columns and convert the MultiIndex of the 'result' DataFrame
51
72
# into regular columns. This flattens the DataFrame so 'state_name',
52
73
# 'county_name', and 'date_local' become regular columns again.
53
- result = result .rename (columns = {'min' : 'min_temperature' , 'max' : 'max_temperature' })\
54
- .reset_index ()
74
+ result = result .rename (
75
+ columns = {"min" : "min_temperature" , "max" : "max_temperature" }
76
+ ).reset_index ()
55
77
56
78
# Return the processed BigFrames DataFrame.
57
79
# In a dbt Python model, this DataFrame will be materialized as a table
0 commit comments