1+ # Copyright 2019 Google LLC
2+ #
3+ # Licensed under the Apache License, Version 2.0 (the "License");
4+ # you may not use this file except in compliance with the License.
5+ # You may obtain a copy of the License at
6+ #
7+ # http://www.apache.org/licenses/LICENSE-2.0
8+ #
9+ # Unless required by applicable law or agreed to in writing, software
10+ # distributed under the License is distributed on an "AS IS" BASIS,
11+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ # See the License for the specific language governing permissions and
13+ # limitations under the License.
14+
115# This example demonstrates one of the most general use cases: transforming raw
216# BigQuery data into a processed table using a dbt Python model with BigFrames.
317# For more information, see https://cloud.google.com/bigquery/docs/dataframes-dbt.
@@ -32,7 +46,13 @@ def model(dbt, session):
3246 table = "bigquery-public-data.epa_historical_air_quality.temperature_hourly_summary"
3347
3448 # Define the specific columns to select from the BigQuery table.
35- columns = ["state_name" , "county_name" , "date_local" , "time_local" , "sample_measurement" ]
49+ columns = [
50+ "state_name" ,
51+ "county_name" ,
52+ "date_local" ,
53+ "time_local" ,
54+ "sample_measurement" ,
55+ ]
3656
3757 # Read data from the specified BigQuery table into a BigFrames DataFrame.
3858 df = session .read_gbq (table , columns = columns )
@@ -44,14 +64,16 @@ def model(dbt, session):
4464 # Group the DataFrame by 'state_name', 'county_name', and 'date_local'. For
4565 # each group, calculate the minimum and maximum of the 'sample_measurement'
4666 # column. The result will be a BigFrames DataFrame with a MultiIndex.
47- result = df .groupby (["state_name" , "county_name" , "date_local" ])["sample_measurement" ]\
48- .agg (["min" , "max" ])
67+ result = df .groupby (["state_name" , "county_name" , "date_local" ])[
68+ "sample_measurement"
69+ ].agg (["min" , "max" ])
4970
5071 # Rename the 'min' and 'max' columns and convert the MultiIndex of the
5172 # 'result' DataFrame into regular columns. This flattens the DataFrame so
5273 # 'state_name', 'county_name', and 'date_local' become regular columns again.
53- result = result .rename (columns = {'min' : 'min_temperature' , 'max' : 'max_temperature' })\
54- .reset_index ()
74+ result = result .rename (
75+ columns = {"min" : "min_temperature" , "max" : "max_temperature" }
76+ ).reset_index ()
5577
5678 # Return the processed BigFrames DataFrame.
5779 # In a dbt Python model, this DataFrame will be materialized as a table
0 commit comments