-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathadwords_hive_ad_daily.sql
More file actions
175 lines (166 loc) · 4.14 KB
/
adwords_hive_ad_daily.sql
File metadata and controls
175 lines (166 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
-- session level configuration
set amz-assume-role-arn=${hivevar:role};
set parquet.compression=SNAPPY;
set hive.exec.compress.output=true;
set mapred.output.compression.type=BLOCK;
-- set hive.tez.auto.reducer.parallelism=true;
-- set hive.exec.reducers.bytes.per.reducer=256000000;
-- file size 256 MB for optimal query performance
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set tez.grouping.split-count=1;
-- switch schema
create schema if not exists adtech;
use adtech;
------ create mapping of existing raw data
-- ad table
create external table if not exists ${hivevar:raw_table} (
account_descriptive_name string,
ad_group_id string,
ad_group_name string,
ad_group_status string,
ad_network_type1 string,
ad_network_type2 string,
all_conversions string,
average_position string,
campaign_id string,
campaign_name string,
campaign_status string,
clicks string,
conversions string,
cost string,
creative_approval_status string,
creative_destination_url string,
creative_final_app_urls string,
creative_final_mobile_urls string,
creative_final_urls string,
creative_tracking_url_template string,
creative_url_custom_parameters string,
criterion_id string,
criterion_type string,
customer_descriptive_name string,
report_date string,
description string,
description1 string,
description2 string,
device string,
device_preference string,
display_url string,
external_customer_id string,
headline string,
headline_part1 string,
headline_part2 string,
id string,
impressions string,
is_negative string,
label_ids string,
labels string,
status string,
view_through_conversions string
)
PARTITIONED BY (year string, month string, day string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
LOCATION '${hivevar:raw_path}'
TBLPROPERTIES('skip.header.line.count'='1');
msck repair table ${hivevar:raw_table};
------ create schema for production data
-- ad parquet
create external table if not exists ${hivevar:table} (
account_descriptive_name string,
ad_group_id string,
ad_group_name string,
ad_group_status string,
ad_network_type1 string,
ad_network_type2 string,
all_conversions double,
average_position double,
campaign_id string,
campaign_name string,
campaign_status string,
clicks int,
conversions double,
cost bigint,
creative_approval_status string,
creative_destination_url string,
creative_final_app_urls string,
creative_final_mobile_urls string,
creative_final_urls string,
creative_tracking_url_template string,
creative_url_custom_parameters string,
criterion_id string,
criterion_type string,
customer_descriptive_name string,
report_date string,
description string,
description1 string,
description2 string,
device string,
device_preference string,
display_url string,
external_customer_id string,
headline string,
headline_part1 string,
headline_part2 string,
id string,
impressions int,
is_negative string,
label_ids string,
labels string,
status string,
view_through_conversions int
)
PARTITIONED BY (year string, month string, day string)
STORED as PARQUET
LOCATION '${hivevar:prod_path}'
TBLPROPERTIES('parquet.compress'='SNAPPY');
------ write data to parquet
-- ad
insert overwrite table ${hivevar:table} partition (year, month, day)
select
account_descriptive_name,
ad_group_id,
ad_group_name,
ad_group_status,
ad_network_type1,
ad_network_type2,
all_conversions,
average_position,
campaign_id,
campaign_name,
campaign_status,
clicks,
conversions,
cost,
creative_approval_status,
creative_destination_url,
creative_final_app_urls,
creative_final_mobile_urls,
creative_final_urls,
creative_tracking_url_template,
creative_url_custom_parameters,
criterion_id,
criterion_type,
customer_descriptive_name,
report_date,
description,
description1,
description2,
device,
device_preference,
display_url,
external_customer_id,
headline,
headline_part1,
headline_part2,
id,
impressions,
is_negative,
label_ids,
labels,
status,
view_through_conversions,
year,
month,
day
from ${hivevar:raw_table}
where year = '${hivevar:yr}' and month = '${hivevar:mth}' and day = '${hivevar:dt}';