-- Operation Analytics and Investigating Metric Spike Queries.sql
-- NOTE: GitHub page-scrape residue (navigation text and the rendered
-- line-number gutter) removed from this header; the SQL below is the
-- actual file content.
--Case Study 1 (Job Data)
--Number of jobs reviewed: Amount of jobs reviewed over time.
-- Calculate the number of jobs reviewed per day (with total review hours) for November 2020.
-- FIX: divide by 3600.0, not 3600 — if time_spent is an integer column, integer
-- division silently truncates the hours. Half-open date range replaces BETWEEN,
-- which would drop Nov 30 rows carrying a time-of-day component.
select Date,
       count(job_id) as [Jobs/Day],
       round(sum(time_spent) / 3600.0, 2) as [Hours Spent]
from dw.jobs
where date >= '2020-11-01' and date < '2020-12-01'
group by date
order by date
--Throughput: It is the no. of events happening per second.
--Let’s say the above metric is called throughput.
--Calculate 7 day rolling average of throughput? For throughput, do you prefer daily metric or 7-day rolling and why?
-- Prefer the 7-day rolling average: it smooths day-of-week effects and one-off spikes.
select A.*,
       -- window = current day plus the 6 preceding days, per job
       avg(Throughput) over (partition by job_id
                             order by Date
                             rows between 6 preceding and current row) as Last7Days_Rolling_Avg
from
(
    -- FIX: 1.0 * forces decimal arithmetic; COUNT/SUM on integer columns would
    -- integer-divide and truncate most throughput values to 0.
    -- NULLIF guards a divide-by-zero when a day's total time_spent is 0.
    select round(1.0 * count(event) / nullif(sum(time_spent), 0), 4) as Throughput,
           job_id,
           cast(Date as date) as Date
    from dw.jobs
    group by date, job_id
) A
order by date
--Percentage share of each language: Share of each language for different contents.
-- Calculate the percentage share of each language in the last 30 days?
-- FIX: 100.0 (not 100) keeps the division decimal; pure integer math would
-- truncate each share. Half-open range replaces BETWEEN for datetime safety.
SELECT Language,
       count(Language) * 100.0 / sum(count(Language)) OVER () as [Percentage]
FROM dw.jobs
where date >= '2020-11-01' and date < '2020-12-01'
GROUP BY Language
-- Alternative: share of each language over the whole table (no date filter).
SELECT Language,
       round(count(language) * 100.0 / (select count(language) from dw.jobs), 1) as [Percentage Share]
from dw.jobs
group by language
--How will you display duplicates from the table?
-- ROW_NUMBER restarts at 1 within each (job_id, actor_id) pair, so rows numbered
-- above 1 are the duplicates. FIX: the original only numbered the rows; filtering
-- on rn > 1 actually displays the duplicate rows as the comment asks.
select *
from
(
    select *,
           row_number() over (partition by job_id, actor_id order by date) as rn
    from dw.jobs
) d
where rn > 1
-- Investigating Metric Spike
--User Engagement: To measure the activeness of a user. Measuring if the user finds quality in a product/service.
--Calculate the weekly user engagement?
-- Users currently in the 'active' state.
select count(distinct user_id) as [Active Users]
from dw.users
where state = 'active'
-- Users still pending activation.
select count(distinct user_id) as [Active Users]
from dw.users
where state = 'pending'
-- All users regardless of state.
select count(distinct user_id) as [Total Users]
from dw.users
--Weekly user engagement
-- DATEADD/DATEDIFF truncates occurred_at to the start of its week.
select count(distinct user_id) as [User Engagement],
       dateadd(week, datediff(week, 0, cast(occurred_at as date)), 0) as Week
from dw.events
group by dateadd(week, datediff(week, 0, cast(occurred_at as date)), 0)
order by Week
--OR
-- Same metric keyed by the calendar week number instead of the week's start date.
select count(distinct user_id) as [Users Engagement],
       datepart(week, occurred_at) as Week
from dw.events
group by datepart(week, occurred_at)
order by Week
-- Weekly Engagement: To measure the activeness of a user. Measuring if the user finds quality in a product/service weekly.
-- Calculate the weekly engagement per device?
-- weekly engagement per device (week bucketed to its start date)
select device,
       count(distinct user_id) as [User Engagement],
       dateadd(week, datediff(week, 0, cast(occurred_at as date)), 0) as Week
from dw.events
group by dateadd(week, datediff(week, 0, cast(occurred_at as date)), 0), device
order by device, Week
-- or: same metric keyed by calendar week number
select device,
       count(distinct user_id) as [Users Engagement],
       datepart(week, occurred_at) as Week
from dw.events
group by datepart(week, occurred_at), device
order by device, Week
-- FIX: a third query that was an exact byte-for-byte duplicate of the first one
-- was removed here.
--Amount of users growing over time for a product.
-- Calculate the user growth for product?
-- Weekly distinct-user counts plus the week-over-week delta via LAG.
-- (leading ; terminates any preceding statement before the CTE)
;with weekly_users as (
    select user_id,
           dateadd(week, datediff(week, 0, cast(occurred_at as date)), 0) as Week
    from dw.events
)
select Week,
       count(distinct user_id) as Counts,
       count(distinct user_id)
         - lag(count(distinct user_id), 1) over (order by Week) as Growth
from weekly_users
group by Week
-- user-signup weekly cohort
-- Pivot: rows = first-activity ("signed up") week, columns = weeks since that
-- week, values = count of distinct users active that many weeks later.
select *
from
(
    select a.users,
           b.signedupweek,
           datediff(week, b.signedupweek, a.occurred_at) as weekssincesignedup
    from
    (
        -- one row per user per active week
        select user_id as users,
               dateadd(week, datediff(week, 0, cast(occurred_at as date)), 0) as occurred_at
        from dw.events
        group by user_id, dateadd(week, datediff(week, 0, cast(occurred_at as date)), 0)
    ) a
    -- FIX: explicit INNER JOIN replaces the old implicit comma-join + WHERE,
    -- which silently becomes a cross join if the filter is ever dropped.
    inner join
    (
        -- each user's earliest activity week = their cohort week
        select user_id,
               min(dateadd(week, datediff(week, 0, cast(occurred_at as date)), 0)) as signedupweek
        from dw.events
        group by user_id
    ) b on a.users = b.user_id
) mm
pivot
(
    count(users)
    for weekssincesignedup in ([0],[1],[2],[3],[4],[5],[6],[7],[8],[9],[10],[11],[12],[13],[14],[15],[16],[17],[18])
) p
order by signedupweek
--Email Engagement: Users engaging with the email service.
-- Calculate the email engagement metrics?
-- Count of each email action per month (occurred_at truncated to month start).
select action,
       count(action) as [Email Engagements],
       Month
from
(
    select *,
           dateadd(month, datediff(month, 0, cast(occurred_at as date)), 0) as Month
    from dw.EMAIL_EVENTS
) a
group by action, Month
order by [Email Engagements]
---------------------------------------------------------------------------------------------------------------------
-- miscellaneous
-- Email funnel: for each digest / re-engagement send, did the user's next
-- event(s) indicate an open and then a clickthrough?
-- (leading ; terminates any preceding statement before the CTE)
;with one as (
    SELECT
        *,
        datepart(MONTH, occurred_at) as month,
        -- LEAD looks at the user's next event: 1 step ahead for an open...
        CASE WHEN LEAD(ACTION, 1) OVER (PARTITION BY USER_ID ORDER BY OCCURRED_AT ASC) = 'email_open' THEN 1 ELSE 0 END AS opened_email,
        -- ...and 2 steps ahead for the clickthrough that follows the open
        CASE WHEN LEAD(ACTION, 2) OVER (PARTITION BY USER_ID ORDER BY OCCURRED_AT ASC) = 'email_clickthrough' THEN 1 ELSE 0 END AS clicked_email
    FROM
        dw.eMAIL_EVENTS
)
SELECT
    action,
    month,
    count(action) as num_sent,  -- FIX: was an unnamed column; alias makes the result usable
    sum(opened_email) as num_open,
    sum(clicked_email) as num_clicked
FROM
    one
WHERE action in ('sent_weekly_digest', 'sent_reengagement_email')
GROUP BY
    action,
    month
ORDER BY
    action,
    month
--select * from dw.events
--where user_id=14032
--select * from dw.users
--where user_id=14032
-- Distinct users who had events in May 2014 AND were created (active) in May 2014.
select distinct e.user_id
from
(
    select user_id
    from dw.events
    where cast(occurred_at as date) between '2014-05-01' and '2014-05-31'
) e
inner join
(
    select user_id
    from dw.users
    where cast(created_at as date) between '2014-05-01' and '2014-05-31'
      and state = 'active'
) u on e.user_id = u.user_id
--weekly user-signup cohort for the month of may 2014 from the first signed up date
-- BUG FIX: the original checked OBJECT_ID for '#retetnion' (typo) but dropped
-- #retention, so the existence check never matched and re-running the script
-- failed with "There is already an object named '#retention'". Names now agree.
IF OBJECT_ID('tempdb.dbo.#retention') IS NOT NULL DROP TABLE #retention
select * into #retention from
(
    select distinct weeksssincesignedup, users, cast(signedupweek as date) as signedupweek from
    (
        select DATEDIFF(week, signedupweek, occurred_at) as weeksssincesignedup, * from
        (
            -- cohort week = week the account was created; activity week from events
            select DATEADD(WEEK, DATEDIFF(WEEK, 0, CAST(u.created_at AS DATE)), 0) as signedupweek,
                   u.user_id as users,
                   e.user_id,
                   DATEADD(WEEK, DATEDIFF(WEEK, 0, CAST(e.occurred_at AS DATE)), 0) as occurred_at
            from dw.events e
            inner join dw.users u on u.user_id = e.user_id
            where u.state in ('Active')
        ) m
        where signedupweek between '2014-05-01' and '2014-05-31'
          and occurred_at between '2014-05-01' and '2014-05-31'
    ) mm
) mmm
pivot
(
    count(users)
    for weeksssincesignedup in ([0], [1], [2], [3])
) b
order by signedupweek
--user-signup weekly cohort
select * from #retention
order by signedupweek
-- In percentage: each week's retained users as a share of the cohort's week-0 size.
-- FIX: NULLIF guards against a divide-by-zero when a cohort has no week-0 users
-- (the row then shows NULL rather than aborting the whole query).
select signedupweek,
       (1.0 * [0] / nullif([0], 0) * 100) as '2014-05-05',
       (1.0 * [1] / nullif([0], 0) * 100) as '2014-05-12',
       (1.0 * [2] / nullif([0], 0) * 100) as '2014-05-19',
       (1.0 * [3] / nullif([0], 0) * 100) as '2014-05-26'
from #retention
order by signedupweek
--weekly user-signup cohort analysis for 4 months data from the first signed up date
-- Pivot: rows = signup (account-creation) week, columns [0]..[16] = weeks since
-- signup, values = count of distinct users with activity that many weeks later.
select * from
(
-- one row per user per (signup week, weeks-since-signup) combination
select distinct weekssincesignedup, users , cast(signedupweek as date) as signedupweek from
(
-- offset in whole weeks between the signup week and the activity week
select DATEDIFF(week, signedupweek,occurred_at) as weekssincesignedup ,* from
(
-- signup week from users.created_at; activity week from events.occurred_at,
-- both truncated to the start of their week via the DATEADD/DATEDIFF idiom
select DATEADD(WEEK,DATEDIFF(WEEK, 0,CAST(u.created_at AS DATE)),0) as signedupweek , u.user_id as users, e.user_id,
DATEADD(WEEK,DATEDIFF(WEEK, 0,CAST(e.OCcurred_at AS DATE)),0) as occurred_at from dw.events e join dw.users u
on u.user_id=e.user_id
-- NOTE(review): matches 'Active' vs the 'active' used elsewhere in this file —
-- harmless under a case-insensitive collation, but worth confirming.
where u.state in ('Active')
) m
-- restrict both signup and activity to the May–August 2014 window
where signedupweek between
'2014-05-01' and '2014-08-31' and occurred_at between '2014-05-01' and '2014-08-31'
)mm
)mmm
pivot
( count(users)
for weekssincesignedup in( [0],[1] ,
[2] ,
[3],[4],[5],[6],[7],[8],[9],[10],[11],[12],[13],[14],[15],[16])
)b
order by signedupweek