Skip to content

Commit 04ad32d

Browse files
Add Covid data exploration queries
1 parent 8222546 commit 04ad32d

File tree

1 file changed

+63
-0
lines changed

1 file changed

+63
-0
lines changed
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
2+
-- Preview ten rows of the ECDC cases CSV file.
-- FIRSTROW = 2 starts reading at the second line of the file
-- (presumably to skip a header line -- confirm against the file).
SELECT TOP 10
    csv_rows.*
FROM OPENROWSET(
        BULK 'https://pandemicdatalake.blob.core.windows.net/public/curated/covid-19/ecdc_cases/latest/ecdc_cases.csv',
        FORMAT = 'csv',
        PARSER_VERSION = '2.0',
        FIRSTROW = 2
    ) AS csv_rows;
5+
6+
-- Preview ten lines of the line-delimited JSON file.
-- The file is read with the CSV reader into a single nvarchar(max) column
-- named doc; 0x0b is used as both field terminator and field quote.
-- JSON_VALUE then extracts the $.cases property from each document.
SELECT TOP 10
    JSON_VALUE(json_lines.doc, '$.cases') AS cases,
    json_lines.*
FROM OPENROWSET(
        BULK 'https://pandemicdatalake.blob.core.windows.net/public/curated/covid-19/ecdc_cases/latest/ecdc_cases.jsonl',
        FORMAT = 'csv',
        FIELDTERMINATOR = '0x0b',
        FIELDQUOTE = '0x0b'
    ) WITH (doc nvarchar(max)) AS json_lines;
11+
12+
-- Preview ten rows of the ECDC cases Parquet file.
SELECT TOP 10
    parquet_rows.*
FROM OPENROWSET(
        BULK 'https://pandemicdatalake.blob.core.windows.net/public/curated/covid-19/ecdc_cases/latest/ecdc_cases.parquet',
        FORMAT = 'parquet'
    ) AS parquet_rows;
15+
16+
17+
-- Total cases and deaths per continent, plus a grand-total row from ROLLUP,
-- ordered by case count (largest first).
-- GROUPING(continent_exp) = 1 marks only the super-aggregate row added by
-- ROLLUP, so a source row whose continent_exp is genuinely NULL is no longer
-- mislabelled as 'Total' (which ISNULL(continent_exp, 'Total') would do).
SELECT
    continent = CASE
                    WHEN GROUPING(continent_exp) = 1 THEN 'Total'
                    ELSE continent_exp
                END,
    cases     = SUM(cases),
    deaths    = SUM(deaths)
FROM OPENROWSET(
        BULK 'https://pandemicdatalake.blob.core.windows.net/public/curated/covid-19/ecdc_cases/latest/ecdc_cases.parquet',
        FORMAT = 'parquet'
    ) AS ecdc
GROUP BY continent_exp WITH ROLLUP
ORDER BY SUM(cases) DESC;
22+
23+
-- Look up the name and geo_id of every row whose country/territory name
-- contains the substring 'bul'.
SELECT
    ecdc.countries_and_territories,
    ecdc.geo_id
FROM OPENROWSET(
        BULK 'https://pandemicdatalake.blob.core.windows.net/public/curated/covid-19/ecdc_cases/latest/ecdc_cases.parquet',
        FORMAT = 'parquet'
    ) AS ecdc
WHERE ecdc.countries_and_territories LIKE '%bul%';
27+
28+
29+
-- Reported cases and deaths for geo_id 'RS', in chronological order.
SELECT
    ecdc.DATE_REP,
    ecdc.CASES,
    ecdc.DEATHS
FROM OPENROWSET(
        BULK 'https://pandemicdatalake.blob.core.windows.net/public/curated/covid-19/ecdc_cases/latest/ecdc_cases.parquet',
        FORMAT = 'parquet'
    ) AS ecdc
WHERE ecdc.geo_id = 'RS'
ORDER BY ecdc.date_rep;
34+
35+
-- Cumulative values - running total of cases for geo_id 'RS'.
-- The frame is spelled out as ROWS: with only ORDER BY, the window defaults to
-- RANGE UNBOUNDED PRECEDING, which gives every row sharing the same date_rep
-- the same (already summed) value instead of a true row-by-row running total.
SELECT
    DATE_REP,
    CASES,
    CUMULATIVE = SUM(CASES) OVER (
                     ORDER BY date_rep
                     ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
                 )
FROM OPENROWSET(
        BULK 'https://pandemicdatalake.blob.core.windows.net/public/curated/covid-19/ecdc_cases/latest/ecdc_cases.parquet',
        FORMAT = 'parquet'
    ) AS a
WHERE geo_id = 'RS'
ORDER BY date_rep;
42+
43+
-- Centered moving average of cases for geo_id 'RS': each row averages
-- itself with the two rows before and the two rows after it (by date_rep).
SELECT
    ecdc.DATE_REP,
    ecdc.CASES,
    AVG(ecdc.CASES) OVER (
        ORDER BY ecdc.date_rep
        ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
    ) AS CASES_AVG
FROM OPENROWSET(
        BULK 'https://pandemicdatalake.blob.core.windows.net/public/curated/covid-19/ecdc_cases/latest/ecdc_cases.parquet',
        FORMAT = 'parquet'
    ) AS ecdc
WHERE ecdc.geo_id = 'RS'
ORDER BY ecdc.date_rep;
48+
49+
-- Countries whose average daily cases rose week over week as of 2020-10-04,
-- limited to those with more than 100 cases/day in the previous week and
-- ordered by relative growth (largest first).
WITH diff AS (
    -- Per-country rolling averages over two adjacent, non-overlapping 7-row
    -- windows: the current row plus the 6 before it, and the 7 rows before
    -- that. (The frames were previously 8 rows each and shared the row at
    -- 7 PRECEDING, so "this week" and "prev week" overlapped.)
    -- NOTE(review): frames count rows, not calendar days -- assumes one row
    -- per country per day with no gaps; confirm against the dataset.
    -- NOTE(review): if CASES is an integer type, AVG truncates to an integer;
    -- confirm whether fractional averages are wanted.
    SELECT
        geo_id,
        date_rep,
        countries_and_territories,
        current_avg = AVG(CASES) OVER (
                          PARTITION BY geo_id
                          ORDER BY date_rep
                          ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
                      ),
        prev_avg    = AVG(CASES) OVER (
                          PARTITION BY geo_id
                          ORDER BY date_rep
                          ROWS BETWEEN 13 PRECEDING AND 7 PRECEDING
                      )
    FROM OPENROWSET(
            BULK 'https://pandemicdatalake.blob.core.windows.net/public/curated/covid-19/ecdc_cases/latest/ecdc_cases.parquet',
            FORMAT = 'parquet'
        ) AS a
)
SELECT
    country                 = countries_and_territories,
    [cases/day (this week)] = current_avg,
    [cases/day (prev week)] = prev_avg,
    -- NUMERIC(9,1) instead of NUMERIC(4,1): the narrower type tops out at
    -- 999.9 and raises an arithmetic overflow error for growth >= 1000%.
    [change%]               = CAST(100 * (1. * current_avg / prev_avg - 1) AS NUMERIC(9,1))
FROM diff
WHERE date_rep = CAST('2020-10-04T00:00:00.0000000' AS datetime2)
  AND current_avg > prev_avg
  AND prev_avg > 100
ORDER BY (1. * current_avg / prev_avg - 1) DESC;

0 commit comments

Comments
 (0)