|
-- Pearson correlation coefficient between UK reported cases and the deaths
-- reported 9 rows later, over 2020-03-03 .. 2020-06-06 (ECDC cases dataset).
WITH
raw_data AS (
    SELECT
        date_rep,
        cases,
        -- With a descending sort, the "previous" rows are the later dates, so
        -- LAG(deaths, 9) pairs each row with the deaths value 9 rows after it
        -- in calendar order. The window is evaluated after the WHERE filter,
        -- so the 9 latest rows of the range have no partner and get NULL.
        LAG(deaths, 9) OVER (ORDER BY date_rep DESC) AS deaths
    FROM OPENROWSET(
            BULK 'https://pandemicdatalake.blob.core.windows.net/public/curated/covid-19/ecdc_cases/latest/ecdc_cases.parquet',
            FORMAT = 'parquet'
         ) AS a
    WHERE geo_id = 'UK'
      AND date_rep BETWEEN '2020-03-03' AND '2020-06-06'
),
data AS (
    SELECT
        -- FLOAT, not an integer type: AVG over integer input returns a
        -- truncated integer, which would distort the covariance term below.
        CAST(cases AS FLOAT)  AS x,
        CAST(deaths AS FLOAT) AS y
    FROM raw_data
    -- Keep complete pairs only, so that every aggregate below is computed
    -- over the same set of rows (aggregates silently skip NULL inputs).
    WHERE cases IS NOT NULL
      AND deaths IS NOT NULL
)
SELECT
    -- r = cov(x, y) / (sd(x) * sd(y)), population form.
    -- NULLIF returns NULL instead of raising a divide-by-zero error when
    -- either series is constant.
    (AVG(x * y) - AVG(x) * AVG(y)) / NULLIF(STDEVP(x) * STDEVP(y), 0) AS PearsonsR
FROM data;
| 13 | + |
| 14 | + |
| 15 | + |
-- Pearson's r and Spearman's rho between smoothed cases and smoothed deaths,
-- pooled over every geo_id in the ECDC cases dataset.
WITH
raw_data AS (
    SELECT
        geo_id,
        date_rep,
        countries_and_territories,
        -- Rolling mean of deaths over the current row and the 3 rows before it
        -- (ascending date order). Cast to FLOAT first: AVG over integer input
        -- returns a truncated integer.
        AVG(CAST(deaths AS FLOAT)) OVER (
            PARTITION BY geo_id
            ORDER BY date_rep
            ROWS BETWEEN 3 PRECEDING AND CURRENT ROW
        ) AS deaths,
        -- Rolling mean of cases over the rows 7..11 positions "before" the
        -- current one in DESCENDING date order, i.e. the 7th..11th later rows.
        -- NOTE(review): this pairs current deaths with cases reported AFTER
        -- them; if the intent is "cases lead deaths", the sort should probably
        -- be ascending. Left unchanged - confirm with the author.
        AVG(CAST(cases AS FLOAT)) OVER (
            PARTITION BY geo_id
            ORDER BY date_rep DESC
            ROWS BETWEEN 11 PRECEDING AND 7 PRECEDING
        ) AS cases
    FROM OPENROWSET(
            BULK 'https://pandemicdatalake.blob.core.windows.net/public/curated/covid-19/ecdc_cases/latest/ecdc_cases.parquet',
            FORMAT = 'parquet'
         ) AS a
),
data AS (
    SELECT
        cases  AS x,
        deaths AS y
    FROM raw_data
    -- Drops low-volume rows; also removes rows whose window was empty (NULL).
    WHERE cases > 100
      AND deaths > 10
),
ranked AS (
    SELECT
        x,
        y,
        -- Average ("fractional") rank: tied values share the mean of the rank
        -- positions they occupy. RANK() gives the lowest position of the tie
        -- group, and (group size - 1) / 2 shifts it to the group's mean.
        CAST(RANK() OVER (ORDER BY x) AS FLOAT)
            + (COUNT(*) OVER (PARTITION BY x) - 1) / 2.0E0 AS rank_x,
        CAST(RANK() OVER (ORDER BY y) AS FLOAT)
            + (COUNT(*) OVER (PARTITION BY y) - 1) / 2.0E0 AS rank_y
    FROM data
)
SELECT
    -- r = cov(x, y) / (sd(x) * sd(y)), population form; NULL when either
    -- series is constant (NULLIF avoids a divide-by-zero error).
    (AVG(x * y) - AVG(x) * AVG(y))
        / NULLIF(STDEVP(x) * STDEVP(y), 0) AS PearsonsR,
    -- Spearman's rho is Pearson's r computed on the ranks. The shortcut
    -- 1 - 6*SUM(d^2) / (n*(n^2 - 1)) is only valid when d is a difference of
    -- RANKS and there are no ties, so the rank-based Pearson form is used.
    (AVG(rank_x * rank_y) - AVG(rank_x) * AVG(rank_y))
        / NULLIF(STDEVP(rank_x) * STDEVP(rank_y), 0) AS SpearmanRho
FROM ranked;
| 32 | + |
--Kendall's rank correlation sample estimate τ
-- Pairs of observations are compared only within the same geo_id; the
-- concordant/discordant counts are then pooled over all geo_ids.
WITH
raw_data AS (
    SELECT
        geo_id,
        date_rep,
        countries_and_territories,
        -- Rolling mean of deaths over the current row and the 3 rows before it
        -- (ascending date order). Cast to FLOAT first: AVG over integer input
        -- returns a truncated integer, which would create artificial ties.
        AVG(CAST(deaths AS FLOAT)) OVER (
            PARTITION BY geo_id
            ORDER BY date_rep
            ROWS BETWEEN 3 PRECEDING AND CURRENT ROW
        ) AS deaths,
        -- Rolling mean of cases over the rows 7..11 positions "before" the
        -- current one in DESCENDING date order, i.e. the 7th..11th later rows.
        -- NOTE(review): this pairs current deaths with cases reported AFTER
        -- them; confirm the intended direction of the lag with the author.
        AVG(CAST(cases AS FLOAT)) OVER (
            PARTITION BY geo_id
            ORDER BY date_rep DESC
            ROWS BETWEEN 11 PRECEDING AND 7 PRECEDING
        ) AS cases
    FROM OPENROWSET(
            BULK 'https://pandemicdatalake.blob.core.windows.net/public/curated/covid-19/ecdc_cases/latest/ecdc_cases.parquet',
            FORMAT = 'parquet'
         ) AS a
),
data AS (
    SELECT
        cases    AS x,
        deaths   AS y,
        geo_id   AS class,  -- pairs are formed within one class only
        date_rep AS id      -- distinguishes observations inside a class
    FROM raw_data
    WHERE cases > 100
      AND deaths > 10
)
SELECT
    -- tau = (concordant - discordant) / number of compared pairs.
    -- Each pair scores +1 (concordant), -1 (discordant) or 0 (tied on x or y).
    -- Summing FLOAT scores avoids the overflow that a NUMERIC(8,2) total hits
    -- once the pair count passes 999,999.99.
    SUM(CASE
            WHEN (i.x < j.x AND i.y < j.y) OR (i.x > j.x AND i.y > j.y) THEN  1.0E0  -- concordant
            WHEN (i.x < j.x AND i.y > j.y) OR (i.x > j.x AND i.y < j.y) THEN -1.0E0  -- discordant
            ELSE 0.0E0                                                                -- tie
        END)
    / NULLIF(COUNT_BIG(*), 0) AS Tau
FROM data AS i
INNER JOIN data AS j
    ON  i.class = j.class
    -- "<" visits each unordered pair once; "<>" would visit it twice and
    -- double both numerator and denominator, leaving the ratio unchanged.
    AND i.id < j.id;
0 commit comments