Skip to content

Commit 8a2267d

Browse files
Update tutorial-data-analyst.md
1 parent 08a0b72 commit 8a2267d

File tree

1 file changed

+22
-24
lines changed

1 file changed

+22
-24
lines changed

articles/synapse-analytics/sql/tutorial-data-analyst.md

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,24 +19,14 @@ In particular, you analyze the [New York City (NYC) Taxi dataset](https://azure.
1919

2020
The focus of the analysis is to find trends in how the number of taxi rides changes over time. You analyze two other Azure Open Datasets ([Public Holidays](https://azure.microsoft.com/services/open-datasets/catalog/public-holidays/) and [Weather Data](https://azure.microsoft.com/services/open-datasets/catalog/noaa-integrated-surface-data/)) to understand the outliers in the number of taxi rides.
2121

22-
## Create credentials
22+
## Create Data source
23+
24+
A data source object is used to reference the Azure storage account that holds the data you want to analyze. Publicly available storage accounts don't need a credential to access the storage.
2325

2426
```sql
25-
-- There is no secret. We are using public storage account which doesn't need a secret.
26-
CREATE CREDENTIAL [https://azureopendatastorage.blob.core.windows.net/nyctlc]
27-
WITH IDENTITY='SHARED ACCESS SIGNATURE',
28-
SECRET = ''
29-
GO
30-
31-
CREATE CREDENTIAL [https://azureopendatastorage.blob.core.windows.net/holidaydatacontainer]
32-
WITH IDENTITY='SHARED ACCESS SIGNATURE',
33-
SECRET = ''
34-
GO
35-
36-
CREATE CREDENTIAL [https://azureopendatastorage.blob.core.windows.net/isdweatherdatacontainer]
37-
WITH IDENTITY='SHARED ACCESS SIGNATURE',
38-
SECRET = ''
39-
GO
27+
-- There is no credential in the data source. We are using a public storage account, which doesn't need a secret.
28+
CREATE EXTERNAL DATA SOURCE AzureOpenData
29+
WITH ( LOCATION = 'https://azureopendatastorage.blob.core.windows.net/')
4030
```
4131

4232
## Automatic schema inference
@@ -48,7 +38,8 @@ Let's first familiarize with the NYC Taxi data by running the following query:
4838
```sql
4939
SELECT TOP 100 * FROM
5040
OPENROWSET(
51-
BULK 'https://azureopendatastorage.blob.core.windows.net/nyctlc/yellow/puYear=*/puMonth=*/*.parquet',
41+
BULK 'nyctlc/yellow/puYear=*/puMonth=*/*.parquet',
42+
DATA_SOURCE = 'AzureOpenData',
5243
FORMAT='PARQUET'
5344
) AS [nyc]
5445
```
@@ -62,7 +53,8 @@ Similarly, we can query the public holidays dataset using the following query:
6253
```sql
6354
SELECT TOP 100 * FROM
6455
OPENROWSET(
65-
BULK 'https://azureopendatastorage.blob.core.windows.net/holidaydatacontainer/Processed/*.parquet',
56+
BULK 'holidaydatacontainer/Processed/*.parquet',
57+
DATA_SOURCE = 'AzureOpenData',
6658
FORMAT='PARQUET'
6759
) AS [holidays]
6860
```
@@ -78,7 +70,8 @@ SELECT
7870
TOP 100 *
7971
FROM
8072
OPENROWSET(
81-
BULK 'https://azureopendatastorage.blob.core.windows.net/isdweatherdatacontainer/ISDWeather/year=*/month=*/*.parquet',
73+
BULK 'isdweatherdatacontainer/ISDWeather/year=*/month=*/*.parquet',
74+
DATA_SOURCE = 'AzureOpenData',
8275
FORMAT='PARQUET'
8376
) AS [weather]
8477
```
@@ -99,7 +92,8 @@ SELECT
9992
COUNT(*) AS rides_per_year
10093
FROM
10194
OPENROWSET(
102-
BULK 'https://azureopendatastorage.blob.core.windows.net/nyctlc/yellow/puYear=*/puMonth=*/*.parquet',
95+
BULK 'nyctlc/yellow/puYear=*/puMonth=*/*.parquet',
96+
DATA_SOURCE = 'AzureOpenData',
10397
FORMAT='PARQUET'
10498
) AS [nyc]
10599
WHERE nyc.filepath(1) >= '2009' AND nyc.filepath(1) <= '2019'
@@ -128,7 +122,8 @@ SELECT
128122
COUNT(*) as rides_per_day
129123
FROM
130124
OPENROWSET(
131-
BULK 'https://azureopendatastorage.blob.core.windows.net/nyctlc/yellow/puYear=*/puMonth=*/*.parquet',
125+
BULK 'nyctlc/yellow/puYear=*/puMonth=*/*.parquet',
126+
DATA_SOURCE = 'AzureOpenData',
132127
FORMAT='PARQUET'
133128
) AS [nyc]
134129
WHERE nyc.filepath(1) = '2016'
@@ -156,7 +151,8 @@ WITH taxi_rides AS
156151
COUNT(*) as rides_per_day
157152
FROM
158153
OPENROWSET(
159-
BULK 'https://azureopendatastorage.blob.core.windows.net/nyctlc/yellow/puYear=*/puMonth=*/*.parquet',
154+
BULK 'nyctlc/yellow/puYear=*/puMonth=*/*.parquet',
155+
DATA_SOURCE = 'AzureOpenData',
160156
FORMAT='PARQUET'
161157
) AS [nyc]
162158
WHERE nyc.filepath(1) = '2016'
@@ -169,7 +165,8 @@ public_holidays AS
169165
date
170166
FROM
171167
OPENROWSET(
172-
BULK 'https://azureopendatastorage.blob.core.windows.net/holidaydatacontainer/Processed/*.parquet',
168+
BULK 'holidaydatacontainer/Processed/*.parquet',
169+
DATA_SOURCE = 'AzureOpenData',
173170
FORMAT='PARQUET'
174171
) AS [holidays]
175172
WHERE countryorregion = 'United States' AND YEAR(date) = 2016
@@ -208,7 +205,8 @@ SELECT
208205
MAX(snowdepth) AS max_snowdepth
209206
FROM
210207
OPENROWSET(
211-
BULK 'https://azureopendatastorage.blob.core.windows.net/isdweatherdatacontainer/ISDWeather/year=*/month=*/*.parquet',
208+
BULK 'isdweatherdatacontainer/ISDWeather/year=*/month=*/*.parquet',
209+
DATA_SOURCE = 'AzureOpenData',
212210
FORMAT='PARQUET'
213211
) AS [weather]
214212
WHERE countryorregion = 'US' AND CAST([datetime] AS DATE) = '2016-01-23' AND stationname = 'JOHN F KENNEDY INTERNATIONAL AIRPORT'

0 commit comments

Comments
 (0)