Skip to content

Commit 1c78f8c

Browse files
Fix notebook format issues
1 parent 0a9b886 commit 1c78f8c

File tree

2 files changed

+895
-2
lines changed

2 files changed

+895
-2
lines changed
Lines changed: 368 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,368 @@
1-
{"cells":[{"cell_type":"markdown","source":["# PySpark Code to Move Data from Bronze to Silver"],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"ab7f0b7c-b0cc-44ec-9948-61d68f4b0b13"},{"cell_type":"markdown","source":["This is an example on how to work with the medallion architecture. From Bronze to Silver"],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"44f47922-4e3b-45cc-81a6-c5de97634f73"},{"cell_type":"markdown","source":["## Working with 2020orders information"],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"4e1afae8-b2ef-4e4c-9ac9-485139f19e9c"},{"cell_type":"code","source":["from pyspark.sql.types import *\n","import pyspark.sql.functions \n","from pyspark.sql import *"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"febb6c3e-6841-42c1-a633-0da056b7f69c"},{"cell_type":"code","source":["# Read the data from the bronze layer:\n","df_raw_2020orders = spark.read.format(\"delta\").load(\"abfss://[email protected]/raw_Bronze.Lakehouse/Tables/2020orders\")\n","\n","df_raw_2020orders.head(2)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":4,"statement_ids":[4],"state":"finished","livy_statement_state":"available","session_id":"ecb846c9-e988-4906-95bc-af67b3aacd94","normalized_state":"finished","queued_time":"2024-10-25T19:11:30.8930212Z","session_start_time":null,"execution_start_time":"2024-10-25T19:14:18.2840528Z","execution_finish_time":"2024-10-25T19:14:24.7554027Z","parent_msg_id":"4d60fa03-18dd-4bbc-adab-cb12ff2ba6cf"},"text/plain":"StatementMeta(, ecb846c9-e988-4906-95bc-af67b3aacd94, 4, Finished, Available, Finished)"},"metadata":{}},{"output_type":"execute_result","execution_count":11,"data":{"text/plain":"[Row(ID='SO45376', Count=1, Date='1/5/2020', Name='Edgar Mehta', Style='Mountain-100 Silver, 38', price=3399.99, tax=271.9992),\n Row(ID='SO45381', Count=1, Date='1/6/2020', Name='Jordan Long', Style='Mountain-100 Silver, 38', price=3399.99, tax=271.9992)]"},"metadata":{}}],"execution_count":2,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"6357dc47-1bb1-4391-8f38-b5d5a2abf5b2"},{"cell_type":"code","source":["# Clean the data (e.g., filter out rows with null values in the 'age' column):\n","df_cleaned = df_raw_2020orders.filter(df_raw_2020orders[\"Date\"].isNotNull())\n","print(df_cleaned)"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":10,"statement_ids":[10],"state":"finished","livy_statement_state":"available","session_id":"ecb846c9-e988-4906-95bc-af67b3aacd94","normalized_state":"finished","queued_time":"2024-10-25T19:21:35.4162864Z","session_start_time":null,"execution_start_time":"2024-10-25T19:21:35.9099818Z","execution_finish_time":"2024-10-25T19:21:36.2079156Z","parent_msg_id":"d65f6fd9-d9ab-4498-ab5d-0710bab459be"},"text/plain":"StatementMeta(, ecb846c9-e988-4906-95bc-af67b3aacd94, 10, Finished, Available, Finished)"},"metadata":{}},{"output_type":"stream","name":"stdout","text":["DataFrame[ID: string, Count: int, Date: string, Name: string, Style: string, price: double, tax: double]\n"]}],"execution_count":8,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"5f5c8125-cbf5-4e00-9d8f-0c437f25b37f"},{"cell_type":"code","source":["# Save the cleaned data to the \"cleansed_Silver\" table in the Silver lakehouse:\n","df_cleaned.write.format(\"delta\").mode(\"overwrite\").save(\"abfss://[email protected]/cleansed_test_Silver.Lakehouse/Tables/2020orders_silver\")"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":12,"statement_ids":[12],"state":"finished","livy_statement_state":"available","session_id":"ecb846c9-e988-4906-95bc-af67b3aacd94","normalized_state":"finished","queued_time":"2024-10-25T19:23:52.3238132Z","session_start_time":null,"execution_start_time":"2024-10-25T19:23:52.7414203Z","execution_finish_time":"2024-10-25T19:24:09.4412514Z","parent_msg_id":"8c92d669-7856-4961-a9d0-c38d54833ee4"},"text/plain":"StatementMeta(, ecb846c9-e988-4906-95bc-af67b3aacd94, 12, Finished, Available, Finished)"},"metadata":{}}],"execution_count":10,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"c83d4e46-2b49-490f-aadb-87a350c85e89"},{"cell_type":"markdown","source":["## Working with products information"],"metadata":{"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"f830afb3-2b02-4076-800a-85ca9fc33fea"},{"cell_type":"code","source":["# Read data from the Bronze layer\n","bronze_df = spark.read.format(\"delta\").load(\"abfss://[email protected]/raw_Bronze.Lakehouse/Tables/products\")\n","# Perform transformations (if any)\n","silver_df = bronze_df # Assuming no transformations for simplicity\n","# Write data to the Silver layer\n","silver_df.write.mode(\"overwrite\").format(\"delta\").save(\"abfss://[email protected]/cleansed_test_Silver.Lakehouse/Tables/products_silver\")"],"outputs":[{"output_type":"display_data","data":{"application/vnd.livy.statement-meta+json":{"spark_pool":null,"statement_id":15,"statement_ids":[15],"state":"finished","livy_statement_state":"available","session_id":"ecb846c9-e988-4906-95bc-af67b3aacd94","normalized_state":"finished","queued_time":"2024-10-25T19:27:20.1106109Z","session_start_time":null,"execution_start_time":"2024-10-25T19:27:20.5334249Z","execution_finish_time":"2024-10-25T19:27:25.4936309Z","parent_msg_id":"bf665ff4-43d5-4b02-90a6-6c28640576c3"},"text/plain":"StatementMeta(, ecb846c9-e988-4906-95bc-af67b3aacd94, 15, Finished, Available, Finished)"},"metadata":{}}],"execution_count":13,"metadata":{"jupyter":{"source_hidden":false,"outputs_hidden":false},"nteract":{"transient":{"deleting":false}},"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"7f72ac98-4ece-4a8a-a5c5-5e1fc7273382"}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"name":"synapse_pyspark","language":"Python","display_name":"Synapse PySpark"},"language_info":{"name":"python"},"microsoft":{"language":"python","language_group":"synapse_pyspark","ms_spell_check":{"ms_spell_check_language":"en"}},"widgets":{},"nteract":{"version":"[email protected]"},"spark_compute":{"compute_id":"/trident/default","session_options":{"conf":{"spark.synapse.nbs.session.timeout":"1200000"}}},"dependencies":{"lakehouse":{"default_lakehouse":"251cd515-16a3-4555-a3d2-dfd12adb2335","default_lakehouse_name":"raw_Bronze","default_lakehouse_workspace_id":"597e0afc-c8db-4f4d-8464-d13570f5b075"}}},"nbformat":4,"nbformat_minor":5}
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "ab7f0b7c-b0cc-44ec-9948-61d68f4b0b13",
6+
"metadata": {
7+
"microsoft": {
8+
"language": "python",
9+
"language_group": "synapse_pyspark"
10+
},
11+
"nteract": {
12+
"transient": {
13+
"deleting": false
14+
}
15+
}
16+
},
17+
"source": [
18+
"# PySpark Code to Move Data from Bronze to Silver"
19+
]
20+
},
21+
{
22+
"cell_type": "markdown",
23+
"id": "44f47922-4e3b-45cc-81a6-c5de97634f73",
24+
"metadata": {
25+
"microsoft": {
26+
"language": "python",
27+
"language_group": "synapse_pyspark"
28+
},
29+
"nteract": {
30+
"transient": {
31+
"deleting": false
32+
}
33+
}
34+
},
35+
"source": [
36+
"This is an example on how to work with the medallion architecture. From Bronze to Silver"
37+
]
38+
},
39+
{
40+
"cell_type": "markdown",
41+
"id": "4e1afae8-b2ef-4e4c-9ac9-485139f19e9c",
42+
"metadata": {
43+
"microsoft": {
44+
"language": "python",
45+
"language_group": "synapse_pyspark"
46+
},
47+
"nteract": {
48+
"transient": {
49+
"deleting": false
50+
}
51+
}
52+
},
53+
"source": [
54+
"## Working with 2020orders information"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": null,
60+
"id": "febb6c3e-6841-42c1-a633-0da056b7f69c",
61+
"metadata": {
62+
"microsoft": {
63+
"language": "python",
64+
"language_group": "synapse_pyspark"
65+
}
66+
},
67+
"outputs": [],
68+
"source": [
69+
"from pyspark.sql.types import *\n",
70+
"import pyspark.sql.functions \n",
71+
"from pyspark.sql import *"
72+
]
73+
},
74+
{
75+
"cell_type": "code",
76+
"execution_count": 2,
77+
"id": "6357dc47-1bb1-4391-8f38-b5d5a2abf5b2",
78+
"metadata": {
79+
"jupyter": {
80+
"outputs_hidden": false,
81+
"source_hidden": false
82+
},
83+
"microsoft": {
84+
"language": "python",
85+
"language_group": "synapse_pyspark"
86+
},
87+
"nteract": {
88+
"transient": {
89+
"deleting": false
90+
}
91+
}
92+
},
93+
"outputs": [
94+
{
95+
"data": {
96+
"application/vnd.livy.statement-meta+json": {
97+
"execution_finish_time": "2024-10-25T19:14:24.7554027Z",
98+
"execution_start_time": "2024-10-25T19:14:18.2840528Z",
99+
"livy_statement_state": "available",
100+
"normalized_state": "finished",
101+
"parent_msg_id": "4d60fa03-18dd-4bbc-adab-cb12ff2ba6cf",
102+
"queued_time": "2024-10-25T19:11:30.8930212Z",
103+
"session_id": "ecb846c9-e988-4906-95bc-af67b3aacd94",
104+
"session_start_time": null,
105+
"spark_pool": null,
106+
"state": "finished",
107+
"statement_id": 4,
108+
"statement_ids": [
109+
4
110+
]
111+
},
112+
"text/plain": [
113+
"StatementMeta(, ecb846c9-e988-4906-95bc-af67b3aacd94, 4, Finished, Available, Finished)"
114+
]
115+
},
116+
"metadata": {},
117+
"output_type": "display_data"
118+
},
119+
{
120+
"data": {
121+
"text/plain": [
122+
"[Row(ID='SO45376', Count=1, Date='1/5/2020', Name='Edgar Mehta', Style='Mountain-100 Silver, 38', price=3399.99, tax=271.9992),\n",
123+
" Row(ID='SO45381', Count=1, Date='1/6/2020', Name='Jordan Long', Style='Mountain-100 Silver, 38', price=3399.99, tax=271.9992)]"
124+
]
125+
},
126+
"execution_count": 11,
127+
"metadata": {},
128+
"output_type": "execute_result"
129+
}
130+
],
131+
"source": [
132+
"# Read the data from the bronze layer:\n",
133+
"df_raw_2020orders = spark.read.format(\"delta\").load(\"abfss://[email protected]/raw_Bronze.Lakehouse/Tables/2020orders\")\n",
134+
"\n",
135+
"df_raw_2020orders.head(2)"
136+
]
137+
},
138+
{
139+
"cell_type": "code",
140+
"execution_count": 8,
141+
"id": "5f5c8125-cbf5-4e00-9d8f-0c437f25b37f",
142+
"metadata": {
143+
"jupyter": {
144+
"outputs_hidden": false,
145+
"source_hidden": false
146+
},
147+
"microsoft": {
148+
"language": "python",
149+
"language_group": "synapse_pyspark"
150+
},
151+
"nteract": {
152+
"transient": {
153+
"deleting": false
154+
}
155+
}
156+
},
157+
"outputs": [
158+
{
159+
"data": {
160+
"application/vnd.livy.statement-meta+json": {
161+
"execution_finish_time": "2024-10-25T19:21:36.2079156Z",
162+
"execution_start_time": "2024-10-25T19:21:35.9099818Z",
163+
"livy_statement_state": "available",
164+
"normalized_state": "finished",
165+
"parent_msg_id": "d65f6fd9-d9ab-4498-ab5d-0710bab459be",
166+
"queued_time": "2024-10-25T19:21:35.4162864Z",
167+
"session_id": "ecb846c9-e988-4906-95bc-af67b3aacd94",
168+
"session_start_time": null,
169+
"spark_pool": null,
170+
"state": "finished",
171+
"statement_id": 10,
172+
"statement_ids": [
173+
10
174+
]
175+
},
176+
"text/plain": [
177+
"StatementMeta(, ecb846c9-e988-4906-95bc-af67b3aacd94, 10, Finished, Available, Finished)"
178+
]
179+
},
180+
"metadata": {},
181+
"output_type": "display_data"
182+
},
183+
{
184+
"name": "stdout",
185+
"output_type": "stream",
186+
"text": [
187+
"DataFrame[ID: string, Count: int, Date: string, Name: string, Style: string, price: double, tax: double]\n"
188+
]
189+
}
190+
],
191+
"source": [
192+
"# Clean the data (e.g., filter out rows with null values in the 'age' column):\n",
193+
"df_cleaned = df_raw_2020orders.filter(df_raw_2020orders[\"Date\"].isNotNull())\n",
194+
"print(df_cleaned)"
195+
]
196+
},
197+
{
198+
"cell_type": "code",
199+
"execution_count": 10,
200+
"id": "c83d4e46-2b49-490f-aadb-87a350c85e89",
201+
"metadata": {
202+
"jupyter": {
203+
"outputs_hidden": false,
204+
"source_hidden": false
205+
},
206+
"microsoft": {
207+
"language": "python",
208+
"language_group": "synapse_pyspark"
209+
},
210+
"nteract": {
211+
"transient": {
212+
"deleting": false
213+
}
214+
}
215+
},
216+
"outputs": [
217+
{
218+
"data": {
219+
"application/vnd.livy.statement-meta+json": {
220+
"execution_finish_time": "2024-10-25T19:24:09.4412514Z",
221+
"execution_start_time": "2024-10-25T19:23:52.7414203Z",
222+
"livy_statement_state": "available",
223+
"normalized_state": "finished",
224+
"parent_msg_id": "8c92d669-7856-4961-a9d0-c38d54833ee4",
225+
"queued_time": "2024-10-25T19:23:52.3238132Z",
226+
"session_id": "ecb846c9-e988-4906-95bc-af67b3aacd94",
227+
"session_start_time": null,
228+
"spark_pool": null,
229+
"state": "finished",
230+
"statement_id": 12,
231+
"statement_ids": [
232+
12
233+
]
234+
},
235+
"text/plain": [
236+
"StatementMeta(, ecb846c9-e988-4906-95bc-af67b3aacd94, 12, Finished, Available, Finished)"
237+
]
238+
},
239+
"metadata": {},
240+
"output_type": "display_data"
241+
}
242+
],
243+
"source": [
244+
"# Save the cleaned data to the \"cleansed_Silver\" table in the Silver lakehouse:\n",
245+
"df_cleaned.write.format(\"delta\").mode(\"overwrite\").save(\"abfss://[email protected]/cleansed_test_Silver.Lakehouse/Tables/2020orders_silver\")"
246+
]
247+
},
248+
{
249+
"cell_type": "markdown",
250+
"id": "f830afb3-2b02-4076-800a-85ca9fc33fea",
251+
"metadata": {
252+
"microsoft": {
253+
"language": "python",
254+
"language_group": "synapse_pyspark"
255+
},
256+
"nteract": {
257+
"transient": {
258+
"deleting": false
259+
}
260+
}
261+
},
262+
"source": [
263+
"## Working with products information"
264+
]
265+
},
266+
{
267+
"cell_type": "code",
268+
"execution_count": 13,
269+
"id": "7f72ac98-4ece-4a8a-a5c5-5e1fc7273382",
270+
"metadata": {
271+
"jupyter": {
272+
"outputs_hidden": false,
273+
"source_hidden": false
274+
},
275+
"microsoft": {
276+
"language": "python",
277+
"language_group": "synapse_pyspark"
278+
},
279+
"nteract": {
280+
"transient": {
281+
"deleting": false
282+
}
283+
}
284+
},
285+
"outputs": [
286+
{
287+
"data": {
288+
"application/vnd.livy.statement-meta+json": {
289+
"execution_finish_time": "2024-10-25T19:27:25.4936309Z",
290+
"execution_start_time": "2024-10-25T19:27:20.5334249Z",
291+
"livy_statement_state": "available",
292+
"normalized_state": "finished",
293+
"parent_msg_id": "bf665ff4-43d5-4b02-90a6-6c28640576c3",
294+
"queued_time": "2024-10-25T19:27:20.1106109Z",
295+
"session_id": "ecb846c9-e988-4906-95bc-af67b3aacd94",
296+
"session_start_time": null,
297+
"spark_pool": null,
298+
"state": "finished",
299+
"statement_id": 15,
300+
"statement_ids": [
301+
15
302+
]
303+
},
304+
"text/plain": [
305+
"StatementMeta(, ecb846c9-e988-4906-95bc-af67b3aacd94, 15, Finished, Available, Finished)"
306+
]
307+
},
308+
"metadata": {},
309+
"output_type": "display_data"
310+
}
311+
],
312+
"source": [
313+
"# Read data from the Bronze layer\n",
314+
"bronze_df = spark.read.format(\"delta\").load(\"abfss://[email protected]/raw_Bronze.Lakehouse/Tables/products\")\n",
315+
"# Perform transformations (if any)\n",
316+
"silver_df = bronze_df # Assuming no transformations for simplicity\n",
317+
"# Write data to the Silver layer\n",
318+
"silver_df.write.mode(\"overwrite\").format(\"delta\").save(\"abfss://[email protected]/cleansed_test_Silver.Lakehouse/Tables/products_silver\")"
319+
]
320+
}
321+
],
322+
"metadata": {
323+
"dependencies": {
324+
"lakehouse": {
325+
"default_lakehouse": "251cd515-16a3-4555-a3d2-dfd12adb2335",
326+
"default_lakehouse_name": "raw_Bronze",
327+
"default_lakehouse_workspace_id": "597e0afc-c8db-4f4d-8464-d13570f5b075"
328+
}
329+
},
330+
"kernel_info": {
331+
"name": "synapse_pyspark"
332+
},
333+
"kernelspec": {
334+
"display_name": "Synapse PySpark",
335+
"language": "Python",
336+
"name": "synapse_pyspark"
337+
},
338+
"language_info": {
339+
"name": "python"
340+
},
341+
"microsoft": {
342+
"language": "python",
343+
"language_group": "synapse_pyspark",
344+
"ms_spell_check": {
345+
"ms_spell_check_language": "en"
346+
}
347+
},
348+
"nteract": {
349+
"version": "[email protected]"
350+
},
351+
"spark_compute": {
352+
"compute_id": "/trident/default",
353+
"session_options": {
354+
"conf": {
355+
"spark.synapse.nbs.session.timeout": "1200000"
356+
}
357+
}
358+
},
359+
"widgets": {
360+
"application/vnd.jupyter.widget-state+json": {
361+
"state": {},
362+
"version": "1.0"
363+
}
364+
}
365+
},
366+
"nbformat": 4,
367+
"nbformat_minor": 5
368+
}

0 commit comments

Comments
 (0)