Skip to content

Commit a3a98a7

Browse files
authored
Merge pull request #195 from divakaivan/main
Week 3 Spark Fundamentals 1st notebook 1st cell fix missing column
2 parents 8707ce0 + 98ef5e0 commit a3a98a7

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

bootcamp/materials/3-spark-fundamentals/notebooks/event_data_pyspark.ipynb

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
2 2
"cells": [
3 3
{
4 4
"cell_type": "code",
5-
"execution_count": 1,
5+
"execution_count": null,
6 6
"id": "81cca085-dba2-42eb-a13b-fa64b6e86583",
7 7
"metadata": {},
8 8
"outputs": [
@@ -53,7 +53,11 @@
53 53
"\n",
54 54
"spark\n",
55 55
"\n",
56-
"df = spark.read.option(\"header\", \"true\").csv(\"/home/iceberg/data/events.csv\").withColumn(\"event_date\", expr(\"DATE_TRUNC('day', event_time)\"))\n",
56+
"events = spark.read.option(\"header\", \"true\").csv(\"/home/iceberg/data/events.csv\").withColumn(\"event_date\", expr(\"DATE_TRUNC('day', event_time)\"))\n",
57+
"devices = spark.read.option(\"header\",\"true\").csv(\"/home/iceberg/data/devices.csv\")\n",
58+
"\n",
59+
"df = events.join(devices,on=\"device_id\",how=\"left\")\n",
60+
"df = df.withColumnsRenamed({'browser_type': 'browser_family', 'os_type': 'os_family'})\n",
57 61
"\n",
58 62
"df.show()"
59 63
]
@@ -570,7 +574,6 @@
570 574
{
571 575
"cell_type": "code",
572 576
"execution_count": null,
573-
"id": "faaed2df",
574 577
"metadata": {
575 578
"collapsed": false,
576 579
"jupyter": {

0 commit comments

Comments
 (0)