forked from spark-examples/pyspark-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpyspark-string-to-array.py
More file actions
32 lines (26 loc) · 852 Bytes
/
pyspark-string-to-array.py
File metadata and controls
32 lines (26 loc) · 852 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# -*- coding: utf-8 -*-
"""
author SparkByExamples.com
"""
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, split

# Obtain a session via the builder's getOrCreate().
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Sample rows; the field order matches `columns` below.
# Note that the first two names have a space after each comma, the rest do not.
data = [
    ("James, A, Smith", "2018", "M", 3000),
    ("Michael, Rose, Jones", "2010", "M", 4000),
    ("Robert,K,Williams", "2010", "M", 4000),
    ("Maria,Anne,Jones", "2005", "F", 4000),
    ("Jen,Mary,Brown", "2010", "", -1),
]
columns = ["name", "dob_year", "gender", "salary"]

df = spark.createDataFrame(data, columns)
df.printSchema()
df.show(truncate=False)

# DataFrame API: split the `name` string on "," into an array column.
# select() projects only NameArray, so the result never contains `name` and
# no follow-up drop("name") is required.
# The pattern is a bare comma, so a space following a comma in the input
# remains part of the array element (e.g. " A").
df2 = df.select(split(col("name"), ",").alias("NameArray"))
df2.printSchema()
df2.show()

# Spark SQL: the same split, run against a temporary view of the DataFrame.
df.createOrReplaceTempView("PERSON")
spark.sql("select SPLIT(name,',') as NameArray from PERSON").show()