forked from spark-examples/pyspark-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpyspark-create-dataframe.py
More file actions
33 lines (24 loc) · 890 Bytes
/
pyspark-create-dataframe.py
File metadata and controls
33 lines (24 loc) · 890 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# -*- coding: utf-8 -*-
'''
Created on Sat Jan 11 19:38:27 2020
@author: sparkbyexamples.com
'''
import pyspark
from pyspark.sql import SparkSession, Row
from pyspark.sql.types import StructType,StructField, StringType, IntegerType
from pyspark.sql.functions import *
columns = ["language","users_count"]
data = [("Java", "20000"), ("Python", "100000"), ("Scala", "3000")]
spark = SparkSession.builder.appName('SparkByExamples.com').getOrCreate()
rdd = spark.sparkContext.parallelize(data)
dfFromRDD1 = rdd.toDF()
dfFromRDD1.printSchema()
dfFromRDD1 = rdd.toDF(columns)
dfFromRDD1.printSchema()
dfFromRDD2 = spark.createDataFrame(rdd).toDF(*columns)
dfFromRDD2.printSchema()
dfFromData2 = spark.createDataFrame(data).toDF(*columns)
dfFromData2.printSchema()
rowData = map(lambda x: Row(*x), data)
dfFromData3 = spark.createDataFrame(rowData,columns)
dfFromData3.printSchema()