# spark_app.py
# Forked from malevolentstrix/RealTime-TwitterAnalysis.
import os
import pyspark
import requests
from pyspark.sql import SparkSession
from pyspark.sql.functions import explode
from pyspark.sql.functions import split
# Environment variables set before any Spark session is created:
# PYSPARK_PYTHON and SPARK_LOCAL_HOSTNAME.
os.environ.update({
    "PYSPARK_PYTHON": "python3",
    "SPARK_LOCAL_HOSTNAME": "localhost",
})
def send_data(tags: dict, timeout: float = 5.0) -> None:
    """POST ``tags`` as JSON to ``http://localhost:5001/updateData``.

    Args:
        tags: JSON-serialisable mapping used as the request body.
        timeout: Seconds to wait for the server to connect/respond before
            giving up. Without a timeout ``requests`` waits indefinitely,
            which would stall the caller if the endpoint hangs.

    Raises:
        requests.RequestException: If the connection fails or the timeout
            elapses. The HTTP status of the response is not inspected.
    """
    url = 'http://localhost:5001/updateData'
    # The response body is not used, so the result is intentionally dropped.
    requests.post(url, json=tags, timeout=timeout)
def process_row(row: pyspark.sql.types.Row) -> None:
    """Turn one result row into a dict, echo it, and forward it.

    Args:
        row: A row handed over by the streaming ``foreach`` sink.
    """
    payload = row.asDict()
    print(payload)  # echo to stdout before forwarding
    send_data(payload)
def new():
    """Run the streaming token-count query and block until it terminates.

    Reads lines from a socket source on 127.0.0.1:9009, splits each line on
    single spaces, counts occurrences of every resulting token (exposed in
    a column named ``hashtag``), and passes each updated count row to
    ``process_row``.
    """
    session = SparkSession.builder.appName("SparkTwitterAnalysis").getOrCreate()
    session.sparkContext.setLogLevel("ERROR")

    # Socket source: one input row per line of text received.
    reader = session.readStream.format("socket")
    reader = reader.option("host", "127.0.0.1")
    reader = reader.option("port", 9009)
    lines = reader.load()

    # One output row per space-separated token, then a running count per token.
    tokens = explode(split(lines.value, " "))
    tagged = lines.select(tokens.alias("hashtag"))
    tallies = tagged.groupBy("hashtag").count()

    # Output mode 'Update' is requested, and each emitted row goes to process_row.
    writer = tallies.writeStream.foreach(process_row)
    running = writer.outputMode('Update').start()
    running.awaitTermination()
if __name__ == '__main__':
    # Script entry point: run the streaming job and convert failures into a
    # non-zero exit with a short message on stderr.
    #
    # SystemExit is raised directly instead of calling exit(): exit() is
    # injected by the `site` module for interactive use and is not
    # guaranteed to exist (e.g. under `python -S`).
    try:
        new()
    except BrokenPipeError:
        raise SystemExit("Pipe Broken, Exiting...") from None
    except KeyboardInterrupt:
        raise SystemExit("Keyboard Interrupt, Exiting..") from None
    except Exception as e:
        # Include the underlying error so the cause of the failure is not lost.
        raise SystemExit("Error in Spark App: {!r}".format(e)) from e