-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathword_count.py
More file actions
27 lines (21 loc) · 810 Bytes
/
word_count.py
File metadata and controls
27 lines (21 loc) · 810 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import logging
import sys
from pyspark.sql import SparkSession
from data_transformations.wordcount import word_count_transformer
# File that logging.basicConfig is pointed at when the script runs.
LOG_FILENAME = 'project.log'
# Application name handed to the SparkSession builder.
APP_NAME = "WordCount"

if __name__ == '__main__':
    # Script entry point.
    # Usage: word_count.py <input .txt file> <output path>
    # Exits with status 1 when the two positional arguments are not supplied.
    logging.basicConfig(filename=LOG_FILENAME, level=logging.INFO)
    logging.info(sys.argv)

    # argv[0] is the script itself, so exactly two user arguments are expected.
    if len(sys.argv) != 3:
        logging.warning("Input .txt file and output path are required")
        sys.exit(1)

    input_path = sys.argv[1]
    output_path = sys.argv[2]

    spark = SparkSession.builder.appName(APP_NAME).getOrCreate()
    try:
        # Lazy %-style arguments: same message text as before, formatted by
        # the logging module only when the record is actually emitted.
        logging.info("Application Initialized: %s", spark.sparkContext.appName)
        word_count_transformer.run(spark, input_path, output_path)
        logging.info("Application Done: %s", spark.sparkContext.appName)
    finally:
        # Stop the session even when the transformation raises; previously a
        # failure in run() skipped spark.stop() entirely.
        spark.stop()