99from typing import Any , Dict , List , Optional , Tuple
1010
1111import yaml
12- from jinja2 import Environment , FileSystemLoader
1312
1413from flink_sql_runner .flink_clients import (FlinkCli ,
1514 FlinkStandaloneClusterRunner ,
1615 FlinkYarnRunner )
16+ from flink_sql_runner .jinja import JinjaTemplateResolver
1717from flink_sql_runner .job_configuration import JobConfiguration
18- from flink_sql_runner .s3 import get_content , get_latest_object , upload_content
18+ from flink_sql_runner .manifest import ManifestManager
19+ from flink_sql_runner .s3 import get_latest_object
1920
2021logging .basicConfig (level = logging .INFO , format = "%(asctime)s %(message)s" )
2122
@@ -66,87 +67,69 @@ def parse_args():
6667 return parser .parse_known_args ()
6768
6869
69- class JinjaTemplateResolver (object ):
70- def resolve (
71- self ,
72- template_dir : str ,
73- template_file : str ,
74- vars : Dict [str , str ],
75- output_file_path : str ,
76- ) -> None :
77- environment = Environment (loader = FileSystemLoader (template_dir ))
78- template = environment .get_template (template_file )
79- content = template .render (** vars )
80- with open (output_file_path , mode = "w" , encoding = "utf-8" ) as run_file :
81- run_file .truncate ()
82- run_file .write (content )
83-
84-
85- class EmrJobRunner (object ):
70+ class FlinkJobRunner (object ):
    def __init__(
        self,
        job_name: str,
        new_job_conf: Optional[JobConfiguration],
        pyflink_runner_dir: str,
        table_definition_paths: str,
        pyexec_path: str,
        flink_cli_runner: FlinkCli,
        jinja_template_resolver: JinjaTemplateResolver,
        manifest_manager: ManifestManager,
        passthrough_args: List[str],
    ):
        """Wire together the collaborators used to deploy, restart or delete a Flink job.

        :param job_name: Name of the job this runner manages.
        :param new_job_conf: Desired job configuration; ``None`` signals that the
            job should be deleted (see ``run()``).
        :param pyflink_runner_dir: Directory holding the PyFlink runner scripts.
        :param table_definition_paths: Path(s) to table definition files.
        :param pyexec_path: Python interpreter path used for Flink's ``pyexec``.
        :param flink_cli_runner: Abstraction over the Flink CLI (YARN or standalone).
        :param jinja_template_resolver: Renders Jinja templates to files.
        :param manifest_manager: Fetches/uploads job manifests (stored in S3).
        :param passthrough_args: Extra CLI args forwarded untouched to the job.
        """
        self.job_name = job_name
        self.new_job_conf = new_job_conf
        self.pyflink_runner_dir = pyflink_runner_dir
        self.table_definition_paths = table_definition_paths
        self.pyexec_path = pyexec_path
        # NOTE(review): the client-side interpreter deliberately mirrors the
        # worker interpreter — both come from the single pyexec_path argument.
        self.pyclientexec_path = pyexec_path
        self.flink_cli_runner = flink_cli_runner
        self.jinja_template_resolver = jinja_template_resolver
        self.manifest_manager = manifest_manager
        self.passthrough_args = passthrough_args
10993
    def run(self) -> None:
        """Reconcile the running Flink job with the desired configuration.

        Behaviour by case:
        - ``new_job_conf is None``: treat as a delete — stop the job with a
          savepoint using the manifest stored externally; raise ``ValueError``
          if no manifest exists for the job.
        - No external manifest: brand-new job — start it and upload a manifest.
        - Manifest unchanged: only (re)start the job if it is not running.
        - Manifest changed: stop with savepoint (if running), then restart —
          from the old state if only Flink properties changed, or with a fresh
          query version if the job definition itself changed — and upload the
          updated manifest.
        """
        if self.new_job_conf is None:
            logging.info(f"Deleting job '{self.job_name}'.")
            job_manifest = self.manifest_manager.fetch_job_manifest(self.job_name)
            if job_manifest is None:
                raise ValueError(f"Job manifest for {self.job_name} not found.")
            # Stop using the externally stored config (query-version in particular).
            self.__stop_with_savepoint(job_manifest)
            return

        logging.info(f"Deploying '{self.job_name}'.")
        if self.new_job_conf.is_sql():
            logging.info(f"Deploying query: |{self.new_job_conf.get_sql()}|")
        else:
            logging.info(f"Deploying code:\n{self.new_job_conf.get_code()}")

        external_config = self.manifest_manager.fetch_job_manifest(self.job_name)
        logging.info(f"External config:\n{external_config}")

        if not external_config:
            # The job manifest did not exist. Starting a newly created job.
            self.__start_new_job(self.new_job_conf)
            self.manifest_manager.upload_job_manifest(self.new_job_conf)
        elif external_config and not self.__has_job_manifest_changed(external_config, self.new_job_conf):
            # The job manifest has not been modified. There is no need to restart the job. Just ensure it's running.
            if self.__is_job_running(self.job_name):
                logging.info("Job manifest has not changed. Skipping job restart.")
            else:
                self.__start_job_with_unchanged_query(external_config, self.new_job_conf)
        else:
            # The job manifest has been modified. Job needs to be restarted.
            if self.__is_job_running(self.job_name):
                # Stop the job using the old config (query-version in particular).
                self.__stop_with_savepoint(external_config)

            if external_config and not self.__has_job_definition_changed(external_config, self.new_job_conf):
                self.__start_job_with_unchanged_query(external_config, self.new_job_conf)
            else:
                self.__start_new_job_with_changed_query(external_config, self.new_job_conf)
            self.manifest_manager.upload_job_manifest(self.new_job_conf)
150133
151134 def __is_job_running (self , job_name : str ) -> bool :
152135 return self .flink_cli_runner .is_job_running (job_name )
@@ -169,12 +152,6 @@ def __start_new_job_with_changed_query(self, external_config, job_conf):
169152 job_conf .set_meta_query_create_timestamp (datetime .datetime .now ().strftime ("%Y-%m-%d %H:%M:%S" ))
170153 self .__start_with_clean_state (job_conf )
171154
172- def __upload_job_manifest (self , job_conf ):
173- upload_path = os .path .join (self .external_job_config_prefix , f"{ job_conf .get_name ()} .yaml" )
174- logging .info (f"Uploading the new config file to 's3://{ self .external_job_config_bucket } /{ upload_path } '." )
175- upload_content (yaml .dump (job_conf .to_dict ()), self .external_job_config_bucket , upload_path )
176- logging .info ("The config file has been uploaded." )
177-
178155 def __stop_with_savepoint (self , job_conf : JobConfiguration ) -> None :
179156 job_id = self .flink_cli_runner .get_job_id (job_conf .get_name ())
180157 savepoint_path = os .path .join (job_conf .get_flink_savepoints_dir (), job_conf .get_meta_query_version_str ())
@@ -316,12 +293,6 @@ def __find_latest_state_internal(
316293 logging .info (f"State found at '{ state_path } '." )
317294 return state_path , last_created_ts
318295
319- def __fetch_job_manifest (self , bucket_name : str , prefix : str , job_name : str ) -> Optional [JobConfiguration ]:
320- object_key = os .path .join (prefix , f"{ job_name } .yaml" )
321- logging .info (f"Looking for config at s3://{ bucket_name } /{ object_key } ." )
322- raw_manifest = get_content (bucket_name , object_key )
323- return JobConfiguration (yaml .safe_load (raw_manifest )) if raw_manifest else None
324-
325296 def __has_job_manifest_changed (self , old_job_conf : JobConfiguration , new_job_conf : JobConfiguration ) -> bool :
326297 return self .__has_job_definition_changed (old_job_conf , new_job_conf ) or self .__have_flink_properties_changed (
327298 old_job_conf , new_job_conf
@@ -352,21 +323,26 @@ def __escape_query(query: str) -> str:
352323 return query .replace ("`" , "\\ `" )
353324
354325
def read_config(config_file: str):
    """Load a job configuration YAML file and return the parsed structure.

    Uses ``yaml.safe_load`` instead of ``yaml.load(..., FullLoader)``: a config
    file must not be able to instantiate arbitrary Python objects, and the rest
    of this codebase already parses the same manifests with ``safe_load`` after
    they round-trip through S3, so both paths now accept the same YAML subset.

    :param config_file: Path to the YAML job configuration.
    :return: The deserialized YAML content (typically a dict).
    """
    with open(config_file, encoding="utf-8") as qf:
        return yaml.safe_load(qf)
329+
330+
if __name__ == "__main__":
    args, passthrough_args = parse_args()
    configuration = JobConfiguration(read_config(args.job_config_path))

    # Choose the Flink CLI flavour matching the requested deployment target.
    if args.deployment_target == "yarn":
        cli_runner: FlinkCli = FlinkYarnRunner()
    else:
        cli_runner = FlinkStandaloneClusterRunner(args.jobmanager_address)

    FlinkJobRunner(
        job_name=configuration.get_name(),
        new_job_conf=configuration,
        pyflink_runner_dir=args.pyflink_runner_dir,
        table_definition_paths=args.base_output_path,
        pyexec_path=args.pyexec_path,
        flink_cli_runner=cli_runner,
        jinja_template_resolver=JinjaTemplateResolver(),
        manifest_manager=ManifestManager(args.external_job_config_bucket, args.external_job_config_prefix),
        passthrough_args=passthrough_args,
    ).run()
0 commit comments