@@ -92,6 +92,9 @@ def trigger(*argv):
9292 raise KeyError (msg )
9393
9494 task_name = args .get ("task" , "not specified" )
95+ # `{{parent_run_id}}` is the run of entire workflow, whereas `{{run_id}}` is the run of a task
96+ workflow_run_id = args .get ("parent_run_id" , "unknown_run_id" )
97+ job_id = args .get ("job_id" )
9598 if task_name not in _TASKS :
9699 msg = f'task "{ task_name } " not found. Valid tasks are: { ", " .join (_TASKS .keys ())} '
97100 raise KeyError (msg )
@@ -101,9 +104,49 @@ def trigger(*argv):
101104 current_task = _TASKS [task_name ]
102105 print (current_task .doc )
103106
104- _install ()
105-
106- cfg = WorkspaceConfig .from_file (Path (args ["config" ]))
107- logging .getLogger ("databricks" ).setLevel (cfg .log_level )
108-
109- current_task .fn (cfg )
107+ config_path = Path (args ["config" ])
108+ cfg = WorkspaceConfig .from_file (config_path )
109+
110+ # see https://docs.python.org/3/howto/logging-cookbook.html
111+ databricks_logger = logging .getLogger ("databricks" )
112+ databricks_logger .setLevel (logging .DEBUG )
113+
114+ ucx_logger = logging .getLogger ("databricks.labs.ucx" )
115+ ucx_logger .setLevel (logging .DEBUG )
116+
117+ log_path = config_path .parent / "logs" / current_task .workflow / f"run-{ workflow_run_id } "
118+ log_path .mkdir (parents = True , exist_ok = True )
119+
120+ log_file = log_path / f"{ task_name } .log"
121+ file_handler = logging .FileHandler (log_file .as_posix ())
122+ log_format = "%(asctime)s %(levelname)s [%(name)s] {%(threadName)s} %(message)s"
123+ log_formatter = logging .Formatter (fmt = log_format , datefmt = "%H:%M:%S" )
124+ file_handler .setFormatter (log_formatter )
125+ file_handler .setLevel (logging .DEBUG )
126+
127+ console_handler = _install (cfg .log_level )
128+ databricks_logger .removeHandler (console_handler )
129+ databricks_logger .addHandler (file_handler )
130+
131+ ucx_logger .info (f"See debug logs at { log_file } " )
132+
133+ log_readme = log_path .joinpath ("README.md" )
134+ if not log_readme .exists ():
135+ # this may race when run from multiple tasks, but let's accept the risk for now.
136+ with log_readme .open (mode = "w" ) as f :
137+ f .write (f"# Logs for the UCX { current_task .workflow } workflow\n " )
138+ f .write ("This folder contains UCX log files.\n \n " )
139+ f .write (f"See the [{ current_task .workflow } job](/#job/{ job_id } ) and " )
140+ f .write (f"[run #{ workflow_run_id } ](/#job/{ job_id } /run/{ workflow_run_id } )\n " )
141+
142+ try :
143+ current_task .fn (cfg )
144+ except BaseException as error :
145+ log_file_for_cli = str (log_file ).lstrip ("/Workspace" )
146+ cli_command = f"databricks workspace export /{ log_file_for_cli } "
147+ ucx_logger .error (f"Task crashed. Execute `{ cli_command } ` locally to troubleshoot with more details. { error } " )
148+ databricks_logger .debug ("Task crash details" , exc_info = error )
149+ file_handler .flush ()
150+ raise
151+ finally :
152+ file_handler .close ()
0 commit comments