Skip to content

Commit 41f4373

Browse files
authored
Bring back reverted logger code (#440)
1 parent b231941 commit 41f4373

File tree

1 file changed

+49
-6
lines changed
  • src/databricks/labs/ucx/framework

1 file changed

+49
-6
lines changed

src/databricks/labs/ucx/framework/tasks.py

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ def trigger(*argv):
9292
raise KeyError(msg)
9393

9494
task_name = args.get("task", "not specified")
95+
# `{{parent_run_id}}` is the run of entire workflow, whereas `{{run_id}}` is the run of a task
96+
workflow_run_id = args.get("parent_run_id", "unknown_run_id")
97+
job_id = args.get("job_id")
9598
if task_name not in _TASKS:
9699
msg = f'task "{task_name}" not found. Valid tasks are: {", ".join(_TASKS.keys())}'
97100
raise KeyError(msg)
@@ -101,9 +104,49 @@ def trigger(*argv):
101104
current_task = _TASKS[task_name]
102105
print(current_task.doc)
103106

104-
_install()
105-
106-
cfg = WorkspaceConfig.from_file(Path(args["config"]))
107-
logging.getLogger("databricks").setLevel(cfg.log_level)
108-
109-
current_task.fn(cfg)
107+
config_path = Path(args["config"])
108+
cfg = WorkspaceConfig.from_file(config_path)
109+
110+
# see https://docs.python.org/3/howto/logging-cookbook.html
111+
databricks_logger = logging.getLogger("databricks")
112+
databricks_logger.setLevel(logging.DEBUG)
113+
114+
ucx_logger = logging.getLogger("databricks.labs.ucx")
115+
ucx_logger.setLevel(logging.DEBUG)
116+
117+
log_path = config_path.parent / "logs" / current_task.workflow / f"run-{workflow_run_id}"
118+
log_path.mkdir(parents=True, exist_ok=True)
119+
120+
log_file = log_path / f"{task_name}.log"
121+
file_handler = logging.FileHandler(log_file.as_posix())
122+
log_format = "%(asctime)s %(levelname)s [%(name)s] {%(threadName)s} %(message)s"
123+
log_formatter = logging.Formatter(fmt=log_format, datefmt="%H:%M:%S")
124+
file_handler.setFormatter(log_formatter)
125+
file_handler.setLevel(logging.DEBUG)
126+
127+
console_handler = _install(cfg.log_level)
128+
databricks_logger.removeHandler(console_handler)
129+
databricks_logger.addHandler(file_handler)
130+
131+
ucx_logger.info(f"See debug logs at {log_file}")
132+
133+
log_readme = log_path.joinpath("README.md")
134+
if not log_readme.exists():
135+
# this may race when run from multiple tasks, but let's accept the risk for now.
136+
with log_readme.open(mode="w") as f:
137+
f.write(f"# Logs for the UCX {current_task.workflow} workflow\n")
138+
f.write("This folder contains UCX log files.\n\n")
139+
f.write(f"See the [{current_task.workflow} job](/#job/{job_id}) and ")
140+
f.write(f"[run #{workflow_run_id}](/#job/{job_id}/run/{workflow_run_id})\n")
141+
142+
try:
143+
current_task.fn(cfg)
144+
except BaseException as error:
145+
log_file_for_cli = str(log_file).lstrip("/Workspace")
146+
cli_command = f"databricks workspace export /{log_file_for_cli}"
147+
ucx_logger.error(f"Task crashed. Execute `{cli_command}` locally to troubleshoot with more details. {error}")
148+
databricks_logger.debug("Task crash details", exc_info=error)
149+
file_handler.flush()
150+
raise
151+
finally:
152+
file_handler.close()

0 commit comments

Comments
 (0)