Skip to content

Commit 518c9a0

Browse files
Daniel Ohayon authored and
facebook-github-bot committed
flag to log full trace on error
Summary: While trying to debug an asyncio RuntimeError, I realized that the current exception logging in log_event does not include the full stack trace of the error (it only starts from where the error was thrown). This change adds the ability to include the full stack to help with debugging. Differential Revision: D72800297
1 parent bec9317 commit 518c9a0

File tree

1 file changed

+15
-5
lines changed

1 file changed

+15
-5
lines changed

torchx/runner/events/__init__.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ def __init__(
9090
app_metadata: Optional[Dict[str, str]] = None,
9191
runcfg: Optional[str] = None,
9292
workspace: Optional[str] = None,
93+
log_full_trace_on_error: bool = False,
9394
) -> None:
9495
self._torchx_event: TorchxEvent = self._generate_torchx_event(
9596
api,
@@ -103,6 +104,7 @@ def __init__(
103104
self._start_cpu_time_ns = 0
104105
self._start_wall_time_ns = 0
105106
self._start_epoch_time_usec = 0
107+
self.log_full_trace_on_error = log_full_trace_on_error
106108

107109
def __enter__(self) -> "log_event":
108110
self._start_cpu_time_ns = time.process_time_ns()
@@ -125,15 +127,23 @@ def __exit__(
125127
) // 1000
126128
if traceback_type:
127129
self._torchx_event.raw_exception = traceback.format_exc()
130+
128131
typ, value, tb = sys.exc_info()
129132
if tb:
130133
last_frame = traceback.extract_tb(tb)[-1]
134+
135+
exception_info = {
136+
"filename": last_frame.filename,
137+
"lineno": last_frame.lineno,
138+
"name": last_frame.name,
139+
}
140+
if self.log_full_trace_on_error:
141+
frames = traceback.extract_stack()[:-1]
142+
exception_info["stacktrace"] = "".join(
143+
traceback.format_list(frames)
144+
)
131145
self._torchx_event.exception_source_location = json.dumps(
132-
{
133-
"filename": last_frame.filename,
134-
"lineno": last_frame.lineno,
135-
"name": last_frame.name,
136-
}
146+
exception_info
137147
)
138148
if exec_type:
139149
self._torchx_event.exception_type = exec_type.__name__

0 commit comments

Comments
 (0)