Skip to content

Commit 8f406b7

Browse files
committed
Displayed executor errors
1 parent 50a0b28 commit 8f406b7

File tree

9 files changed

+92
-16
lines changed

9 files changed

+92
-16
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
* Updated slim-select version to support range selection (\#827);
88
* Supported pre/post `pinned_package_versions` in task-collection (\#827);
99
* Adapted frontend to new task-group deletion endpoint (\#833);
10+
* Displayed executor errors (\#833);
1011

1112
# 1.19.3
1213

__tests__/job_utilities.test.js

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ import {
33
extractJobErrorParts,
44
extractRelevantJobError,
55
generateNewUniqueDatasetName,
6-
getFirstTaskIndexForContinuingWorkflow
6+
getFirstTaskIndexForContinuingWorkflow,
7+
showExecutorErrorLog
78
} from '$lib/common/job_utilities.js';
89

910
const completeTracebackError = `TASK ERROR:Task id: 15 (Create OME-Zarr structure), e.workflow_task_order=0
@@ -323,6 +324,27 @@ describe('get first task index for continuing workflow', () => {
323324
});
324325
});
325326

327+
// Verifies when showExecutorErrorLog reports that a job's executor error log
// should be displayed.
it('should handle executor_error_log', () => {
328+
const job = /** @type {import('fractal-components/types/api').ApplyWorkflowV2} */ ({});
329+
// A job that is not failed never shows the executor log.
expect(showExecutorErrorLog({ ...job, status: 'done', executor_error_log: null })).toBeFalsy();
330+
// A failed job without an executor log has nothing to show.
expect(showExecutorErrorLog({ ...job, status: 'failed', executor_error_log: null })).toBeFalsy();
331+
// A failed job whose executor log contains out-of-memory messages is reported.
expect(
332+
showExecutorErrorLog({
333+
...job,
334+
status: 'failed',
335+
executor_error_log: `slurmstepd: error: Detected 1 oom_kill event in StepId=111.0. Some of the step tasks have been OOM Killed.
336+
srun: error: u20-cva0000-113: task 0: Out Of Memory`
337+
})
338+
).toBeTruthy();
339+
// A failed job whose executor log contains none of the known keywords is not reported.
expect(
340+
showExecutorErrorLog({
341+
...job,
342+
status: 'failed',
343+
executor_error_log: `foobar`
344+
})
345+
).toBeFalsy();
346+
});
347+
326348
/**
327349
* @param {string[]} names
328350
*/

__tests__/v2/JobLogsModal.test.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ Command "/tmp/FRACTAL_TASKS_DIR/.fractal/fractal-tasks-core0.14.1/venv/bin/pytho
3535
await result.component.show(mockApplyWorkflow({ id: 1, status: 'failed', log: null }), false);
3636
const pre = /** @type {HTMLElement} */ (result.container.querySelector('pre'));
3737
expect(pre.classList.contains('highlight')).eq(true);
38-
expect(pre.innerHTML).eq(error);
38+
expect(pre.querySelector('div')?.innerHTML).eq(error);
3939
});
4040

4141
it('display log with highlighting and hidden details', async () => {
@@ -85,7 +85,7 @@ allowed_channels
8585
await result.component.show(mockApplyWorkflow({ id: 1, status: 'done', log: null }), false);
8686
const pre = /** @type {HTMLElement} */ (result.container.querySelector('pre'));
8787
expect(pre.classList.contains('highlight')).eq(false);
88-
expect(pre.innerHTML).eq(log);
88+
expect(pre.querySelector('div')?.innerHTML).eq(log);
8989
});
9090

9191
it('error while loading job for user', async () => {

components/src/lib/types/api.d.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ export type ApplyWorkflowV2 = {
146146
user_email: string;
147147
slurm_account: string | null;
148148
attribute_filters: { [key: string]: string | number | boolean };
149+
executor_error_log: string | null;
149150
};
150151

151152
export type JobStatus = 'submitted' | 'done' | 'failed';

playwright.config.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ export default defineConfig({
8888

8989
webServer: [
9090
{
91-
command: './tests/start-test-server.sh --branch 2743-delete-task-group-path-on-disk-during-task-group-deletion',
91+
command: './tests/start-test-server.sh --branch main',
9292
port: 8000,
9393
waitForPort: true,
9494
stdout: 'pipe',

src/lib/common/job_utilities.js

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,3 +185,20 @@ export function getFirstTaskIndexForContinuingWorkflow(
185185

186186
return undefined;
187187
}
188+
189+
/**
 * Tells whether the executor error log of a job (e.g. the SLURM stderr output)
 * is relevant enough to be displayed to the user.
 *
 * The log is considered relevant only when the job has status `failed` and its
 * `executor_error_log` contains at least one known failure marker
 * (out-of-memory kill, killed process, `srun` error, time limit reached).
 * Matching is case-insensitive.
 *
 * @param {import('fractal-components/types/api').ApplyWorkflowV2} job
 * @returns {boolean} `true` if the executor error log should be shown, `false` otherwise
 */
export function showExecutorErrorLog(job) {
	if (job.status !== 'failed' || !job.executor_error_log) {
		return false;
	}
	const log = job.executor_error_log.toLowerCase();
	// "oom" must not be preceded by a letter: this still matches "oom_kill",
	// "oom-kill" and "OOM Killed", but no longer matches unrelated words such as
	// "room" or "zoom" that may appear in paths or host names.
	if (/(?<![a-z])oom/.test(log)) {
		return true;
	}
	const errorKeywords = ['killed', 'srun: error', 'out of memory', 'due to time limit'];
	return errorKeywords.some((keyword) => log.includes(keyword));
}

src/lib/components/common/ExpandableLog.svelte

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
onclick={expandDetails}>... (details hidden, click here to expand)</button
5050
>{/if}{/each}</pre>
5151
{:else}
52-
<pre class:highlight>{logParts.map((p) => p.text).join('\n')}</pre>
52+
<pre class:highlight><div class="ps-3 pe-3">{logParts.map((p) => p.text).join('\n')}</div></pre>
5353
{/if}
5454
</div>
5555

src/lib/components/v2/jobs/JobLogsModal.svelte

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<script>
22
import { env } from '$env/dynamic/public';
3-
import { extractJobErrorParts } from '$lib/common/job_utilities';
3+
import { extractJobErrorParts, showExecutorErrorLog } from '$lib/common/job_utilities';
44
import { onDestroy } from 'svelte';
55
import Modal from '../../common/Modal.svelte';
66
import ExpandableLog from '$lib/components/common/ExpandableLog.svelte';
@@ -15,6 +15,8 @@
1515
let admin = false;
1616
let log = '';
1717
let loading = $state(true);
18+
/** @type {'main'|'slurm'} */
19+
let selectedTab = $state('main');
1820
1921
const updateJobInterval = env.PUBLIC_UPDATE_JOBS_INTERVAL
2022
? parseInt(env.PUBLIC_UPDATE_JOBS_INTERVAL)
@@ -31,6 +33,7 @@
3133
if (errorAlert) {
3234
errorAlert.hide();
3335
}
36+
selectedTab = showExecutorErrorLog(selectedJob) ? 'slurm' : 'main';
3437
job = selectedJob;
3538
admin = isAdminPage;
3639
log = '';
@@ -124,14 +127,42 @@
124127
</div>
125128
{/snippet}
126129
{#snippet body()}
127-
<div id="errorAlert-workflowJobLogsModal"></div>
128-
{#if loading}
129-
<div class="spinner-border spinner-border-sm" role="status">
130-
<span class="visually-hidden">Loading...</span>
131-
</div>
132-
Loading...
130+
{#if job && showExecutorErrorLog(job)}
131+
<ul class="nav nav-tabs mb-3">
132+
<li class="nav-item">
133+
<button
134+
class="nav-link"
135+
class:active={selectedTab === 'main'}
136+
onclick={() => (selectedTab = 'main')}
137+
aria-current={selectedTab === 'main' ? 'page' : undefined}
138+
>
139+
Main Log
140+
</button>
141+
</li>
142+
<li class="nav-item">
143+
<button
144+
class="nav-link"
145+
class:active={selectedTab === 'slurm'}
146+
onclick={() => (selectedTab = 'slurm')}
147+
aria-current={selectedTab === 'slurm' ? 'page' : undefined}
148+
>
149+
SLURM Error Log
150+
</button>
151+
</li>
152+
</ul>
153+
{/if}
154+
{#if selectedTab === 'main'}
155+
<div id="errorAlert-workflowJobLogsModal"></div>
156+
{#if loading}
157+
<div class="spinner-border spinner-border-sm" role="status">
158+
<span class="visually-hidden">Loading...</span>
159+
</div>
160+
Loading...
161+
{:else}
162+
<ExpandableLog bind:logParts highlight={job?.status === 'failed'} />
163+
{/if}
133164
{:else}
134-
<ExpandableLog bind:logParts highlight={job?.status === 'failed'} />
165+
<pre><div class="ps-3 pe-3">{job?.executor_error_log}</div></pre>
135166
{/if}
136167
{/snippet}
137168
</Modal>

src/routes/v2/projects/[projectId]/workflows/[workflowId]/+page.svelte

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import VersionUpdate from '$lib/components/v2/workflow/VersionUpdate.svelte';
1313
import ImagesStatus from '$lib/components/jobs/ImagesStatus.svelte';
1414
import TasksOrderModal from '$lib/components/v2/workflow/TasksOrderModal.svelte';
15-
import { extractRelevantJobError } from '$lib/common/job_utilities';
15+
import { extractRelevantJobError, showExecutorErrorLog } from '$lib/common/job_utilities';
1616
import JobLogsModal from '$lib/components/v2/jobs/JobLogsModal.svelte';
1717
import TaskInfoTab from '$lib/components/v2/workflow/TaskInfoTab.svelte';
1818
import InputFiltersTab from '$lib/components/v2/workflow/InputFiltersTab.svelte';
@@ -49,7 +49,7 @@
4949
5050
let jobError = $state('');
5151
/** @type {import('fractal-components/types/api').ApplyWorkflowV2|undefined} */
52-
let failedJob;
52+
let failedJob = $state();
5353
/** @type {JobLogsModal|undefined} */
5454
let jobLogsModal = $state();
5555
@@ -821,7 +821,11 @@
821821
<div class="text-muted mb-2 fw-bolder">
822822
The last job failed with the following error:
823823
</div>
824-
<pre class="text-danger mb-0">{jobError}</pre>
824+
{#if failedJob && showExecutorErrorLog(failedJob)}
825+
<pre class="text-danger mb-0">{failedJob.executor_error_log}</pre>
826+
{:else}
827+
<pre class="text-danger mb-0">{jobError}</pre>
828+
{/if}
825829
</div>
826830
<div class="col-md-2 col-sm-3">
827831
<button class="btn btn-outline-secondary float-end" onclick={showJobLogsModal}>

0 commit comments

Comments
 (0)