
Commit 36e1d85

[erts] Fix scheduler_wall_time tests in statistics_SUITE
Failures while all dirty schedulers were hogged resulted in a timeout instead of a failure.
1 parent 412bff5
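
For context (not part of this commit): the statistic exercised by the suite is scheduler_wall_time, which must be enabled via system_flag/2; erlang:statistics(scheduler_wall_time) then returns one {SchedulerId, ActiveTime, TotalTime} tuple per scheduler, and per-scheduler load is the active/total delta between two snapshots. A minimal sketch (module and function names are illustrative only):

-module(swt_sketch).
-export([per_scheduler_load/0]).

%% Sketch only: sample scheduler_wall_time twice and compute the
%% percentage of wall-clock time each scheduler was active in between.
per_scheduler_load() ->
    erlang:system_flag(scheduler_wall_time, true),
    Ts0 = lists:sort(erlang:statistics(scheduler_wall_time)),
    timer:sleep(1000),
    Ts1 = lists:sort(erlang:statistics(scheduler_wall_time)),
    [{Id, 100 * (A1 - A0) div (T1 - T0)}
     || {{Id, A0, T0}, {Id, A1, T1}} <- lists:zip(Ts0, Ts1)].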

File tree: 2 files changed (+43 −29 lines)

erts/emulator/beam/beam_debug.c
erts/emulator/test/statistics_SUITE.erl

erts/emulator/beam/beam_debug.c

Lines changed: 18 additions & 11 deletions
@@ -1242,13 +1242,15 @@ dirty_test(Process *c_p, Eterm type, Eterm arg1, Eterm arg2, ErtsCodePtr I)
             dirty_send_message(c_p, arg2, AM_done);
             ERTS_BIF_PREP_RET(ret, am_ok);
         }
-        else if (ERTS_IS_ATOM_STR("alive_waitexiting", arg1)) {
+        else if (ERTS_IS_ATOM_STR("alive_waitexiting", arg1)
+                 || ERTS_IS_ATOM_STR("alive_waitexitingonly", arg1)) {
             Process *real_c_p = erts_proc_shadow2real(c_p);
             Eterm *hp, *hp2;
             Uint sz;
             int i;
             ErtsSchedulerData *esdp = erts_get_scheduler_data();
             int dirty_io = esdp->type == ERTS_SCHED_DIRTY_IO;
+            int no_wait_alloc = ERTS_IS_ATOM_STR("alive_waitexitingonly", arg1);
 
             if (ERTS_PROC_IS_EXITING(real_c_p))
                 goto badarg;
@@ -1262,16 +1264,21 @@ dirty_test(Process *c_p, Eterm type, Eterm arg1, Eterm arg2, ErtsCodePtr I)
                 erts_thr_yield();
             }
 
-            ms_wait(c_p, make_small(1000), 0);
-
-            /* Should still be able to allocate memory */
-            hp = HAlloc(c_p, 3); /* Likely on heap */
-            sz = 10000;
-            hp2 = HAlloc(c_p, sz); /* Likely in heap fragment */
-            *hp2 = make_pos_bignum_header(sz);
-            for (i = 1; i < sz; i++)
-                hp2[i] = (Eterm) 4711;
-            ERTS_BIF_PREP_RET(ret, TUPLE2(hp, am_ok, make_big(hp2)));
+            if (no_wait_alloc) {
+                ERTS_BIF_PREP_RET(ret, am_ok);
+            }
+            else {
+                ms_wait(c_p, make_small(1000), 0);
+
+                /* Should still be able to allocate memory */
+                hp = HAlloc(c_p, 3); /* Likely on heap */
+                sz = 10000;
+                hp2 = HAlloc(c_p, sz); /* Likely in heap fragment */
+                *hp2 = make_pos_bignum_header(sz);
+                for (i = 1; i < sz; i++)
+                    hp2[i] = (Eterm) 4711;
+                ERTS_BIF_PREP_RET(ret, TUPLE2(hp, am_ok, make_big(hp2)));
+            }
         }
         else {
         badarg:
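
For reference (not part of the diff): the new alive_waitexitingonly atom takes the same spin-until-the-caller-is-exiting path as alive_waitexiting, but skips the 1000 ms wait and the large heap allocation afterwards, so a hog can be torn down without touching allocation while all dirty schedulers are busy. The test side drives it roughly as in the sketch below, mirroring StartDirtyHog in statistics_SUITE.erl (the suite uses spawn_link and later unlinks before killing; module and function names here are illustrative only):

-module(dirty_hog_sketch).
-export([start_hogs/0]).

%% Sketch: start one hog per dirty scheduler type. Func is dirty_cpu or
%% dirty_io; the spawned process blocks in the debug BIF until it is
%% sent an exit signal (the suite kills the hogs explicitly).
start_hogs() ->
    Tester = self(),
    StartDirtyHog =
        fun(Func) ->
                spawn(fun() ->
                              erts_debug:Func(alive_waitexitingonly, Tester)
                      end)
        end,
    {StartDirtyHog(dirty_cpu), StartDirtyHog(dirty_io)}.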

erts/emulator/test/statistics_SUITE.erl

Lines changed: 25 additions & 18 deletions
@@ -337,6 +337,12 @@ scheduler_wall_time_test(Type) ->
     end.
 
 run_scheduler_wall_time_test(Type) ->
+    %% All dirty schedulers will be hogged during a period of time
+    %% during these tests. If the testcase fails during this time, all
+    %% disc io and large gc will be blocked making the test case
+    %% timeout instead of failing, i.e., do not add stuff that can
+    %% fail the test case while all dirty schedulers are hogged...
+
     %% Should return undefined if system_flag is not turned on yet
     undefined = statistics(Type),
     %% Turn on statistics
@@ -383,7 +389,7 @@ run_scheduler_wall_time_test(Type) ->
                end,
     StartDirtyHog = fun(Func) ->
                             F = fun() ->
-                                        erts_debug:Func(alive_waitexiting,
+                                        erts_debug:Func(alive_waitexitingonly,
                                                         MeMySelfAndI)
                                 end,
                             Pid = spawn_link(F),
@@ -394,8 +400,9 @@ run_scheduler_wall_time_test(Type) ->
     %% Max on one, the other schedulers empty (hopefully)
     %% Be generous the process can jump between schedulers
     %% which is ok and we don't want the test to fail for wrong reasons
-    _L1 = [S1Load|EmptyScheds1] = get_load(Type),
-    {true,_} = {S1Load > 50,S1Load},
+    L1 = get_load(Type),
+    [High1Load|EmptyScheds1] = lists:reverse(lists:sort(L1)),
+    {true,_} = {High1Load > 50,High1Load},
     {false,_} = {lists:any(fun(Load) -> Load > 50 end, EmptyScheds1),EmptyScheds1},
     {true,_} = {lists:sum(EmptyScheds1) < 60,EmptyScheds1},
 
@@ -417,33 +424,33 @@ run_scheduler_wall_time_test(Type) ->
     %% 100% load. Need to take into consideration an odd number of
     %% schedulers and also special consideration for when there is
     %% only 1 scheduler
-    LastHogs = [StartHog() || _ <- lists:seq(1, (Schedulers+1) div 2),
-                              Schedulers =/= 1],
+
     LastDirtyCPUHogs = [StartDirtyHog(dirty_cpu)
                         || _ <- lists:seq(1, (DirtyCPUSchedulers+1) div 2),
                            DirtyCPUSchedulers =/= 1],
     LastDirtyIOHogs = [StartDirtyHog(dirty_io)
                        || _ <- lists:seq(1, (DirtyIOSchedulers+1) div 2),
                           DirtyIOSchedulers =/= 1],
+    LastHogs = [StartHog() || _ <- lists:seq(1, (Schedulers+1) div 2),
+                              Schedulers =/= 1],
     FullScheds = get_load(Type),
     ct:log("FullScheds: ~w",[FullScheds]),
+
+    AllHogs = [P1|HalfHogs++HalfDirtyCPUHogs++HalfDirtyIOHogs
+               ++LastHogs++LastDirtyCPUHogs++LastDirtyIOHogs],
+
+    KillHog = fun (HP) -> unlink(HP), exit(HP, kill) end,
+    WaitKilledHog = fun (HP) -> false = is_process_alive(HP) end,
+    [KillHog(Pid) || Pid <- AllHogs],
+    [WaitKilledHog(Pid) || Pid <- AllHogs],
+    receive after 1000 -> ok end, %% Give dirty schedulers time to complete...
+
     {false,_} = {lists:any(fun(Load) -> Load < 80 end, FullScheds),FullScheds},
     FullLoad = lists:sum(FullScheds) div TotLoadSchedulers,
     if FullLoad > 90 -> ok;
        true -> exit({fullload, FullLoad})
     end,
 
-    KillHog = fun (HP) ->
-                      HPM = erlang:monitor(process, HP),
-                      unlink(HP),
-                      exit(HP, kill),
-                      receive
-                          {'DOWN', HPM, process, HP, killed} ->
-                              ok
-                      end
-              end,
-    [KillHog(Pid) || Pid <- [P1|HalfHogs++HalfDirtyCPUHogs++HalfDirtyIOHogs
-                             ++LastHogs++LastDirtyCPUHogs++LastDirtyIOHogs]],
     receive after 2000 -> ok end, %% Give dirty schedulers time to complete...
     AfterLoad = get_load(Type),
     io:format("AfterLoad=~p~n", [AfterLoad]),
@@ -454,12 +461,12 @@ run_scheduler_wall_time_test(Type) ->
     end.
 
 get_load(Type) ->
+    %% Returns info for each *online* scheduler in scheduler id order
     Start = erlang:statistics(Type),
     timer:sleep(1500),
     End = erlang:statistics(Type),
 
-    lists:reverse(
-      lists:sort(load_percentage(online_statistics(Start),online_statistics(End)))).
+    load_percentage(online_statistics(Start),online_statistics(End)).
 
 %% We are only interested in schedulers that are online to remove all
 %% offline normal and dirty cpu schedulers (dirty io cannot be offline)
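
Not part of the commit: with get_load/1 now returning per-scheduler percentages in scheduler id order rather than sorted, the helper it relies on presumably derives each scheduler's load from two scheduler_wall_time snapshots along the following lines (an illustrative reconstruction of load_percentage/2, not the suite's exact code):

%% Sketch: each snapshot entry is {SchedulerId, ActiveTime, TotalTime};
%% the load is the percentage of the elapsed total time that the
%% scheduler spent active between the two snapshots.
load_percentage([{Id, A0, T0} | Rest0], [{Id, A1, T1} | Rest1]) ->
    [100 * (A1 - A0) div (T1 - T0) | load_percentage(Rest0, Rest1)];
load_percentage([], []) ->
    [].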
