Skip to content

Commit 0115f40

Browse files
Refactor tests to avoid most race conditions
These tests have passed 10 consecutive runs on CI with no failures. There were race conditions regarding processing of record callbacks, so we swap to a model of using a "DONE" record that, when processed, signals that the IOC process has finished all of its callbacks. Logging also added to some tests that had weird failures on CI, so if they appear again we'll have more information.
1 parent 4014707 commit 0115f40

File tree

3 files changed

+144
-58
lines changed

3 files changed

+144
-58
lines changed

tests/conftest.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
# Length picked to match string record length, so we can re-use test strings.
2020
WAVEFORM_LENGTH = 40
2121

22+
# Default timeout for many operations across testing
23+
TIMEOUT = 10 # Seconds
24+
2225
def create_random_prefix():
2326
"""Create 12-character random string, for generating unique Device Names"""
2427
return "".join(random.choice(string.ascii_uppercase) for _ in range(12))
@@ -99,3 +102,21 @@ def enable_code_coverage():
99102
pass
100103
else:
101104
cleanup_on_sigterm()
105+
106+
def select_and_recv(conn, expected_char = None):
107+
"""Wait for the given Connection to have data to receive, and return it.
108+
If a character is provided check its correct before returning it.
109+
This function imports Cothread, and so must NOT be called before any
110+
multiprocessing sub-processes are spawned."""
111+
from cothread import select
112+
rrdy, _, _ = select([conn], [], [], TIMEOUT)
113+
if rrdy:
114+
val = conn.recv()
115+
else:
116+
pytest.fail("Did not receive expected char before TIMEOUT expired")
117+
118+
if expected_char:
119+
assert val == expected_char, \
120+
"Expected character did not match"
121+
122+
return val

tests/test_record_values.py

Lines changed: 36 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,12 @@
66
from enum import Enum
77
from math import isnan, inf, nan
88

9-
from conftest import requires_cothread, WAVEFORM_LENGTH
9+
from conftest import (
10+
requires_cothread,
11+
WAVEFORM_LENGTH,
12+
select_and_recv,
13+
TIMEOUT
14+
)
1015

1116
from softioc import asyncio_dispatcher, builder, softioc
1217
from softioc.pythonSoftIoc import RecordWrapper
@@ -16,7 +21,6 @@
1621

1722
# Test parameters
1823
DEVICE_NAME = "RECORD-VALUE-TESTS"
19-
TIMEOUT = 5 # Seconds
2024

2125
# The maximum length string for StringIn/Out records
2226
MAX_LEN_STR = "a 39 char string exactly maximum length"
@@ -329,7 +333,7 @@ def run_ioc(record_configurations: list, conn, set_enum, get_enum):
329333
dispatcher = asyncio_dispatcher.AsyncioDispatcher()
330334
builder.LoadDatabase()
331335
softioc.iocInit(dispatcher)
332-
conn.send("IOC started")
336+
conn.send("R") # "Ready"
333337

334338
# Record list and record config list should always be in line
335339
for record, configuration in zip(records, record_configurations):
@@ -351,18 +355,19 @@ def run_ioc(record_configurations: list, conn, set_enum, get_enum):
351355
if set_enum == SetValueEnum.CAPUT:
352356
if conn.poll(TIMEOUT):
353357
val = conn.recv()
354-
if val is not None:
358+
if val == "G":
355359
conn.send(record.get())
360+
else:
361+
pytest.fail(f"Received unexpected character {val}")
356362

357363
# Keep process alive while main thread works.
358364
# This is most applicable to CAGET tests.
359365
while (True):
360366
if conn.poll(TIMEOUT):
361367
val = conn.recv()
362-
if val == "EXIT":
368+
if val == "D": # "Done"
363369
break
364370

365-
366371
def run_test_function(
367372
record_configurations: list, set_enum: SetValueEnum, get_enum: GetValueEnum
368373
):
@@ -379,19 +384,17 @@ def run_test_function(
379384

380385
ioc_process.start()
381386

387+
382388
# Wait for message that IOC has started
383-
if parent_conn.poll(TIMEOUT):
384-
parent_conn.recv()
385-
else:
386-
pytest.fail("IOC process did not start before TIMEOUT expired")
389+
select_and_recv(parent_conn, "R")
387390

388-
try:
389-
# Cannot do these imports before the subprocess starts, as cothread
390-
# isn't threadsafe (in the way we require)
391-
from cothread import Yield
392-
from cothread.catools import caget, caput, _channel_cache
393-
from cothread.dbr import DBR_CHAR_STR
391+
# Cannot do these imports before the subprocess starts, as the subprocess
392+
# would inherit cothread's internal state which would break things!
393+
from cothread import Yield
394+
from cothread.catools import caget, caput, _channel_cache
395+
from cothread.dbr import DBR_CHAR_STR
394396

397+
try:
395398
# cothread remembers connected IOCs. As we potentially restart the same
396399
# named IOC multiple times, we have to purge the cache else the
397400
# result from caget/caput cache would be a DisconnectError during the
@@ -424,10 +427,7 @@ def run_test_function(
424427

425428
if set_enum == SetValueEnum.CAPUT:
426429
if get_enum == GetValueEnum.GET:
427-
if parent_conn.poll(TIMEOUT):
428-
parent_conn.recv()
429-
else:
430-
pytest.fail("IOC did not provide initial record value")
430+
select_and_recv(parent_conn)
431431
caput(
432432
DEVICE_NAME + ":" + record_name,
433433
initial_value,
@@ -437,18 +437,16 @@ def run_test_function(
437437
)
438438

439439
if get_enum == GetValueEnum.GET:
440-
parent_conn.send("Do another get!")
440+
parent_conn.send("G") # "Get"
441+
441442
# Ensure IOC process has time to execute.
442443
# I saw failures on MacOS where it appeared the IOC had not
443444
# processed the put'ted value as the caget returned the same
444445
# value as was originally passed in.
445446
Yield(timeout=TIMEOUT)
446447

447448
if get_enum == GetValueEnum.GET:
448-
if parent_conn.poll(TIMEOUT):
449-
rec_val = parent_conn.recv()
450-
else:
451-
pytest.fail("IOC did not provide record value in queue")
449+
rec_val = select_and_recv(parent_conn)
452450
else:
453451
rec_val = caget(
454452
DEVICE_NAME + ":" + record_name,
@@ -477,10 +475,8 @@ def run_test_function(
477475
# Purge cache to suppress spurious "IOC disconnected" exceptions
478476
_channel_cache.purge()
479477

478+
parent_conn.send("D") # "Done"
480479

481-
parent_conn.send("EXIT")
482-
483-
# ioc_process.terminate()
484480
ioc_process.join(timeout=TIMEOUT)
485481
if ioc_process.exitcode is None:
486482
pytest.fail("Process did not terminate")
@@ -771,11 +767,16 @@ def none_value_test_func(self, record_func, queue):
771767
builder.LoadDatabase()
772768
softioc.iocInit()
773769

770+
print("CHILD: Soft IOC started, about to .set(None)")
771+
774772
try:
775773
record.set(None)
774+
print("CHILD: Uh-OH! No exception thrown when setting None!")
776775
except Exception as e:
777776
queue.put(e)
778777

778+
print("CHILD: We really should never get here...")
779+
779780
queue.put(Exception("FAIL:Test did not raise exception during .set()"))
780781

781782
@requires_cothread
@@ -791,13 +792,18 @@ def test_value_none_rejected_set_after_init(self, record_func_reject_none):
791792

792793
process.start()
793794

795+
print("PARENT: Child process started, waiting for returned exception")
796+
794797
try:
795-
exception = queue.get(timeout=5)
798+
exception = queue.get(timeout=TIMEOUT)
796799

797800
assert isinstance(exception, self.expected_exceptions)
798801
finally:
802+
print("PARENT: Issuing terminate to child process")
799803
process.terminate()
800-
process.join(timeout=3)
804+
process.join(timeout=TIMEOUT)
805+
if process.exitcode is None:
806+
pytest.fail("Process did not terminate")
801807

802808

803809
class TestInvalidValues:

0 commit comments

Comments
 (0)