Skip to content

Commit de3ce5f

Browse files
committed
remove pytest-fork
1 parent b715661 commit de3ce5f

File tree

2 files changed

+133
-21
lines changed

2 files changed

+133
-21
lines changed

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,6 @@ test = [
124124
"octopus-automl[examples]",
125125
"pytest>=8.4.2",
126126
"pytest-cov>=6.2.1",
127-
"pytest-forked>=1.6.0",
128127
"pytest-order>=1.3.0",
129128
"octopus-automl[autogluon]",
130129
"octopus-automl[boruta]",

tests/modules/test_training_feature_importances.py

Lines changed: 133 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,37 @@
11
"""Feature Importance Test Suite for Training Class.
22
33
Tests all feature importance methods across all available models.
4-
Each test is marked with ``@pytest.mark.forked`` so it runs in its own
5-
subprocess — this provides complete isolation between tests, preventing:
4+
5+
SHAP-based tests (``calculate_fi_featuresused_shap``) run in isolated
6+
subprocesses via ``subprocess.run()`` to prevent:
67
78
- CatBoost's C++ destructor segfault when Python's GC finalizes objects
89
- numba/llvmlite LLVM pass-manager crash from accumulated JIT compilations
9-
- Memory accumulation from session-scoped model caches
10+
- SHAP + CatBoost + NaN background data segfaults
11+
12+
All other FI methods (internal, permutation, LOFO) run directly in the
13+
pytest process — they are safe, faster, and maintain normal pytest protocol
14+
(fixture setup/teardown, SetupState tracking, etc.).
15+
16+
This replaces the previous ``@pytest.mark.forked`` approach, which bypassed
17+
pytest's ``SetupState`` for every test and caused fixture teardown errors
18+
(``AssertionError: previous item was not torn down properly``) when
19+
transitioning to non-forked tests in CI.
1020
11-
See datasets_local/specifications_refactorfi/02_ci_segfault_investigation.md
12-
for details on why this structure was chosen.
21+
See:
22+
- datasets_local/specifications_refactorfi/02_ci_segfault_investigation.md
23+
- datasets_local/specifications_refactorfi/07_ci_warnings_error_report_proposal.md §2.1.6
1324
1425
Usage:
1526
pytest test_training_feature_importances.py -v
1627
"""
1728

29+
import gc
30+
import signal
31+
import subprocess
32+
import sys
1833
import warnings
34+
from pathlib import Path
1935

2036
import numpy as np
2137
import pandas as pd
@@ -46,6 +62,14 @@
4662
# "calculate_fi_shap", # Excluded: kernel SHAP is too slow/memory-heavy for CI
4763
]
4864

65+
# Methods that require subprocess isolation due to segfault risk
_SUBPROCESS_FI_METHODS = frozenset(
    (
        "calculate_fi_featuresused_shap",
        "calculate_fi_shap",
    )
)
72+
4973
ML_TYPE_CONFIGS = {
5074
MLType.BINARY: {
5175
"target_assignments": {"target": "target_class"},
@@ -238,22 +262,13 @@ def _run_fi_method(training: Training, method_name: str) -> list[str]:
238262
raise ValueError(f"Unknown method: {method_name}")
239263

240264

241-
@pytest.mark.forked
242-
@pytest.mark.parametrize("ml_type,model_name,fi_method", _generate_model_fi_params())
243-
def test_feature_importance(ml_type, model_name, fi_method):
244-
"""Test a single FI method for a single model in an isolated subprocess.
245-
246-
Each (model, FI method) combination runs in its own forked process
247-
(``@pytest.mark.forked``). This means:
248-
249-
- A segfault clearly identifies the exact (model, method) that crashed
250-
- Complete process isolation prevents CatBoost GC segfaults,
251-
numba/llvmlite LLVM crashes, and memory accumulation
252-
- The trade-off is fitting the model once per test (slightly slower)
253-
254-
Test IDs look like::
265+
def _run_test_body(ml_type: MLType, model_name: str, fi_method: str) -> None:
266+
"""Core test logic — called in-process or from subprocess entry point.
255267
256-
test_feature_importance[binary-CatBoostClassifier-calculate_fi_featuresused_shap]
268+
Args:
269+
ml_type: The machine learning task type.
270+
model_name: Name of the model to test.
271+
fi_method: Name of the feature importance method to run.
257272
"""
258273
warnings.filterwarnings("ignore")
259274

@@ -278,3 +293,101 @@ def test_feature_importance(ml_type, model_name, fi_method):
278293
# built-in feature importances (e.g. GaussianProcess, SVM with non-linear kernel)
279294
if fi_method != "calculate_fi_internal":
280295
assert len(fi_data) > 0, f"Feature importance '{key}' is empty after {fi_method}"
296+
297+
298+
def _run_in_subprocess(ml_type: MLType, model_name: str, fi_method: str) -> None:
    """Run a single FI test in an isolated subprocess.

    Invokes this module's ``__main__`` block in a fresh Python interpreter
    via ``subprocess.run()``. This provides complete process isolation
    without using ``os.fork()`` (which is deprecated in multi-threaded
    processes on Python 3.12+).

    Args:
        ml_type: The machine learning task type.
        model_name: Name of the model to test.
        fi_method: Name of the feature importance method to run.

    Raises:
        Failed: Via ``pytest.fail()`` if the subprocess times out, exits
            with a non-zero code, or crashes on a signal.
    """
    cmd = [
        sys.executable,
        str(Path(__file__)),
        "--ml-type",
        ml_type.value,
        "--model-name",
        model_name,
        "--fi-method",
        fi_method,
    ]
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=300,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        # BUGFIX: previously TimeoutExpired propagated out of this helper,
        # so a hung subprocess surfaced as a pytest *error* with no captured
        # output. Convert it to a regular failure with the same truncated
        # diagnostics as the non-zero-exit path.
        stdout_tail = exc.stdout[-2000:] if exc.stdout else "(empty)"
        stderr_tail = exc.stderr[-2000:] if exc.stderr else "(empty)"
        pytest.fail(
            f"Subprocess TIMED OUT after {exc.timeout}s for "
            f"{ml_type.value}-{model_name}-{fi_method}\n"
            f"stdout:\n{stdout_tail}\n"
            f"stderr:\n{stderr_tail}"
        )

    if result.returncode != 0:
        # Truncate output to avoid overwhelming the test report
        stdout_tail = result.stdout[-2000:] if result.stdout else "(empty)"
        stderr_tail = result.stderr[-2000:] if result.stderr else "(empty)"

        if result.returncode < 0:
            # Negative return code means the process was killed by a signal
            try:
                sig_name = signal.Signals(-result.returncode).name
            except (ValueError, AttributeError):
                sig_name = f"signal {-result.returncode}"
            header = f"Subprocess CRASHED ({sig_name})"
        else:
            header = "Subprocess test failed"

        pytest.fail(
            f"{header} for {ml_type.value}-{model_name}-{fi_method}\n"
            f"Exit code: {result.returncode}\n"
            f"stdout:\n{stdout_tail}\n"
            f"stderr:\n{stderr_tail}"
        )
352+
353+
354+
@pytest.mark.parametrize("ml_type,model_name,fi_method", _generate_model_fi_params())
def test_feature_importance(ml_type, model_name, fi_method):
    """Test a single FI method for a single model.

    SHAP-based methods run in isolated subprocesses to prevent segfaults
    from CatBoost GC, numba/LLVM crashes, and memory accumulation.
    All other FI methods run directly in the pytest process for speed
    and proper pytest protocol compliance.

    Test IDs look like::

        test_feature_importance[binary-CatBoostClassifier-calculate_fi_featuresused_shap]
    """
    # Guard clause: segfault-prone methods are delegated to a fresh
    # interpreter and we return immediately — no in-process cleanup needed.
    if fi_method in _SUBPROCESS_FI_METHODS:
        _run_in_subprocess(ml_type, model_name, fi_method)
        return

    # Safe methods run in-process; collect garbage afterwards to keep
    # memory from accumulating across the parametrized matrix.
    _run_test_body(ml_type, model_name, fi_method)
    gc.collect()
372+
373+
374+
# ---------------------------------------------------------------------------
# Subprocess entry point
# ---------------------------------------------------------------------------
# Executed when _run_in_subprocess launches this file as a script: runs one
# (ml_type, model_name, fi_method) combination and exits. Any non-zero exit
# status (including a signal-induced crash) is picked up by the parent
# pytest process and surfaced as a test failure with full diagnostics.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(
        description="Run a single feature-importance test in isolation.",
    )
    # All three flags are mandatory; the parent process always supplies them.
    for flag, help_text in (
        ("--ml-type", "MLType enum value"),
        ("--model-name", "Model registry name"),
        ("--fi-method", "FI method name"),
    ):
        cli.add_argument(flag, required=True, help=help_text)
    ns = cli.parse_args()

    _run_test_body(MLType(ns.ml_type), ns.model_name, ns.fi_method)

0 commit comments

Comments
 (0)