"""Feature Importance Test Suite for Training Class.

Tests all feature importance methods across all available models.

SHAP-based tests (``calculate_fi_featuresused_shap``) run in isolated
subprocesses via ``subprocess.run()`` to prevent:

- CatBoost's C++ destructor segfault when Python's GC finalizes objects
- numba/llvmlite LLVM pass-manager crash from accumulated JIT compilations
- SHAP + CatBoost + NaN background data segfaults

All other FI methods (internal, permutation, LOFO) run directly in the
pytest process — they are safe, faster, and maintain normal pytest protocol
(fixture setup/teardown, SetupState tracking, etc.).

This replaces the previous ``@pytest.mark.forked`` approach, which bypassed
pytest's ``SetupState`` for every test and caused fixture teardown errors
(``AssertionError: previous item was not torn down properly``) when
transitioning to non-forked tests in CI.

See:
- datasets_local/specifications_refactorfi/02_ci_segfault_investigation.md
- datasets_local/specifications_refactorfi/07_ci_warnings_error_report_proposal.md §2.1.6

Usage:
    pytest test_training_feature_importances.py -v
"""
1728
29+ import gc
30+ import signal
31+ import subprocess
32+ import sys
1833import warnings
34+ from pathlib import Path
1935
2036import numpy as np
2137import pandas as pd
4662 # "calculate_fi_shap", # Excluded: kernel SHAP is too slow/memory-heavy for CI
4763]
4864
65+ # Methods that require subprocess isolation due to segfault risk
66+ _SUBPROCESS_FI_METHODS = frozenset (
67+ {
68+ "calculate_fi_featuresused_shap" ,
69+ "calculate_fi_shap" ,
70+ }
71+ )
72+
4973ML_TYPE_CONFIGS = {
5074 MLType .BINARY : {
5175 "target_assignments" : {"target" : "target_class" },
@@ -238,22 +262,13 @@ def _run_fi_method(training: Training, method_name: str) -> list[str]:
238262 raise ValueError (f"Unknown method: { method_name } " )
239263
240264
241- @pytest .mark .forked
242- @pytest .mark .parametrize ("ml_type,model_name,fi_method" , _generate_model_fi_params ())
243- def test_feature_importance (ml_type , model_name , fi_method ):
244- """Test a single FI method for a single model in an isolated subprocess.
245-
246- Each (model, FI method) combination runs in its own forked process
247- (``@pytest.mark.forked``). This means:
248-
249- - A segfault clearly identifies the exact (model, method) that crashed
250- - Complete process isolation prevents CatBoost GC segfaults,
251- numba/llvmlite LLVM crashes, and memory accumulation
252- - The trade-off is fitting the model once per test (slightly slower)
253-
254- Test IDs look like::
265+ def _run_test_body (ml_type : MLType , model_name : str , fi_method : str ) -> None :
266+ """Core test logic — called in-process or from subprocess entry point.
255267
256- test_feature_importance[binary-CatBoostClassifier-calculate_fi_featuresused_shap]
268+ Args:
269+ ml_type: The machine learning task type.
270+ model_name: Name of the model to test.
271+ fi_method: Name of the feature importance method to run.
257272 """
258273 warnings .filterwarnings ("ignore" )
259274
@@ -278,3 +293,101 @@ def test_feature_importance(ml_type, model_name, fi_method):
278293 # built-in feature importances (e.g. GaussianProcess, SVM with non-linear kernel)
279294 if fi_method != "calculate_fi_internal" :
280295 assert len (fi_data ) > 0 , f"Feature importance '{ key } ' is empty after { fi_method } "
296+
297+
def _run_in_subprocess(ml_type: MLType, model_name: str, fi_method: str) -> None:
    """Run a single FI test in an isolated subprocess.

    Invokes this module's ``__main__`` block in a fresh Python interpreter
    via ``subprocess.run()``. This provides complete process isolation
    without using ``os.fork()`` (which is deprecated in multi-threaded
    processes on Python 3.12+).

    Args:
        ml_type: The machine learning task type.
        model_name: Name of the model to test.
        fi_method: Name of the feature importance method to run.

    Raises:
        pytest.fail: If the subprocess times out, exits with a non-zero
            code, or crashes on a signal.
    """

    def _tail(stream: "str | bytes | None") -> str:
        """Return the last 2000 chars of captured output for the report."""
        if not stream:
            return "(empty)"
        if isinstance(stream, bytes):
            # TimeoutExpired may carry raw bytes even in text mode.
            stream = stream.decode(errors="replace")
        return stream[-2000:]

    cmd = [
        sys.executable,
        str(Path(__file__)),
        "--ml-type",
        ml_type.value,
        "--model-name",
        model_name,
        "--fi-method",
        fi_method,
    ]
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=300,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        # Previously an uncaught TimeoutExpired surfaced as a pytest ERROR
        # without the captured output; report it as a failure with the same
        # diagnostics as other subprocess problems.
        pytest.fail(
            f"Subprocess TIMED OUT after {exc.timeout}s for "
            f"{ml_type.value}-{model_name}-{fi_method}\n"
            f"stdout:\n{_tail(exc.stdout)}\n"
            f"stderr:\n{_tail(exc.stderr)}"
        )

    if result.returncode != 0:
        # Truncate output to avoid overwhelming the test report
        stdout_tail = _tail(result.stdout)
        stderr_tail = _tail(result.stderr)

        if result.returncode < 0:
            # Negative return code means the process was killed by a signal
            try:
                sig_name = signal.Signals(-result.returncode).name
            except (ValueError, AttributeError):
                sig_name = f"signal {-result.returncode}"
            header = f"Subprocess CRASHED ({sig_name})"
        else:
            header = "Subprocess test failed"

        pytest.fail(
            f"{header} for {ml_type.value}-{model_name}-{fi_method}\n"
            f"Exit code: {result.returncode}\n"
            f"stdout:\n{stdout_tail}\n"
            f"stderr:\n{stderr_tail}"
        )
352+
353+
@pytest.mark.parametrize("ml_type,model_name,fi_method", _generate_model_fi_params())
def test_feature_importance(ml_type, model_name, fi_method):
    """Test a single FI method for a single model.

    SHAP-based methods run in isolated subprocesses to prevent segfaults
    from CatBoost GC, numba/LLVM crashes, and memory accumulation.
    All other FI methods run directly in the pytest process for speed
    and proper pytest protocol compliance.

    Test IDs look like::

        test_feature_importance[binary-CatBoostClassifier-calculate_fi_featuresused_shap]
    """
    # Guard clause: segfault-prone methods get full process isolation.
    if fi_method in _SUBPROCESS_FI_METHODS:
        _run_in_subprocess(ml_type, model_name, fi_method)
        return

    # Safe methods run in-process; collect garbage afterwards to keep
    # per-test memory from accumulating across the parametrized matrix.
    _run_test_body(ml_type, model_name, fi_method)
    gc.collect()
372+
373+
# ---------------------------------------------------------------------------
# Subprocess entry point
# ---------------------------------------------------------------------------
# Executed when _run_in_subprocess() launches this file as a script: runs
# exactly one (ml_type, model_name, fi_method) combination and exits. Any
# non-zero exit code (including signal-based crashes) is picked up by the
# parent pytest process and reported as a test failure with full diagnostics.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Run a single feature-importance test in isolation.",
    )
    # Table-driven flag registration: all three arguments are mandatory.
    for flag, help_text in (
        ("--ml-type", "MLType enum value"),
        ("--model-name", "Model registry name"),
        ("--fi-method", "FI method name"),
    ):
        parser.add_argument(flag, required=True, help=help_text)
    cli_args = parser.parse_args()

    _run_test_body(MLType(cli_args.ml_type), cli_args.model_name, cli_args.fi_method)
0 commit comments