Skip to content

gptq kernel segfaults on load/import on Python 3.14-nogil #312

@Qubitium

Description

@Qubitium

Env:

Intel Xeon Granite Rapids
Ubuntu 24.04

Python 3.14-nogil (freethreading enabled)

pip show torch transformers kernels
Name: torch
Version: 2.10.0+cpu

Name: transformers
Version: 5.3.0

Name: kernels
Version: 0.12.2

@danieldk @MekkCyber

I have no idea what's going on. Was the kernel incorrectly compiled (possibility 1), or is there a thread race in the importer (strange, but possibility 2)?

Run below:

OMP_NUM_THREADS=32 MKL_NUM_THREADS=32 OPENBLAS_NUM_THREADS=32 NUMEXPR_NUM_THREADS=32 python scripts/repro_hf_kernel_import_crash.py --mode both

Reproduction script:

#!/usr/bin/env python3
"""
Standalone reproducer for HF kernels import crashes.

This script intentionally does not import gptqmodel.
It only uses torch + kernels package APIs.
"""

from __future__ import annotations

import argparse
import faulthandler
import os
import platform
import sys
import sysconfig
import traceback
from pathlib import Path


def _print_runtime_env() -> None:
    print("=== Runtime ===", flush=True)
    print(f"python: {sys.version}", flush=True)
    print(f"platform: {platform.platform()}", flush=True)
    print(f"machine: {platform.machine()}", flush=True)
    print(f"Py_GIL_DISABLED: {sysconfig.get_config_var('Py_GIL_DISABLED')}", flush=True)
    if hasattr(sys, "_is_gil_enabled"):
        print(f"sys._is_gil_enabled(): {sys._is_gil_enabled()}", flush=True)

    for key in (
        "PYTHON_GIL",
        "OMP_NUM_THREADS",
        "MKL_NUM_THREADS",
        "OPENBLAS_NUM_THREADS",
        "NUMEXPR_NUM_THREADS",
    ):
        print(f"{key}={os.getenv(key)}", flush=True)

    try:
        import torch

        print(f"torch: {torch.__version__}", flush=True)
        print(f"torch.compiled_with_cxx11_abi(): {torch.compiled_with_cxx11_abi()}", flush=True)
    except Exception as exc:
        print(f"torch import failed: {exc!r}", flush=True)

    try:
        import kernels

        print(f"kernels package: {kernels.__file__}", flush=True)
    except Exception as exc:
        print(f"kernels import failed: {exc!r}", flush=True)


def _select_cpu_variant(variants: list[Path]) -> Path:
    cpu_variants = [path for path in variants if "-cpu-" in path.name]
    if not cpu_variants:
        raise RuntimeError("No CPU kernel variants found in build directory.")

    machine = platform.machine().lower()
    matched = [path for path in cpu_variants if machine in path.name.lower()]
    if matched:
        return sorted(matched, key=lambda p: p.name)[0]
    return sorted(cpu_variants, key=lambda p: p.name)[0]


def _repro_get_kernel(repo_id: str) -> None:
    """Exercise the high-level ``kernels.get_kernel`` load path for *repo_id*.

    This is the path that segfaults on the free-threaded build; every print
    is flushed so partial output survives a hard crash.
    """
    print("\n=== Repro Path: kernels.get_kernel ===", flush=True)
    from kernels import get_kernel

    print(f"calling get_kernel({repo_id!r})", flush=True)
    kernel = get_kernel(repo_id)
    print(f"kernel object: {kernel!r}", flush=True)
    # Direct attribute access instead of getattr() with a constant name
    # (ruff B009): same AttributeError on failure, clearer on success.
    print(f"gemm_int4_forward: {kernel.gemm_int4_forward!r}", flush=True)


def _repro_import_variant(repo_id: str, revision: str) -> None:
    """Exercise the lower-level ``kernels.utils._import_from_path`` load path.

    Installs every build variant of *repo_id* at *revision*, selects the CPU
    variant for this machine, and imports it directly — bypassing
    ``get_kernel`` — to narrow down where the crash originates.
    """
    print("\n=== Repro Path: kernels.utils._import_from_path ===", flush=True)
    from kernels.utils import _import_from_path, install_kernel_all_variants, package_name_from_repo_id

    print(f"installing kernel variants for repo={repo_id!r}, revision={revision!r}", flush=True)
    build_dir = Path(install_kernel_all_variants(repo_id, revision=revision))
    print(f"build_dir: {build_dir}", flush=True)
    variants = sorted([path for path in build_dir.iterdir() if path.is_dir()], key=lambda p: p.name)
    for variant in variants:
        print(f"variant: {variant.name}", flush=True)

    selected = _select_cpu_variant(variants)
    package_name = package_name_from_repo_id(repo_id)
    print(f"selected cpu variant: {selected.name}", flush=True)
    print(f"package_name: {package_name}", flush=True)
    print("importing extension module via _import_from_path(...)", flush=True)
    module = _import_from_path(package_name, selected)
    print(f"module: {module!r}", flush=True)
    # Direct attribute access instead of getattr() with a constant name
    # (ruff B009): same AttributeError on failure, clearer on success.
    print(f"gemm_int4_forward: {module.gemm_int4_forward!r}", flush=True)


def main() -> int:
    """CLI entry point: print runtime info, then run the selected repro path(s).

    Returns 0 on clean completion, 1 when a Python exception (as opposed to a
    segfault) was raised by either repro path.
    """
    parser = argparse.ArgumentParser(description="Standalone HF kernel import crash reproducer.")
    parser.add_argument("--repo-id", default="kernels-community/quantization-gptq")
    parser.add_argument("--revision", default="main")
    parser.add_argument(
        "--mode",
        choices=("both", "get-kernel", "import-variant"),
        default="both",
        help="Which import path to run.",
    )
    opts = parser.parse_args()

    # Dump native tracebacks for all threads if we do segfault.
    faulthandler.enable(all_threads=True)
    _print_runtime_env()

    # argparse restricts --mode to the three choices, so "not the other one"
    # is equivalent to membership in ("both", <this one>).
    want_get_kernel = opts.mode != "import-variant"
    want_import_variant = opts.mode != "get-kernel"
    try:
        if want_get_kernel:
            _repro_get_kernel(opts.repo_id)
        if want_import_variant:
            _repro_import_variant(opts.repo_id, opts.revision)
    except Exception:
        print("\nException caught (non-segfault path):", flush=True)
        traceback.print_exc()
        return 1

    print("\nCompleted without Python exception.", flush=True)
    return 0


if __name__ == "__main__":
    # sys.exit raises SystemExit with main()'s return code — identical to
    # `raise SystemExit(main())`.
    sys.exit(main())

Segfault Output

(vm314t) root@gpu:~/gptqmodel#  OMP_NUM_THREADS=32 MKL_NUM_THREADS=32 OPENBLAS_NUM_THREADS=32 NUMEXPR_NUM_THREADS=32 python scripts/repro_hf_kernel_import_crash.py --mode both                                                            
=== Runtime ===
python: 3.14.3 free-threading build (main, Feb  4 2026, 09:28:29) [GCC 13.3.0]
platform: Linux-6.18.6-x86_64-with-glibc2.39
machine: x86_64
Py_GIL_DISABLED: 1
sys._is_gil_enabled(): False
PYTHON_GIL=None
OMP_NUM_THREADS=32
MKL_NUM_THREADS=32
OPENBLAS_NUM_THREADS=32
NUMEXPR_NUM_THREADS=32
torch: 2.10.0+cpu
torch.compiled_with_cxx11_abi(): True
kernels package: /root/vm314t/lib/python3.14t/site-packages/kernels/__init__.py

=== Repro Path: kernels.get_kernel ===
calling get_kernel('kernels-community/quantization-gptq')
Fetching 6 files: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 3978.79it/s]
Download complete: : 0.00B [00:00, ?B/s]              Fatal Python error: Segmentation fault                                                                                                                            | 0/6 [00:00<?, ?it/s]

Thread 0x00007839257646c0 [Thread-1] (most recent call first):
  File "/usr/lib/python3.14/threading.py", line 373 in wait
  File "/usr/lib/python3.14/threading.py", line 670 in wait
  File "/root/vm314t/lib/python3.14t/site-packages/tqdm/_monitor.py", line 60 in run
  File "/usr/lib/python3.14/threading.py", line 1082 in _bootstrap_inner
  File "/usr/lib/python3.14/threading.py", line 1044 in _bootstrap

Current thread 0x000078398f2cd080 [python] (most recent call first):
  File "<frozen importlib._bootstrap>", line 491 in _call_with_frames_removed
  File "<frozen importlib._bootstrap_external>", line 1053 in create_module
  File "<frozen importlib._bootstrap>", line 816 in module_from_spec
  File "<frozen importlib._bootstrap>", line 924 in _load_unlocked
  File "<frozen importlib._bootstrap>", line 1342 in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 1371 in _find_and_load
  File "<frozen importlib._bootstrap>", line 491 in _call_with_frames_removed
  File "<frozen importlib._bootstrap>", line 1426 in _handle_fromlist
  File "/root/.cache/huggingface/hub/models--kernels-community--quantization-gptq/snapshots/cf6f3f1052ececfc65659134b4ac534eae4dea15/build/torch210-cxx11-cpu-x86_64-linux/_ops.py", line 2 in <module>
  File "<frozen importlib._bootstrap>", line 491 in _call_with_frames_removed
  File "<frozen importlib._bootstrap_external>", line 759 in exec_module
  File "<frozen importlib._bootstrap>", line 938 in _load_unlocked
  File "<frozen importlib._bootstrap>", line 1342 in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 1371 in _find_and_load
  File "/root/.cache/huggingface/hub/models--kernels-community--quantization-gptq/snapshots/cf6f3f1052ececfc65659134b4ac534eae4dea15/build/torch210-cxx11-cpu-x86_64-linux/custom_ops.py", line 2 in <module>
  File "<frozen importlib._bootstrap>", line 491 in _call_with_frames_removed
  File "<frozen importlib._bootstrap_external>", line 759 in exec_module
  File "<frozen importlib._bootstrap>", line 938 in _load_unlocked
  File "<frozen importlib._bootstrap>", line 1342 in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 1371 in _find_and_load
  File "/root/.cache/huggingface/hub/models--kernels-community--quantization-gptq/snapshots/cf6f3f1052ececfc65659134b4ac534eae4dea15/build/torch210-cxx11-cpu-x86_64-linux/__init__.py", line 1 in <module>
  File "<frozen importlib._bootstrap>", line 491 in _call_with_frames_removed
  File "<frozen importlib._bootstrap_external>", line 759 in exec_module
  File "/root/vm314t/lib/python3.14t/site-packages/kernels/utils.py", line 158 in _import_from_path
  File "/root/vm314t/lib/python3.14t/site-packages/kernels/utils.py", line 315 in get_kernel
  File "/root/gptqmodel/scripts/repro_hf_kernel_import_crash.py", line 75 in _repro_get_kernel
  File "/root/gptqmodel/scripts/repro_hf_kernel_import_crash.py", line 118 in main
  File "/root/gptqmodel/scripts/repro_hf_kernel_import_crash.py", line 131 in <module>

Current thread's C stack trace (most recent call first):
  Binary file "python", at _Py_DumpStack+0x4a [0x67808a]
  Binary file "python" [0x71377a]
  Binary file "/lib/x86_64-linux-gnu/libc.so.6", at +0x45330 [0x78398f313330]
  Binary file "/lib/x86_64-linux-gnu/libc.so.6", at +0x19b136 [0x78398f469136]
  Binary file "python" [0x518169]
  Binary file "python" [0x62e9d3]
  Binary file "python" [0x7d56ff]
  Binary file "python" [0x7d6032]
  Binary file "python", at _PyEval_EvalFrameDefault+0x8ed1 [0x4303e1]
  Binary file "python" [0x5efa6e]
  Binary file "python" [0x49e9ae]
  Binary file "python", at PyObject_CallMethodObjArgs+0xec [0x49f4ec]
  Binary file "python", at PyImport_ImportModuleLevelObject+0x791 [0x635621]
  Binary file "python" [0x5e3ba4]
  Binary file "python", at _PyEval_EvalFrameDefault+0x8ed1 [0x4303e1]
  Binary file "python" [0x5efa6e]
  Binary file "python" [0x49e9ae]
  Binary file "python", at PyObject_CallMethodObjArgs+0xec [0x49f4ec]
  Binary file "python", at PyImport_ImportModuleLevelObject+0x7ea [0x63567a]
  Binary file "python", at _PyEval_ImportName+0xcf [0x5f141f]
  Binary file "python", at _PyEval_EvalFrameDefault+0xde0 [0x4282f0]
  Binary file "python" [0x7c2f95]
  Binary file "python", at PyEval_EvalCode+0xab [0x5ec80b]
  Binary file "python" [0x5e55ac]
  Binary file "python", at _PyEval_EvalFrameDefault+0x8ed1 [0x4303e1]
  Binary file "python" [0x5efa6e]
  Binary file "python" [0x49e9ae]
  Binary file "python", at PyObject_CallMethodObjArgs+0xec [0x49f4ec]
  Binary file "python", at PyImport_ImportModuleLevelObject+0x791 [0x635621]
  Binary file "python", at _PyEval_ImportName+0xcf [0x5f141f]
  Binary file "python", at _PyEval_EvalFrameDefault+0xde0 [0x4282f0]
  Binary file "python" [0x7c2f95]
  <truncated rest of calls>

Extension modules: numpy._core._multiarray_umath, numpy.linalg._umath_linalg, torch._C, torch._C._dynamo.autograd_compiler, torch._C._dynamo.eval_frame, torch._C._dynamo.guards, torch._C._dynamo.utils, torch._C._fft, torch._C._linalg, torch._C._nested, torch._C._nn, torch._C._sparse, torch._C._special, yaml._yaml (total: 14)
Segmentation fault

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions