-
Notifications
You must be signed in to change notification settings - Fork 51
Open
Description
Env:
Intel Xeon Granite Rapids
Ubuntu 24.04
Python 3.14-nogil (freethreading enabled)
pip show torch transformers kernels
Name: torch
Version: 2.10.0+cpu
Name: transformers
Version: 5.3.0
Name: kernels
Version: 0.12.2
I have no idea what's going on. Was the kernel compiled incorrectly (possibility 1), or is there a thread race in the importer (strange, but possibility 2)?
Run below:
OMP_NUM_THREADS=32 MKL_NUM_THREADS=32 OPENBLAS_NUM_THREADS=32 NUMEXPR_NUM_THREADS=32 python scripts/repro_hf_kernel_import_crash.py --mode both
Reproducing Script:
#!/usr/bin/env python3
"""
Standalone reproducer for HF kernels import crashes.
This script intentionally does not import gptqmodel.
It only uses torch + kernels package APIs.
"""
from __future__ import annotations
import argparse
import faulthandler
import os
import platform
import sys
import sysconfig
import traceback
from pathlib import Path
def _print_runtime_env() -> None:
    """Print interpreter, platform, threading and package details to stdout.

    Reports the Python version, platform string, machine architecture, the
    ``Py_GIL_DISABLED`` build variable, the live GIL state (when the
    interpreter exposes ``sys._is_gil_enabled``), a fixed set of
    thread-count environment variables, and the ``torch`` / ``kernels``
    packages. Every line is written with ``flush=True``.

    A failure while importing or inspecting ``torch`` or ``kernels`` is
    reported as a single line and does not propagate.
    """

    def emit(line: str) -> None:
        # Single place that enforces the flush-on-every-line behaviour.
        print(line, flush=True)

    emit("=== Runtime ===")
    emit(f"python: {sys.version}")
    emit(f"platform: {platform.platform()}")
    emit(f"machine: {platform.machine()}")
    emit(f"Py_GIL_DISABLED: {sysconfig.get_config_var('Py_GIL_DISABLED')}")

    # Not every interpreter has this private hook, so probe for it first.
    if hasattr(sys, "_is_gil_enabled"):
        emit(f"sys._is_gil_enabled(): {sys._is_gil_enabled()}")

    reported_env_vars = (
        "PYTHON_GIL",
        "OMP_NUM_THREADS",
        "MKL_NUM_THREADS",
        "OPENBLAS_NUM_THREADS",
        "NUMEXPR_NUM_THREADS",
    )
    for name in reported_env_vars:
        # Unset variables are shown as ``None``.
        emit(f"{name}={os.environ.get(name)}")

    # The attribute reads stay inside the ``try`` so that any failure while
    # inspecting the package is reported the same way as a failed import.
    try:
        import torch

        emit(f"torch: {torch.__version__}")
        emit(f"torch.compiled_with_cxx11_abi(): {torch.compiled_with_cxx11_abi()}")
    except Exception as exc:
        emit(f"torch import failed: {exc!r}")

    try:
        import kernels

        emit(f"kernels package: {kernels.__file__}")
    except Exception as exc:
        emit(f"kernels import failed: {exc!r}")
def _select_cpu_variant(variants: list[Path]) -> Path:
    """Pick one CPU build-variant directory from ``variants``.

    Only paths whose final component contains ``"-cpu-"`` are considered.
    Among those, paths whose name contains the lower-cased
    ``platform.machine()`` string (compared case-insensitively) are
    preferred; if none match, all CPU variants remain candidates. The
    candidate with the alphabetically smallest name is returned.

    Args:
        variants: Candidate variant paths.

    Returns:
        The selected variant path.

    Raises:
        RuntimeError: If no path name contains ``"-cpu-"``.
    """

    def by_name(candidate: Path) -> str:
        return candidate.name

    cpu_only = [candidate for candidate in variants if "-cpu-" in candidate.name]
    if not cpu_only:
        raise RuntimeError("No CPU kernel variants found in build directory.")

    arch = platform.machine().lower()
    native = [candidate for candidate in cpu_only if arch in candidate.name.lower()]

    # Fall back to every CPU variant when none mentions this machine's arch.
    pool = native or cpu_only
    return min(pool, key=by_name)
def _repro_get_kernel(repo_id: str) -> None:
    """Exercise the ``kernels.get_kernel`` import path for ``repo_id``.

    Prints a banner, calls ``get_kernel(repo_id)``, then prints the repr of
    the returned object and of its ``gemm_int4_forward`` attribute. Every
    line is flushed as it is written.

    Args:
        repo_id: Repository identifier handed to ``get_kernel``.
    """
    print("\n=== Repro Path: kernels.get_kernel ===", flush=True)

    # Imported here, after the banner, rather than at module import time.
    from kernels import get_kernel

    print(f"calling get_kernel({repo_id!r})", flush=True)
    loaded = get_kernel(repo_id)
    print(f"kernel object: {loaded!r}", flush=True)

    # Plain attribute access; raises AttributeError if the op is missing.
    forward_fn = loaded.gemm_int4_forward
    print(f"gemm_int4_forward: {forward_fn!r}", flush=True)
def _repro_import_variant(repo_id: str, revision: str) -> None:
    """Exercise the ``kernels.utils._import_from_path`` import path.

    Installs all kernel variants for ``repo_id`` at ``revision``, lists the
    variant directories found in the resulting build directory, selects a
    CPU variant with ``_select_cpu_variant`` and imports it through
    ``_import_from_path``. Finally prints the repr of the imported module
    and of its ``gemm_int4_forward`` attribute. Every line is flushed as it
    is written.

    Args:
        repo_id: Repository identifier of the kernel.
        revision: Revision passed to ``install_kernel_all_variants``.
    """
    print("\n=== Repro Path: kernels.utils._import_from_path ===", flush=True)

    # Imported here, after the banner, rather than at module import time.
    from kernels.utils import (
        _import_from_path,
        install_kernel_all_variants,
        package_name_from_repo_id,
    )

    print(
        f"installing kernel variants for repo={repo_id!r}, revision={revision!r}",
        flush=True,
    )
    build_dir = Path(install_kernel_all_variants(repo_id, revision=revision))
    print(f"build_dir: {build_dir}", flush=True)

    # Sub-directories of the build directory, in name order.
    variant_dirs = [entry for entry in build_dir.iterdir() if entry.is_dir()]
    variant_dirs.sort(key=lambda entry: entry.name)
    for entry in variant_dirs:
        print(f"variant: {entry.name}", flush=True)

    chosen = _select_cpu_variant(variant_dirs)
    pkg_name = package_name_from_repo_id(repo_id)
    print(f"selected cpu variant: {chosen.name}", flush=True)
    print(f"package_name: {pkg_name}", flush=True)

    print("importing extension module via _import_from_path(...)", flush=True)
    imported = _import_from_path(pkg_name, chosen)
    print(f"module: {imported!r}", flush=True)

    # Plain attribute access; raises AttributeError if the op is missing.
    forward_fn = imported.gemm_int4_forward
    print(f"gemm_int4_forward: {forward_fn!r}", flush=True)
def main() -> int:
    """Parse the command line and run the selected reproduction path(s).

    Options:
        --repo-id:  kernel repository (default
                    ``kernels-community/quantization-gptq``).
        --revision: revision used by the import-variant path (default
                    ``main``).
        --mode:     ``both``, ``get-kernel`` or ``import-variant``.

    ``faulthandler`` is enabled for all threads before anything else runs,
    then the runtime environment is printed.

    Returns:
        ``0`` if the selected paths finished without a Python exception,
        ``1`` if one was raised (its traceback is printed).
    """
    cli = argparse.ArgumentParser(
        description="Standalone HF kernel import crash reproducer."
    )
    cli.add_argument("--repo-id", default="kernels-community/quantization-gptq")
    cli.add_argument("--revision", default="main")
    cli.add_argument(
        "--mode",
        choices=("both", "get-kernel", "import-variant"),
        default="both",
        help="Which import path to run.",
    )
    opts = cli.parse_args()

    faulthandler.enable(all_threads=True)
    _print_runtime_env()

    # ``choices`` limits the mode to three values, so each path runs unless
    # the *other* single-path mode was requested.
    run_get_kernel = opts.mode != "import-variant"
    run_import_variant = opts.mode != "get-kernel"

    try:
        if run_get_kernel:
            _repro_get_kernel(opts.repo_id)
        if run_import_variant:
            _repro_import_variant(opts.repo_id, opts.revision)
    except Exception:
        print("\nException caught (non-segfault path):", flush=True)
        traceback.print_exc()
        return 1
    else:
        print("\nCompleted without Python exception.", flush=True)
        return 0
if __name__ == "__main__":
    raise SystemExit(main())
Segfault Output:
(vm314t) root@gpu:~/gptqmodel# OMP_NUM_THREADS=32 MKL_NUM_THREADS=32 OPENBLAS_NUM_THREADS=32 NUMEXPR_NUM_THREADS=32 python scripts/repro_hf_kernel_import_crash.py --mode both
=== Runtime === 20 0 24672 5320 3356 S 0.0 0.0 0:00.08 systemd-udevd
python: 3.14.3 free-threading build (main, Feb 4 2026, 09:28:29) [GCC 13.3.0]
platform: Linux-6.18.6-x86_64-with-glibc2.39
machine: x86_64
Py_GIL_DISABLED: 1
sys._is_gil_enabled(): False
PYTHON_GIL=None
OMP_NUM_THREADS=32
MKL_NUM_THREADS=32
OPENBLAS_NUM_THREADS=32
NUMEXPR_NUM_THREADS=32
torch: 2.10.0+cpu
torch.compiled_with_cxx11_abi(): True
kernels package: /root/vm314t/lib/python3.14t/site-packages/kernels/__init__.py
=== Repro Path: kernels.get_kernel ===
calling get_kernel('kernels-community/quantization-gptq')
Fetching 6 files: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 3978.79it/s]
Download complete: : 0.00B [00:00, ?B/s] Fatal Python error: Segmentation fault | 0/6 [00:00<?, ?it/s]
Thread 0x00007839257646c0 [Thread-1] (most recent call first):
File "/usr/lib/python3.14/threading.py", line 373 in wait
File "/usr/lib/python3.14/threading.py", line 670 in wait
File "/root/vm314t/lib/python3.14t/site-packages/tqdm/_monitor.py", line 60 in run
File "/usr/lib/python3.14/threading.py", line 1082 in _bootstrap_inner
File "/usr/lib/python3.14/threading.py", line 1044 in _bootstrap
Current thread 0x000078398f2cd080 [python] (most recent call first):
File "<frozen importlib._bootstrap>", line 491 in _call_with_frames_removed
File "<frozen importlib._bootstrap_external>", line 1053 in create_module
File "<frozen importlib._bootstrap>", line 816 in module_from_spec
File "<frozen importlib._bootstrap>", line 924 in _load_unlocked
File "<frozen importlib._bootstrap>", line 1342 in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 1371 in _find_and_load
File "<frozen importlib._bootstrap>", line 491 in _call_with_frames_removed
File "<frozen importlib._bootstrap>", line 1426 in _handle_fromlist
File "/root/.cache/huggingface/hub/models--kernels-community--quantization-gptq/snapshots/cf6f3f1052ececfc65659134b4ac534eae4dea15/build/torch210-cxx11-cpu-x86_64-linux/_ops.py", line 2 in <module>
File "<frozen importlib._bootstrap>", line 491 in _call_with_frames_removed
File "<frozen importlib._bootstrap_external>", line 759 in exec_module
File "<frozen importlib._bootstrap>", line 938 in _load_unlocked
File "<frozen importlib._bootstrap>", line 1342 in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 1371 in _find_and_load
File "/root/.cache/huggingface/hub/models--kernels-community--quantization-gptq/snapshots/cf6f3f1052ececfc65659134b4ac534eae4dea15/build/torch210-cxx11-cpu-x86_64-linux/custom_ops.py", line 2 in <module>
File "<frozen importlib._bootstrap>", line 491 in _call_with_frames_removed
File "<frozen importlib._bootstrap_external>", line 759 in exec_module
File "<frozen importlib._bootstrap>", line 938 in _load_unlocked
File "<frozen importlib._bootstrap>", line 1342 in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 1371 in _find_and_load
File "/root/.cache/huggingface/hub/models--kernels-community--quantization-gptq/snapshots/cf6f3f1052ececfc65659134b4ac534eae4dea15/build/torch210-cxx11-cpu-x86_64-linux/__init__.py", line 1 in <module>
File "<frozen importlib._bootstrap>", line 491 in _call_with_frames_removed
File "<frozen importlib._bootstrap_external>", line 759 in exec_module
File "/root/vm314t/lib/python3.14t/site-packages/kernels/utils.py", line 158 in _import_from_path
File "/root/vm314t/lib/python3.14t/site-packages/kernels/utils.py", line 315 in get_kernel
File "/root/gptqmodel/scripts/repro_hf_kernel_import_crash.py", line 75 in _repro_get_kernel
File "/root/gptqmodel/scripts/repro_hf_kernel_import_crash.py", line 118 in main
File "/root/gptqmodel/scripts/repro_hf_kernel_import_crash.py", line 131 in <module>
Current thread's C stack trace (most recent call first):
Binary file "python", at _Py_DumpStack+0x4a [0x67808a]
Binary file "python" [0x71377a]
Binary file "/lib/x86_64-linux-gnu/libc.so.6", at +0x45330 [0x78398f313330]
Binary file "/lib/x86_64-linux-gnu/libc.so.6", at +0x19b136 [0x78398f469136]
Binary file "python" [0x518169]
Binary file "python" [0x62e9d3]
Binary file "python" [0x7d56ff]
Binary file "python" [0x7d6032]
Binary file "python", at _PyEval_EvalFrameDefault+0x8ed1 [0x4303e1]
Binary file "python" [0x5efa6e]
Binary file "python" [0x49e9ae]
Binary file "python", at PyObject_CallMethodObjArgs+0xec [0x49f4ec]
Binary file "python", at PyImport_ImportModuleLevelObject+0x791 [0x635621]
Binary file "python" [0x5e3ba4]
Binary file "python", at _PyEval_EvalFrameDefault+0x8ed1 [0x4303e1]
Binary file "python" [0x5efa6e]
Binary file "python" [0x49e9ae]
Binary file "python", at PyObject_CallMethodObjArgs+0xec [0x49f4ec]
Binary file "python", at PyImport_ImportModuleLevelObject+0x7ea [0x63567a]
Binary file "python", at _PyEval_ImportName+0xcf [0x5f141f]
Binary file "python", at _PyEval_EvalFrameDefault+0xde0 [0x4282f0]
Binary file "python" [0x7c2f95]
Binary file "python", at PyEval_EvalCode+0xab [0x5ec80b]
Binary file "python" [0x5e55ac]
Binary file "python", at _PyEval_EvalFrameDefault+0x8ed1 [0x4303e1]
Binary file "python" [0x5efa6e]
Binary file "python" [0x49e9ae]
Binary file "python", at PyObject_CallMethodObjArgs+0xec [0x49f4ec]
Binary file "python", at PyImport_ImportModuleLevelObject+0x791 [0x635621]
Binary file "python", at _PyEval_ImportName+0xcf [0x5f141f]
Binary file "python", at _PyEval_EvalFrameDefault+0xde0 [0x4282f0]
Binary file "python" [0x7c2f95]
<truncated rest of calls>
Extension modules: numpy._core._multiarray_umath, numpy.linalg._umath_linalg, torch._C, torch._C._dynamo.autograd_compiler, torch._C._dynamo.eval_frame, torch._C._dynamo.guards, torch._C._dynamo.utils, torch._C._fft, torch._C._linalg, torch._C._nested, torch._C._nn, torch._C._sparse, torch._C._special, yaml._yaml (total: 14)
Segmentation fault
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels