Skip to content

Commit 9c3563d

Browse files
authored
[BACKEND][NVIDIA] Update ptxas_options knobs default value (#8121)
This PR sets the default value of the `ptxas_options` knob to the value of the `PTXAS_OPTIONS` environment variable. We need this option when we want to apply extra `ptxas` options across a range of Triton kernels without changing their call sites.

# New contributor declaration
- [x] I have written a PR description following these [rules](https://cbea.ms/git-commit/#why-not-how).
- [x] I have run `pre-commit run --from-ref origin/main --to-ref HEAD`.
- Select one of the following.
  - [x] This PR does not need a test because this is a small change of reading ptxas options from environment variables.
- Select one of the following.
  - [x] I have not added any `lit` tests.
1 parent 7871be2 commit 9c3563d

File tree

4 files changed

+15
-1
lines changed

4 files changed

+15
-1
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ See [`python/triton/knobs.py`](python/triton/knobs.py) for the full list of conf
232232
- `TRITON_F32_DEFAULT` sets the default input precision of `tl.dot` when using 32-bit floats, which can be either `ieee`, `tf32`, or `tf32x3`.
233233
- `TRITON_FRONT_END_DEBUGGING=1` disables exception wrapping when an error occurs in the compiler frontend, allowing the full stack trace to be seen.
234234
- `TRITON_DISABLE_LINE_INFO=1` removes all line information from the module.
235+
- `PTXAS_OPTIONS` passes additional command-line options to the PTX assembler `ptxas` (only on NVIDIA).
235236

236237
> [!NOTE]
237238
> Some of these environment variables don't have a knob in `knobs.py` -- those are only relevant to the C++ layer(s), hence they don't exist in the Python layer.

python/test/unit/test_knobs.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,30 +247,42 @@ def test_nvidia_tool(fresh_knobs, tmp_path, monkeypatch):
247247
default_ptxas = triton_root / "backends/nvidia/bin/ptxas"
248248

249249
assert Path(fresh_knobs.nvidia.ptxas.path).resolve() == default_ptxas.resolve()
250+
assert fresh_knobs.nvidia.ptxas_options is None
250251

251252
tmp_ptxas = tmp_path / "ptxas-special"
252253
shutil.copy(default_ptxas, tmp_ptxas)
253254
monkeypatch.setenv("TRITON_PTXAS_PATH", str(tmp_ptxas))
255+
monkeypatch.setenv("PTXAS_OPTIONS", "--verbose")
254256
assert Path(fresh_knobs.nvidia.ptxas.path).resolve() == tmp_ptxas.resolve()
257+
assert fresh_knobs.nvidia.ptxas_options == "--verbose"
255258

256259
# Don't prop so that the `del` is correctly tested
257260
fresh_knobs.propagate_env = False
258261
fresh_knobs.nvidia.ptxas = str(default_ptxas)
262+
fresh_knobs.nvidia.ptxas_options = "--device-debug"
259263
fresh_knobs.propagate_env = True
260264
assert Path(fresh_knobs.nvidia.ptxas.path).resolve() == default_ptxas.resolve()
265+
assert fresh_knobs.nvidia.ptxas_options == "--device-debug"
261266

262267
del fresh_knobs.nvidia.ptxas
268+
del fresh_knobs.nvidia.ptxas_options
263269
assert Path(fresh_knobs.nvidia.ptxas.path).resolve() == tmp_ptxas.resolve()
270+
assert fresh_knobs.nvidia.ptxas_options == "--verbose"
264271

265272
# Triple check scope works
266273
with fresh_knobs.nvidia.scope():
267274
fresh_knobs.nvidia.ptxas = str(default_ptxas)
275+
fresh_knobs.nvidia.ptxas_options = "--device-debug"
268276
assert Path(fresh_knobs.nvidia.ptxas.path).resolve() == default_ptxas.resolve()
277+
assert fresh_knobs.nvidia.ptxas_options == "--device-debug"
269278

270279
assert Path(fresh_knobs.nvidia.ptxas.path).resolve() == tmp_ptxas.resolve()
280+
assert fresh_knobs.nvidia.ptxas_options == "--verbose"
271281

272282
monkeypatch.delenv("TRITON_PTXAS_PATH")
283+
monkeypatch.delenv("PTXAS_OPTIONS")
273284
assert Path(fresh_knobs.nvidia.ptxas.path).resolve() == default_ptxas.resolve()
285+
assert fresh_knobs.nvidia.ptxas_options is None
274286

275287

276288
def test_opt_bool(fresh_knobs, monkeypatch):

python/triton/knobs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,7 @@ class nvidia_knobs(base_knobs):
478478

479479
dump_nvptx: env_bool = env_bool("NVPTX_ENABLE_DUMP")
480480
disable_ptxas_opt: env_bool = env_bool("DISABLE_PTXAS_OPT")
481+
ptxas_options: env_opt_str = env_opt_str("PTXAS_OPTIONS")
481482
mock_ptx_version: env_opt_str = env_opt_str("TRITON_MOCK_PTX_VERSION")
482483
dump_ptxas_log: env_bool = env_bool("TRITON_DUMP_PTXAS_LOG")
483484

third_party/nvidia/backend/compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ class CUDAOptions:
113113
maxnreg: Optional[int] = None
114114
cluster_dims: tuple = (1, 1, 1)
115115
ptx_version: int = None
116-
ptx_options: str = None
116+
ptx_options: Optional[str] = knobs.nvidia.ptxas_options
117117
ir_override: Optional[str] = None # filename of a user-defined IR (*.{ttir|ttgir|llir|ptx})
118118
enable_fp_fusion: bool = True
119119
launch_cooperative_grid: bool = False

0 commit comments

Comments
 (0)