Skip to content

Commit 4ab6871

Browse files
authored
Add shader dump when compiling Triton kernels (#4482)
Add dump information for debugging when autotuning a Triton kernel: 1. Print the Triton cache directory when dumping the autotuner information with `TRITON_PRINT_AUTOTUNING`. 2. Add a new knob `TRITON_INTEL_ENABLE_IGC_SHADER_DUMP` to dump the shader information in IGC when compiling the Triton kernel. Signed-off-by: Lu,Chengjun <[email protected]>
1 parent 956c21d commit 4ab6871

File tree

3 files changed

+14
-3
lines changed

3 files changed

+14
-3
lines changed

python/triton/compiler/compiler.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,8 @@ def compile(src, target=None, options=None):
320320
**options.__dict__,
321321
**env_vars,
322322
}
323+
324+
metadata["cache_dir"] = fn_cache_manager.cache_dir
323325
metadata["triton_version"] = __version__
324326
# run compilation pipeline and populate metadata
325327
stages = dict()

python/triton/knobs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,7 @@ class nvidia_knobs(base_knobs):
489489
class intel_knobs(base_knobs):
490490
spirv_dis: env_intel_tool = env_intel_tool("spirv-dis")
491491

492+
dump_shader_info: env_bool = env_bool("TRITON_INTEL_ENABLE_IGC_SHADER_DUMP", False)
492493
gen_native_code: env_bool = env_bool("TRITON_XPU_GEN_NATIVE_CODE", False)
493494
tile_load_ll: env_bool = env_bool("TRITON_XPU_ENABLE_TILE_LOAD_LINEAR_LAYOUT", True)
494495
advanced_path: env_bool = env_bool("TRITON_INTEL_ADVANCED_PATH", False)

third_party/intel/backend/compiler.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ def __post_init__(self):
5555
object.__setattr__(self, 'extern_libs', tuple(extern_libs.items()))
5656
if self.num_warps <= 0 or (self.num_warps & (self.num_warps - 1)) != 0:
5757
raise AssertionError("num_warps must be a power of 2")
58-
self.generate_native_code = knobs.intel.gen_native_code or self.generate_native_code
58+
self.generate_native_code = (knobs.intel.gen_native_code
59+
or knobs.intel.dump_shader_info) or self.generate_native_code
5960

6061
def hash(self):
6162
key = '_'.join([f'{name}-{val}' for name, val in self.__dict__.items()])
@@ -390,6 +391,13 @@ def make_spv(src, metadata, options):
390391
if knobs.intel.disable_igc_opt:
391392
metadata["build_flags"] += " -cl-opt-disable"
392393

394+
shader_dump_opt = ""
395+
if knobs.intel.dump_shader_info:
396+
# The IGC (Intel Graphic Compiler) only parses the options at first time in JIT-ing the binary per process.
397+
# Have to use the `ocloc` to generate the binary in sub-process to work around the limitation.
398+
assert options.generate_native_code, "Only support native code generation with shader dump"
399+
shader_dump_opt = f" -igc_opts ',DumpToCustomDir={metadata['cache_dir']},ShaderDumpEnable=1'"
400+
393401
metadata["generate_native_code"] = options.generate_native_code
394402

395403
if options.generate_native_code:
@@ -401,7 +409,7 @@ def make_spv(src, metadata, options):
401409

402410
ocloc_cmd = [
403411
'ocloc', 'compile', '-file', fsrc.name, '-o', fbin, '-spirv_input', '-device', 'pvc', '-options',
404-
metadata["build_flags"]
412+
metadata["build_flags"] + shader_dump_opt
405413
]
406414

407415
try:
@@ -417,7 +425,7 @@ def make_spv(src, metadata, options):
417425
"""
418426
metadata["build_flags"] += " -cl-intel-256-GRF-per-thread"
419427
# re-run with new build flags
420-
ocloc_cmd[-1] = metadata["build_flags"]
428+
ocloc_cmd[-1] = metadata["build_flags"] + shader_dump_opt
421429
subprocess.run(ocloc_cmd, check=True, close_fds=False, stdout=flog,
422430
stderr=subprocess.STDOUT)
423431
os.remove(flog.name)

0 commit comments

Comments
 (0)