Skip to content

Commit 6163213

Browse files
committed
try o0
Signed-off-by: dchigarev <[email protected]>
1 parent 21565a3 commit 6163213

File tree

3 files changed

+7
-8
lines changed

3 files changed

+7
-8
lines changed

lib/Conversion/TritonGPUToLLVM/Utility.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -666,7 +666,7 @@ SmallVector<Value> loadSharedToDistributed(RankedTensorType dstTy,
666666
assert(vecTy.getNumElements() % 64 == 0);
667667
for (int i = 0; i < vecTy.getNumElements(); i+=64) {
668668
auto smallVecTy = vec_ty(elemLlvmTy, 64);
669-
auto vecAddrNew = gep(vecAddr.getType(), i32_ty, vecAddr, SmallVector<Value>({i32_val(i)}));
669+
auto vecAddrNew = gep(vecAddr.getType(), i32_ty, vecAddr, SmallVector<Value>({i32_val(i)}), true);
670670
auto vecVal = load(smallVecTy, vecAddrNew);
671671
vecVal.setAlignment(smallVecTy.getNumElements() *
672672
elemLlvmTy.getIntOrFloatBitWidth() / 8);

python/triton/compiler/compiler.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -298,7 +298,7 @@ def compile(src, target=None, options=None):
298298
metadata_group[metadata_filename] = fn_cache_manager.put(json.dumps(metadata, default=vars), metadata_filename,
299299
binary=False)
300300
fn_cache_manager.put_group(metadata_filename, metadata_group)
301-
if os.environ.get("TR_PRINT_IR", "0") == "1":
301+
if os.environ.get("TR_PRINT_IR", "1") == "1":
302302
print("printing IR...")
303303
for name, path in metadata_group.items():
304304
print(f"==================== {name} ======================", flush=True)

third_party/intel/backend/compiler.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -132,11 +132,10 @@ def __init__(self, target: tuple) -> None:
132132
raise TypeError("target.arch is not a dict")
133133
dirname = os.path.dirname(os.path.realpath(__file__))
134134
mod = compile_module_from_src(Path(os.path.join(dirname, "arch_parser.c")).read_text(), "arch_utils")
135-
# breakpoint()
136-
# self.device_arch = mod.parse_device_arch(target.arch.get('architecture', 0))
137-
# self.properties = self.parse_target(target.arch)
138-
self.device_arch = "dg2"
139-
self.properties = {'name': 'Intel(R) Arc(TM) A770 Graphics', 'platform_name': 'Intel(R) oneAPI Unified Runtime over Level-Zero', 'vendor': 'Intel(R) Corporation', 'version': '12.55.8', 'gpu_eu_count': 512, 'gpu_subslice_count': 32, 'max_work_group_size': 1024, 'max_num_sub_groups': 128, 'sub_group_sizes': [8, 16, 32], 'has_fp64': False, 'has_subgroup_matrix_multiply_accumulate': True, 'has_subgroup_matrix_multiply_accumulate_tensor_float32': False, 'has_subgroup_2d_block_io': False, 'has_bfloat16_conversions': True}
135+
self.device_arch = mod.parse_device_arch(target.arch.get('architecture', 0))
136+
self.properties = self.parse_target(target.arch)
137+
# self.device_arch = "dg2"
138+
# self.properties = {'name': 'Intel(R) Arc(TM) A770 Graphics', 'platform_name': 'Intel(R) oneAPI Unified Runtime over Level-Zero', 'vendor': 'Intel(R) Corporation', 'version': '12.55.8', 'gpu_eu_count': 512, 'gpu_subslice_count': 32, 'max_work_group_size': 1024, 'max_num_sub_groups': 128, 'sub_group_sizes': [8, 16, 32], 'has_fp64': False, 'has_subgroup_matrix_multiply_accumulate': True, 'has_subgroup_matrix_multiply_accumulate_tensor_float32': False, 'has_subgroup_2d_block_io': False, 'has_bfloat16_conversions': True}
140139
print("DEVICE PROPS:")
141140
print(self.device_arch)
142141
print(self.properties)
@@ -331,7 +330,7 @@ def make_llir(src, metadata, options):
331330
if options.extern_libs:
332331
paths = [path for (name, path) in options.extern_libs]
333332
llvm.link_extern_libs(llvm_mod, paths)
334-
intel.optimize_module(llvm_mod, llvm.OPTIMIZE_O3)
333+
intel.optimize_module(llvm_mod, llvm.OPTIMIZE_O0)
335334
intel.post_process_llir(llvm_mod)
336335

337336
# Get some metadata

0 commit comments

Comments
 (0)