|
| 1 | +;=========================== begin_copyright_notice ============================ |
| 2 | +; |
| 3 | +; Copyright (C) 2025 Intel Corporation |
| 4 | +; |
| 5 | +; SPDX-License-Identifier: MIT |
| 6 | +; |
| 7 | +;============================ end_copyright_notice ============================= |
| 8 | + |
| 9 | +; REQUIRES: regkeys |
| 10 | +; RUN: igc_opt --typed-pointers %s -S -o - -igc-clone-address-arithmetic --regkey=RematFlowThreshold=20 --regkey=RematRPELimit=0 --dce | FileCheck %s |
| 11 | + |
| 12 | +; Function Attrs: convergent nounwind null_pointer_is_valid |
; Test kernel for -igc-clone-address-arithmetic (see the RUN line of this file).
; The entry block %bb defines %tmp40..%tmp45 from %tmp39; their only uses are the
; sign compares inside the self-looping block %bb57. The CHECK lines expect the
; output to contain `%remat*` ors and `%cloned_*` icmps feeding the selects.
; NOTE(review): only the constants 1 and 2 are covered by CHECK lines; the ors
; with 4, 5, 6 and 7 are not checked -- confirm that this is intentional.
| 13 | +define spir_kernel void @widget(half addrspace(1)* align 2 %arg, half addrspace(1)* align 2 %arg1, half addrspace(1)* align 2 %arg2, float addrspace(1)* nocapture align 4 %arg3, i8 addrspace(1)* nocapture readonly align 1 %arg4, i32 addrspace(1)* nocapture readonly align 4 %arg5, i8 addrspace(1)* nocapture readonly align 1 %arg6, i32 addrspace(1)* nocapture readonly align 4 %arg7, half addrspace(1)* align 2 %arg8, <8 x i32> %arg9, i16 %arg10) #0 { |
| 14 | +bb: |
; Entry block: executed once before the loop. Element 6 of %arg9 drives the
; offsets added to %arg, %arg1 and %arg2; element 1 feeds %tmp36 below.
| 15 | + %tmp = extractelement <8 x i32> %arg9, i64 1 |
| 16 | + %tmp11 = extractelement <8 x i32> %arg9, i64 6 |
| 17 | + %tmp12 = icmp slt i32 %tmp11, 0 |
| 18 | + %tmp14 = add i32 %tmp11, 127 |
| 19 | + %spec.select = select i1 %tmp12, i32 %tmp14, i32 %tmp11 |
| 20 | + %tmp17 = ashr i32 %spec.select, 7 |
| 21 | + %tmp18 = shl i32 %tmp17, 7 |
| 22 | + %tmp19 = sub i32 %tmp11, %tmp18 |
| 23 | + %tmp20 = mul i32 %tmp17, 25165824 |
| 24 | + %tmp21 = mul nsw i32 %tmp19, 196608 |
| 25 | + %tmp22 = add i32 %tmp20, %tmp21 |
| 26 | + %tmp23 = shl nsw i32 %tmp19, 17 |
| 27 | + %tmp24 = sext i32 %tmp22 to i64 |
| 28 | + %tmp25 = ptrtoint half addrspace(1)* %arg to i64 |
| 29 | + %tmp26 = shl nsw i64 %tmp24, 1 |
| 30 | + %tmp27 = add i64 %tmp26, %tmp25 |
| 31 | + %tmp28 = sext i32 %tmp21 to i64 |
| 32 | + %tmp29 = ptrtoint half addrspace(1)* %arg1 to i64 |
| 33 | + %tmp30 = shl nsw i64 %tmp28, 1 |
| 34 | + %tmp31 = add i64 %tmp30, %tmp29 |
| 35 | + %tmp32 = sext i32 %tmp23 to i64 |
| 36 | + %tmp33 = ptrtoint half addrspace(1)* %arg2 to i64 |
| 37 | + %tmp34 = shl nsw i64 %tmp32, 1 |
| 38 | + %tmp35 = add i64 %tmp34, %tmp33 |
; %tmp39 = (zext(%arg10) & 112) | (%tmp << 7); %tmp40..%tmp45 OR in the
; constants 1, 2, 4, 5, 6 and 7. These six values are consumed only in %bb57.
| 39 | + %tmp36 = shl i32 %tmp, 7 |
| 40 | + %tmp37 = zext i16 %arg10 to i32 |
| 41 | + %tmp38 = and i32 %tmp37, 112 |
| 42 | + %tmp39 = or i32 %tmp38, %tmp36 |
| 43 | + %tmp40 = or i32 %tmp39, 1 |
| 44 | + %tmp41 = or i32 %tmp39, 2 |
| 45 | + %tmp42 = or i32 %tmp39, 4 |
| 46 | + %tmp43 = or i32 %tmp39, 5 |
| 47 | + %tmp44 = or i32 %tmp39, 6 |
| 48 | + %tmp45 = or i32 %tmp39, 7 |
; Each computed address is masked with -64 (low six bits cleared) before it is
; passed as the first operand of a prefetch call.
| 49 | + %tmp48 = and i64 %tmp27, -64 |
| 50 | + call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %tmp48, i32 0, i32 1023, i32 383, i32 0, i32 %tmp36, i32 16, i32 32, i32 32, i32 1, i1 false, i1 false, i32 4) |
| 51 | + %tmp51 = and i64 %tmp31, -64 |
| 52 | + call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %tmp51, i32 0, i32 1023, i32 383, i32 0, i32 0, i32 16, i32 32, i32 32, i32 1, i1 false, i1 false, i32 4) |
| 53 | + call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %tmp51, i32 0, i32 1023, i32 383, i32 0, i32 32, i32 16, i32 32, i32 32, i32 1, i1 false, i1 false, i32 4) |
| 54 | + %tmp54 = and i64 %tmp35, -64 |
| 55 | + call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %tmp54, i32 255, i32 1023, i32 255, i32 0, i32 0, i32 16, i32 32, i32 32, i32 1, i1 false, i1 false, i32 4) |
| 56 | + br label %bb57 |
| 57 | + |
; Loop block: its terminator branches back to %bb57, so there is no exit edge.
; The payload calls below all take a null pointer, and the results
; %tmp58..%tmp63 have no users in this function.
| 58 | +bb57: ; preds = %bb57, %bb |
| 59 | + call void @llvm.genx.GenISA.LSC2DBlockSetAddrPayloadField.p0i32.i32(i32* null, i32 6, i32 0, i1 false) |
| 60 | + %tmp58 = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* null, i32 0, i32 0, i32 32, i32 8, i32 16, i32 1, i1 true, i1 false, i32 0) |
| 61 | + %tmp59 = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* null, i32 0, i32 0, i32 32, i32 8, i32 16, i32 1, i1 true, i1 false, i32 0) |
| 62 | + call void @llvm.genx.GenISA.LSC2DBlockSetAddrPayloadField.p0i32.i32(i32* null, i32 5, i32 0, i1 false) |
| 63 | + call void @llvm.genx.GenISA.LSC2DBlockSetAddrPayloadField.p0i32.i32(i32* null, i32 6, i32 0, i1 false) |
| 64 | + %tmp60 = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* null, i32 0, i32 0, i32 32, i32 8, i32 16, i32 1, i1 true, i1 false, i32 0) |
| 65 | + %tmp61 = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* null, i32 0, i32 0, i32 32, i32 8, i32 16, i32 1, i1 true, i1 false, i32 0) |
| 66 | + call void @llvm.genx.GenISA.LSC2DBlockSetAddrPayloadField.p0i32.i32(i32* null, i32 5, i32 0, i1 false) |
| 67 | + call void @llvm.genx.GenISA.LSC2DBlockSetAddrPayloadField.p0i32.i32(i32* null, i32 6, i32 0, i1 false) |
| 68 | + %tmp62 = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* null, i32 0, i32 0, i32 32, i32 8, i32 16, i32 1, i1 true, i1 false, i32 0) |
| 69 | + %tmp63 = call <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32* null, i32 0, i32 0, i32 32, i32 8, i32 16, i32 1, i1 true, i1 false, i32 0) |
| 70 | + call void @llvm.genx.GenISA.LSC2DBlockSetAddrPayloadField.p0i32.i32(i32* null, i32 5, i32 0, i1 false) |
; Sign tests on the values defined in %bb. The CHECK lines below expect the
; `or` to reappear under a %remat* name and the `icmp` under a %cloned_* name.
; NOTE(review): presumably the pass re-creates them next to these uses -- the
; CHECK patterns only fix the names and operands, not the block; confirm.
| 71 | + %tmp84 = icmp slt i32 %tmp40, 0 |
| 72 | + %tmp85 = icmp slt i32 %tmp41, 0 |
| 73 | + %tmp87 = icmp slt i32 %tmp42, 0 |
| 74 | + %tmp88 = icmp slt i32 %tmp43, 0 |
| 75 | + %tmp89 = icmp slt i32 %tmp44, 0 |
| 76 | + %tmp90 = icmp slt i32 %tmp45, 0 |
| 77 | + |
| 78 | +; CHECK: [[REMAT_1:%remat.*]] = or i32 %tmp39, 1 |
| 79 | +; CHECK: [[CLONED_1:%cloned_.*]] = icmp slt i32 [[REMAT_1]] |
| 80 | +; CHECK: = select i1 [[CLONED_1]] |
| 81 | +; CHECK: [[REMAT_2:%remat.*]] = or i32 %tmp39, 2 |
| 82 | +; CHECK: [[CLONED_2:%cloned_.*]] = icmp slt i32 [[REMAT_2]] |
| 83 | +; CHECK: = select i1 [[CLONED_2]] |
| 84 | + |
; Each select yields -infinity (0xFFF0000000000000) when its compare is true
; and 0.0 otherwise.
| 85 | + %tmp95 = select i1 %tmp84, float 0xFFF0000000000000, float 0.000000e+00 |
| 86 | + %tmp96 = select i1 %tmp85, float 0xFFF0000000000000, float 0.000000e+00 |
| 87 | + %tmp98 = select i1 %tmp87, float 0xFFF0000000000000, float 0.000000e+00 |
| 88 | + %tmp99 = select i1 %tmp88, float 0xFFF0000000000000, float 0.000000e+00 |
| 89 | + %tmp100 = select i1 %tmp89, float 0xFFF0000000000000, float 0.000000e+00 |
| 90 | + %tmp101 = select i1 %tmp90, float 0xFFF0000000000000, float 0.000000e+00 |
; Vector assembly: lanes 0 and 3 hold the constant 0.0; lanes 1, 2, 4, 5, 6 and
; 7 hold the selected values, in that order.
| 91 | + %tmp103 = insertelement <8 x float> <float 0.000000e+00, float undef, float undef, float undef, float undef, float undef, float undef, float undef>, float %tmp95, i64 1 |
| 92 | + %tmp104 = insertelement <8 x float> %tmp103, float %tmp96, i64 2 |
| 93 | + %tmp105 = insertelement <8 x float> %tmp104, float 0.000000e+00, i64 3 |
| 94 | + %tmp106 = insertelement <8 x float> %tmp105, float %tmp98, i64 4 |
| 95 | + %tmp107 = insertelement <8 x float> %tmp106, float %tmp99, i64 5 |
| 96 | + %tmp108 = insertelement <8 x float> %tmp107, float %tmp100, i64 6 |
| 97 | + %tmp109 = insertelement <8 x float> %tmp108, float %tmp101, i64 7 |
; Only lane 0 of the maxnum result is extracted and passed to WaveAll; the
; WaveAll call keeps the whole chain above alive through the --dce run.
| 98 | + %tmp110 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> zeroinitializer, <8 x float> %tmp109) |
| 99 | + %tmp111 = extractelement <8 x float> %tmp110, i64 0 |
| 100 | + %tmp118 = call float @llvm.genx.GenISA.WaveAll.f32(float %tmp111, i8 12, i32 0) |
| 101 | + br label %bb57 |
| 102 | +} |
| 103 | + |
; Intrinsic declarations for this test. @widget calls WaveAll.f32,
; LSC2DBlockPrefetch.isVoid, LSC2DBlockSetAddrPayloadField.p0i32.i32,
; LSC2DBlockReadAddrPayload.v8i32.p0i32 and llvm.maxnum.v8f32.
; NOTE(review): sub.group.dpas, llvm.umin.i32, LSC2DBlockCreateAddrPayload and
; the v32i32 / v32i16 LSC2DBlockReadAddrPayload overloads are declared but never
; called in @widget -- presumably left over from test reduction; confirm before
; removing them.
| 104 | +; Function Attrs: convergent inaccessiblememonly nounwind |
| 105 | +declare float @llvm.genx.GenISA.WaveAll.f32(float, i8, i32) #1 |
| 106 | + |
| 107 | +; Function Attrs: convergent nounwind readnone willreturn |
| 108 | +declare <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1) #2 |
| 109 | + |
| 110 | +; Function Attrs: nounwind |
| 111 | +declare void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32) #3 |
| 112 | + |
| 113 | +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn |
| 114 | +declare i32 @llvm.umin.i32(i32, i32) #4 |
| 115 | + |
| 116 | +; Function Attrs: nounwind readnone speculatable willreturn |
| 117 | +declare i32* @llvm.genx.GenISA.LSC2DBlockCreateAddrPayload.p0i32(i64, i32, i32, i32, i32, i32, i32, i32, i32) #5 |
| 118 | + |
| 119 | +; Function Attrs: argmemonly nounwind speculatable willreturn writeonly |
| 120 | +declare void @llvm.genx.GenISA.LSC2DBlockSetAddrPayloadField.p0i32.i32(i32*, i32, i32, i1) #6 |
| 121 | + |
| 122 | +; Function Attrs: nounwind willreturn |
| 123 | +declare <8 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v8i32.p0i32(i32*, i32, i32, i32, i32, i32, i32, i1, i1, i32) #7 |
| 124 | + |
| 125 | +; Function Attrs: nounwind willreturn |
| 126 | +declare <32 x i32> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v32i32.p0i32(i32*, i32, i32, i32, i32, i32, i32, i1, i1, i32) #7 |
| 127 | + |
| 128 | +; Function Attrs: nounwind willreturn |
| 129 | +declare <32 x i16> @llvm.genx.GenISA.LSC2DBlockReadAddrPayload.v32i16.p0i32(i32*, i32, i32, i32, i32, i32, i32, i1, i1, i32) #7 |
| 130 | + |
| 131 | +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn |
| 132 | +declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #4 |
| 133 | + |
; Attribute groups referenced as #0..#7 by the kernel definition and the
; declarations in this file; each group repeats the list shown in the matching
; "Function Attrs" comment.
| 134 | +attributes #0 = { convergent nounwind null_pointer_is_valid } |
| 135 | +attributes #1 = { convergent inaccessiblememonly nounwind } |
| 136 | +attributes #2 = { convergent nounwind readnone willreturn } |
| 137 | +attributes #3 = { nounwind } |
| 138 | +attributes #4 = { nocallback nofree nosync nounwind readnone speculatable willreturn } |
| 139 | +attributes #5 = { nounwind readnone speculatable willreturn } |
| 140 | +attributes #6 = { argmemonly nounwind speculatable willreturn writeonly } |
| 141 | +attributes #7 = { nounwind willreturn } |
| 142 | + |
; Named metadata node with an empty operand list.
| 143 | +!igc.functions = !{} |
0 commit comments