22
33module attributes {ttig.min_sg_size = 16 : i32 , ttig.support_bf16_conversion , ttig.support_dpas , ttig.support_sg_2d_block , ttig.target_arch = " spir64" , " ttg.num-ctas" = 1 : i32 , " ttg.num-warps" = 1 : i32 , ttg.shared = 0 : i32 , ttg.target = " xpu" , " ttg.threads-per-warp" = 32 : i32 } {
44 // CHECK-LABEL: llvm.mlir.global external @global_smem() {addr_space = 3 : i32, alignment = 16 : i64} : !llvm.array<0 x i8>
5- // CHECK-LABEL: llvm.func spir_kernelcc @kernel(%arg0: !llvm.ptr<1>, %arg1: !llvm.ptr<1>, %arg2: !llvm.ptr<1>, %arg3: !llvm.ptr<1>)
5+ // CHECK-LABEL: llvm.func spir_kernelcc @kernel(%arg0: !llvm.ptr<1>, %arg1: !llvm.ptr<1>, %arg2: !llvm.ptr<1>, %arg3: !llvm.ptr<1>, %arg4: !llvm.ptr<1>)
// Public (noinline = false) entry function of the first test module.
// It loads one f32 through %arg0 and one through %arg1, then calls the
// noinline helper with both loaded values and the %arg2 pointer.
66 tt.func public @kernel (%arg0: !tt.ptr <f32 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f32 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f32 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
77 %0 = tt.load %arg0 : !tt.ptr <f32 >
88 %1 = tt.load %arg1 : !tt.ptr <f32 >
// The FileCheck directives below capture the two lowered loads as LOAD0/LOAD1
// and a poison value of type !llvm.ptr<3> as POISON.
99 // CHECK: [[LOAD0:%.*]] = llvm.extractelement {{.*}}[{{.*}}]
1010 // CHECK: [[LOAD1:%.*]] = llvm.extractelement {{.*}}[{{.*}}]
1111 // CHECK: [[POISON:%.*]] = llvm.mlir.poison : !llvm.ptr<3>
// Expected lowered call: the three source operands, then POISON, then extra
// trailing pointer operands (the removed line expects one, the added line two).
// NOTE(review): POISON is presumably the shared-memory base for a module with
// ttg.shared = 0, and the trailing operands implicit pointers appended by the
// lowering -- confirm against the conversion pass.
12- // CHECK: llvm.call spir_funccc @noinline_simple_fn__fp32_fp32_Pfp32__([[LOAD0]], [[LOAD1]], %arg2, [[POISON]], %arg3)
12+ // CHECK: llvm.call spir_funccc @noinline_simple_fn__fp32_fp32_Pfp32__([[LOAD0]], [[LOAD1]], %arg2, [[POISON]], %arg3, %arg4 )
1313 tt.call @noinline_simple_fn__fp32_fp32_Pfp32__ (%0 , %1 , %arg2 ) : (f32 , f32 , !tt.ptr <f32 >) -> ()
1414 tt.return
1515 }
16- // CHECK: llvm.func internal spir_funccc @noinline_simple_fn__fp32_fp32_Pfp32__(%arg0: f32, %arg1: f32, %arg2: !llvm.ptr<1>, %arg3: !llvm.ptr<3>, %arg4: !llvm.ptr<1>)
16+ // CHECK: llvm.func internal spir_funccc @noinline_simple_fn__fp32_fp32_Pfp32__(%arg0: f32, %arg1: f32, %arg2: !llvm.ptr<1>, %arg3: !llvm.ptr<3>, %arg4: !llvm.ptr<1>, %arg5: !llvm.ptr<1>)
1717 tt.func private @noinline_simple_fn__fp32_fp32_Pfp32__ (%arg0: f32 {tt.constancy = 1 : i64 , tt.contiguity = 1 : i64 , tt.divisibility = 1 : i64 }, %arg1: f32 {tt.constancy = 1 : i64 , tt.contiguity = 1 : i64 , tt.divisibility = 1 : i64 }, %arg2: !tt.ptr <f32 > {tt.constancy = 1 : i64 , tt.contiguity = 1 : i64 , tt.divisibility = 16 : i64 }) attributes {noinline = true } {
1818 %0 = arith.addf %arg0 , %arg1 fastmath <fast > : f32
1919 tt.store %arg2 , %0 : !tt.ptr <f32 >
@@ -29,17 +29,17 @@ module attributes {ttig.min_sg_size = 16 : i32, ttig.support_bf16_conversion, tt
2929#smem = #ttg.shared_memory
3030module attributes {ttig.min_sg_size = 16 : i32 , ttig.support_bf16_conversion , ttig.support_dpas , ttig.support_sg_2d_block , ttig.target_arch = " spir64" , " ttg.num-ctas" = 1 : i32 , " ttg.num-warps" = 1 : i32 , ttg.shared = 1280 : i32 , ttg.target = " xpu" , " ttg.threads-per-warp" = 16 : i32 } {
3131 // CHECK-LABEL: llvm.mlir.global external @global_smem() {addr_space = 3 : i32, alignment = 16 : i64} : !llvm.array<0 x i8>
32- // CHECK-LABEL: llvm.func spir_kernelcc @kernel(%arg0: !llvm.ptr<1>, %arg1: !llvm.ptr<1>, %arg2: !llvm.ptr<1>, %arg3: !llvm.ptr<1>, %arg4: !llvm.ptr<3>)
32+ // CHECK-LABEL: llvm.func spir_kernelcc @kernel(%arg0: !llvm.ptr<1>, %arg1: !llvm.ptr<1>, %arg2: !llvm.ptr<1>, %arg3: !llvm.ptr<1>, %arg4: !llvm.ptr<1>, %arg5: !llvm.ptr<3>)
// Public (noinline = false) entry function of the second test module.
// It loads one f32 through %arg0 and one through %arg1, then calls the
// noinline helper with both loaded values and the %arg2 pointer; the call
// carries allocation.offset = 0.
3333 tt.func public @kernel (%arg0: !tt.ptr <f32 > {tt.divisibility = 16 : i32 }, %arg1: !tt.ptr <f32 > {tt.divisibility = 16 : i32 }, %arg2: !tt.ptr <f32 > {tt.divisibility = 16 : i32 }) attributes {noinline = false } {
3434 %0 = tt.load %arg0 : !tt.ptr <f32 >
3535 %1 = tt.load %arg1 : !tt.ptr <f32 >
// The FileCheck directives below capture the two lowered loads as LOAD0/LOAD1.
3636 // CHECK: [[LOAD0:%.*]] = llvm.extractelement {{.*}}[{{.*}}]
3737 // CHECK: [[LOAD1:%.*]] = llvm.extractelement {{.*}}[{{.*}}]
// Expected lowered call: the fourth operand is a kernel argument rather than
// a poison value (%arg4 in the removed line, %arg5 in the added line), and it
// is followed by the remaining trailing kernel arguments.
// NOTE(review): that fourth operand is presumably the kernel's shared-memory
// pointer argument -- confirm against the lowered kernel signature.
38- // CHECK: llvm.call spir_funccc @noinline_shared_fn__fp32_fp32_Pfp32__([[LOAD0]], [[LOAD1]], %arg2, %arg4 , %arg3)
38+ // CHECK: llvm.call spir_funccc @noinline_shared_fn__fp32_fp32_Pfp32__([[LOAD0]], [[LOAD1]], %arg2, %arg5 , %arg3, %arg4 )
3939 tt.call @noinline_shared_fn__fp32_fp32_Pfp32__ (%0 , %1 , %arg2 ) {allocation.offset = 0 : i32 } : (f32 , f32 , !tt.ptr <f32 >) -> ()
4040 tt.return
4141 }
42- // CHECK: llvm.func internal spir_funccc @noinline_shared_fn__fp32_fp32_Pfp32__(%arg0: f32, %arg1: f32, %arg2: !llvm.ptr<1>, %arg3: !llvm.ptr<3>, %arg4: !llvm.ptr<1>)
42+ // CHECK: llvm.func internal spir_funccc @noinline_shared_fn__fp32_fp32_Pfp32__(%arg0: f32, %arg1: f32, %arg2: !llvm.ptr<1>, %arg3: !llvm.ptr<3>, %arg4: !llvm.ptr<1>, %arg5: !llvm.ptr<1>)
4343 // CHECK: llvm.getelementptr %arg3[{{.*}}]
4444 tt.func private @noinline_shared_fn__fp32_fp32_Pfp32__ (%arg0: f32 {tt.constancy = 1 : i64 , tt.contiguity = 1 : i64 , tt.divisibility = 1 : i64 }, %arg1: f32 {tt.constancy = 1 : i64 , tt.contiguity = 1 : i64 , tt.divisibility = 1 : i64 }, %arg2: !tt.ptr <f32 > {tt.constancy = 1 : i64 , tt.contiguity = 1 : i64 , tt.divisibility = 16 : i64 }) attributes {noinline = true } {
4545 %cst = arith.constant dense <16 > : tensor <16 x1 xi32 , #blocked >
0 commit comments