Skip to content

Commit 8267484

Browse files
Added smed3, umed3 amdgcn.* -> asm lit tests; added rocdl -> llvm lowering lit test
Signed-off-by: keshavvinayak01 <[email protected]>
1 parent 6cb12ec commit 8267484

File tree

5 files changed

+180
-0
lines changed

5 files changed

+180
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
2+
3+
; GCN-LABEL: {{^}}test_smed3_i16:
4+
; GCN: v_med3_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
5+
; Kernel that truncates three i32 arguments to i16, passes them to the
; llvm.amdgcn.smed3.i16 intrinsic, and stores the i16 result to %out.
; The FileCheck directives above expect a single v_med3_i16 whose first
; source is an SGPR and whose other two sources are VGPRs.
define amdgpu_kernel void @test_smed3_i16(ptr addrspace(1) %out, i32 %src0.arg, i32 %src1.arg, i32 %src2.arg) #1 {
6+
%src0.i16 = trunc i32 %src0.arg to i16
7+
%src1.i16 = trunc i32 %src1.arg to i16
8+
%src2.i16 = trunc i32 %src2.arg to i16
9+
%med3 = call i16 @llvm.amdgcn.smed3.i16(i16 %src0.i16, i16 %src1.i16, i16 %src2.i16)
10+
store i16 %med3, ptr addrspace(1) %out
11+
ret void
12+
}
13+
14+
; GCN-LABEL: {{^}}test_smed3_zero_i16:
15+
; GCN: v_med3_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 0
16+
; Same shape as an i16 smed3 call, but the third operand is the literal 0
; instead of a kernel argument. The FileCheck directives above expect the
; zero to appear directly as the last operand of v_med3_i16.
define amdgpu_kernel void @test_smed3_zero_i16(ptr addrspace(1) %out, i32 %src0.arg, i32 %src1.arg) #1 {
17+
%src0.i16 = trunc i32 %src0.arg to i16
18+
%src1.i16 = trunc i32 %src1.arg to i16
19+
; Third operand is the constant 0 rather than a truncated argument.
%med3 = call i16 @llvm.amdgcn.smed3.i16(i16 %src0.i16, i16 %src1.i16, i16 0)
20+
store i16 %med3, ptr addrspace(1) %out
21+
ret void
22+
}
23+
24+
declare i16 @llvm.amdgcn.smed3.i16(i16, i16, i16) #0
25+
26+
attributes #0 = { nounwind readnone }
27+
attributes #1 = { nounwind }
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN %s
3+
4+
; GCN-LABEL: {{^}}test_smed3:
5+
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
6+
; Kernel that passes its three i32 arguments straight to the
; llvm.amdgcn.smed3.i32 intrinsic and stores the result to %out.
; The FileCheck directives above expect a v_med3_i32 with one SGPR source
; followed by two VGPR sources.
define amdgpu_kernel void @test_smed3(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 {
7+
%med3 = call i32 @llvm.amdgcn.smed3.i32(i32 %src0, i32 %src1, i32 %src2)
8+
store i32 %med3, ptr addrspace(1) %out
9+
ret void
10+
}
11+
12+
; GCN-LABEL: {{^}}test_smed3_multi_use:
13+
; GCN: v_med3_i32 [[MED3:v[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
14+
; GCN: v_mul_lo_i32 v{{[0-9]+}}, [[MED3]], s{{[0-9]+}}
15+
; Kernel in which the llvm.amdgcn.smed3.i32 result has two uses: it is
; multiplied by %mul.arg and it is also stored directly. The FileCheck
; directives above capture the v_med3_i32 destination register and expect
; that same register as an input of the following v_mul_lo_i32.
define amdgpu_kernel void @test_smed3_multi_use(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2, i32 %mul.arg) #1 {
16+
%med3 = call i32 @llvm.amdgcn.smed3.i32(i32 %src0, i32 %src1, i32 %src2)
17+
; Second use of %med3, in addition to the direct store below.
%med3.user = mul i32 %med3, %mul.arg
18+
; Both stores are volatile and write to the same address, %out.
store volatile i32 %med3.user, ptr addrspace(1) %out
19+
store volatile i32 %med3, ptr addrspace(1) %out
20+
ret void
21+
}
22+
23+
; GCN-LABEL: {{^}}test_smed3_constants:
24+
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 42
25+
; Kernel that calls llvm.amdgcn.smed3.i32 with two i32 arguments and the
; literal 42 as the third operand, then stores the result to %out.
; The FileCheck directives above expect 42 to appear directly as the last
; operand of v_med3_i32.
define amdgpu_kernel void @test_smed3_constants(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
26+
%med3 = call i32 @llvm.amdgcn.smed3.i32(i32 %src0, i32 %src1, i32 42)
27+
store i32 %med3, ptr addrspace(1) %out
28+
ret void
29+
}
30+
31+
; GCN-LABEL: {{^}}test_smed3_zero:
32+
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 0
33+
; Kernel that calls llvm.amdgcn.smed3.i32 with two i32 arguments and the
; literal 0 as the third operand, then stores the result to %out.
; The FileCheck directives above expect 0 to appear directly as the last
; operand of v_med3_i32.
define amdgpu_kernel void @test_smed3_zero(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
34+
%med3 = call i32 @llvm.amdgcn.smed3.i32(i32 %src0, i32 %src1, i32 0)
35+
store i32 %med3, ptr addrspace(1) %out
36+
ret void
37+
}
38+
39+
declare i32 @llvm.amdgcn.smed3.i32(i32, i32, i32) #0
40+
41+
attributes #0 = { nounwind readnone }
42+
attributes #1 = { nounwind }
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
2+
3+
; GCN-LABEL: {{^}}test_umed3_i16:
4+
; GCN: v_med3_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
5+
; Kernel that truncates three i32 arguments to i16, passes them to the
; llvm.amdgcn.umed3.i16 intrinsic, and stores the i16 result to %out.
; The FileCheck directives above expect a single v_med3_u16 whose first
; source is an SGPR and whose other two sources are VGPRs.
define amdgpu_kernel void @test_umed3_i16(ptr addrspace(1) %out, i32 %src0.arg, i32 %src1.arg, i32 %src2.arg) #1 {
6+
%src0.i16 = trunc i32 %src0.arg to i16
7+
%src1.i16 = trunc i32 %src1.arg to i16
8+
%src2.i16 = trunc i32 %src2.arg to i16
9+
%med3 = call i16 @llvm.amdgcn.umed3.i16(i16 %src0.i16, i16 %src1.i16, i16 %src2.i16)
10+
store i16 %med3, ptr addrspace(1) %out
11+
ret void
12+
}
13+
14+
; GCN-LABEL: {{^}}test_umed3_zero_i16:
15+
; GCN: v_med3_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 0
16+
; Same shape as an i16 umed3 call, but the third operand is the literal 0
; instead of a kernel argument. The FileCheck directives above expect the
; zero to appear directly as the last operand of v_med3_u16.
define amdgpu_kernel void @test_umed3_zero_i16(ptr addrspace(1) %out, i32 %src0.arg, i32 %src1.arg) #1 {
17+
%src0.i16 = trunc i32 %src0.arg to i16
18+
%src1.i16 = trunc i32 %src1.arg to i16
19+
; Third operand is the constant 0 rather than a truncated argument.
%med3 = call i16 @llvm.amdgcn.umed3.i16(i16 %src0.i16, i16 %src1.i16, i16 0)
20+
store i16 %med3, ptr addrspace(1) %out
21+
ret void
22+
}
23+
24+
declare i16 @llvm.amdgcn.umed3.i16(i16, i16, i16) #0
25+
26+
attributes #0 = { nounwind readnone }
27+
attributes #1 = { nounwind }
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN %s
3+
4+
; GCN-LABEL: {{^}}test_umed3:
5+
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
6+
; Kernel that passes its three i32 arguments straight to the
; llvm.amdgcn.umed3.i32 intrinsic and stores the result to %out.
; The FileCheck directives above expect a v_med3_u32 with one SGPR source
; followed by two VGPR sources.
define amdgpu_kernel void @test_umed3(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 {
7+
%med3 = call i32 @llvm.amdgcn.umed3.i32(i32 %src0, i32 %src1, i32 %src2)
8+
store i32 %med3, ptr addrspace(1) %out
9+
ret void
10+
}
11+
12+
; GCN-LABEL: {{^}}test_umed3_multi_use:
13+
; GCN: v_med3_u32 [[MED3:v[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
14+
; GCN: v_mul_lo_i32 v{{[0-9]+}}, [[MED3]], s{{[0-9]+}}
15+
; Kernel in which the llvm.amdgcn.umed3.i32 result has two uses: it is
; multiplied by %mul.arg and it is also stored directly. The FileCheck
; directives above capture the v_med3_u32 destination register and expect
; that same register as an input of the following v_mul_lo_i32.
define amdgpu_kernel void @test_umed3_multi_use(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2, i32 %mul.arg) #1 {
16+
%med3 = call i32 @llvm.amdgcn.umed3.i32(i32 %src0, i32 %src1, i32 %src2)
17+
; Second use of %med3, in addition to the direct store below.
%med3.user = mul i32 %med3, %mul.arg
18+
; Both stores are volatile and write to the same address, %out.
store volatile i32 %med3.user, ptr addrspace(1) %out
19+
store volatile i32 %med3, ptr addrspace(1) %out
20+
ret void
21+
}
22+
23+
; GCN-LABEL: {{^}}test_umed3_constants:
24+
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 42
25+
; Kernel that calls llvm.amdgcn.umed3.i32 with two i32 arguments and the
; literal 42 as the third operand, then stores the result to %out.
; The FileCheck directives above expect 42 to appear directly as the last
; operand of v_med3_u32.
define amdgpu_kernel void @test_umed3_constants(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
26+
%med3 = call i32 @llvm.amdgcn.umed3.i32(i32 %src0, i32 %src1, i32 42)
27+
store i32 %med3, ptr addrspace(1) %out
28+
ret void
29+
}
30+
31+
; GCN-LABEL: {{^}}test_umed3_zero:
32+
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 0
33+
; Kernel that calls llvm.amdgcn.umed3.i32 with two i32 arguments and the
; literal 0 as the third operand, then stores the result to %out.
; The FileCheck directives above expect 0 to appear directly as the last
; operand of v_med3_u32.
define amdgpu_kernel void @test_umed3_zero(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
34+
%med3 = call i32 @llvm.amdgcn.umed3.i32(i32 %src0, i32 %src1, i32 0)
35+
store i32 %med3, ptr addrspace(1) %out
36+
ret void
37+
}
38+
39+
declare i32 @llvm.amdgcn.umed3.i32(i32, i32, i32) #0
40+
41+
attributes #0 = { nounwind readnone }
42+
attributes #1 = { nounwind }

mlir/test/Target/LLVMIR/rocdl.mlir

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1298,6 +1298,48 @@ llvm.func @rocdl_last_use(%ptr: !llvm.ptr<1>) -> i32 {
12981298
llvm.return %ret : i32
12991299
}
13001300

1301+
// Translation test: applies rocdl.med3.f16 to three f16 arguments and
// returns the result. The FileCheck directives inside the body expect the
// op to become a call to @llvm.amdgcn.fmed3.f16 on the same three operands.
llvm.func @test_med3_f16(%arg0: f16, %arg1: f16, %arg2: f16) -> f16 {
1302+
// CHECK-LABEL: define half @test_med3_f16(half %0, half %1, half %2)
1303+
%0 = rocdl.med3.f16 %arg0, %arg1, %arg2 : (f16, f16, f16) -> f16
1304+
llvm.return %0 : f16
1305+
// CHECK: call half @llvm.amdgcn.fmed3.f16(half %0, half %1, half %2)
1306+
}
1307+
1308+
// Translation test: applies rocdl.med3.f32 to three f32 arguments and
// returns the result. The FileCheck directives inside the body expect the
// op to become a call to @llvm.amdgcn.fmed3.f32 on the same three operands.
llvm.func @test_med3_f32(%arg0: f32, %arg1: f32, %arg2: f32) -> f32 {
1309+
// CHECK-LABEL: define float @test_med3_f32(float %0, float %1, float %2)
1310+
%0 = rocdl.med3.f32 %arg0, %arg1, %arg2 : (f32, f32, f32) -> f32
1311+
llvm.return %0 : f32
1312+
// CHECK: call float @llvm.amdgcn.fmed3.f32(float %0, float %1, float %2)
1313+
}
1314+
1315+
// Translation test: applies rocdl.med3.i16 to three i16 arguments and
// returns the result. The FileCheck directives inside the body expect the
// op to become a call to @llvm.amdgcn.smed3.i16 on the same three operands.
llvm.func @test_med3_i16(%arg0: i16, %arg1: i16, %arg2: i16) -> i16 {
1316+
// CHECK-LABEL: define i16 @test_med3_i16(i16 %0, i16 %1, i16 %2)
1317+
%0 = rocdl.med3.i16 %arg0, %arg1, %arg2 : (i16, i16, i16) -> i16
1318+
llvm.return %0 : i16
1319+
// CHECK: call i16 @llvm.amdgcn.smed3.i16(i16 %0, i16 %1, i16 %2)
1320+
}
1321+
1322+
// Translation test: applies rocdl.med3.i32 to three i32 arguments and
// returns the result. The FileCheck directives inside the body expect the
// op to become a call to @llvm.amdgcn.smed3.i32 on the same three operands.
llvm.func @test_med3_i32(%arg0: i32, %arg1: i32, %arg2: i32) -> i32 {
1323+
// CHECK-LABEL: define i32 @test_med3_i32(i32 %0, i32 %1, i32 %2)
1324+
%0 = rocdl.med3.i32 %arg0, %arg1, %arg2 : (i32, i32, i32) -> i32
1325+
llvm.return %0 : i32
1326+
// CHECK: call i32 @llvm.amdgcn.smed3.i32(i32 %0, i32 %1, i32 %2)
1327+
}
1328+
1329+
// Translation test: applies rocdl.med3.u16 to three i16 arguments and
// returns the result. The FileCheck directives inside the body expect the
// op to become a call to @llvm.amdgcn.umed3.i16 on the same three operands.
llvm.func @test_med3_u16(%arg0: i16, %arg1: i16, %arg2: i16) -> i16 {
1330+
// CHECK-LABEL: define i16 @test_med3_u16(i16 %0, i16 %1, i16 %2)
1331+
%0 = rocdl.med3.u16 %arg0, %arg1, %arg2 : (i16, i16, i16) -> i16
1332+
llvm.return %0 : i16
1333+
// CHECK: call i16 @llvm.amdgcn.umed3.i16(i16 %0, i16 %1, i16 %2)
1334+
}
1335+
1336+
// Translation test: applies rocdl.med3.u32 to three i32 arguments and
// returns the result. The FileCheck directives inside the body expect the
// op to become a call to @llvm.amdgcn.umed3.i32 on the same three operands.
llvm.func @test_med3_u32(%arg0: i32, %arg1: i32, %arg2: i32) -> i32 {
1337+
// CHECK-LABEL: define i32 @test_med3_u32(i32 %0, i32 %1, i32 %2)
1338+
%0 = rocdl.med3.u32 %arg0, %arg1, %arg2 : (i32, i32, i32) -> i32
1339+
llvm.return %0 : i32
1340+
// CHECK: call i32 @llvm.amdgcn.umed3.i32(i32 %0, i32 %1, i32 %2)
1341+
}
1342+
13011343
// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="true" }
13021344
// CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024"
13031345
// CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128"

0 commit comments

Comments
 (0)