Skip to content

Commit 0943e98

Browse files
update test
1 parent ec5c6be commit 0943e98

File tree

2 files changed

+74
-87
lines changed

2 files changed

+74
-87
lines changed

llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll

Lines changed: 0 additions & 87 deletions
This file was deleted.
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 | FileCheck %s -check-prefixes=GFX11
3+
4+
; Test: llvm.maximum.f32 of two inreg float arguments, bitcast to i32 and
; returned from an amdgpu_ps function.
; The autogenerated assertions below expect the maximum (v_max_f32 plus a
; v_cmp_o/v_cndmask select of the constant 0x7fc00000) to be computed into v0,
; and the result to be moved into s0 with v_readfirstlane_b32 before the return.
define amdgpu_ps i32 @uniform_v_to_s_i32(float inreg %a, float inreg %b) {
5+
; GFX11-LABEL: uniform_v_to_s_i32:
6+
; GFX11: ; %bb.0:
7+
; GFX11-NEXT: v_max_f32_e64 v0, s0, s1
8+
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s1
9+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
10+
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
11+
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
12+
; GFX11-NEXT: ; return to shader part epilog
13+
%max0 = call float @llvm.maximum.f32(float %a, float %b)
14+
%cast = bitcast float %max0 to i32
15+
ret i32 %cast
16+
}
17+
18+
; Test: llvm.maximum.f64 of two inreg double arguments, bitcast to i64 and
; returned from an amdgpu_ps function.
; The autogenerated assertions below expect the 64-bit result to be built in
; v[0:1] (v_max_f64, then one v_cndmask per 32-bit half keyed on v_cmp_u) and
; each half to be moved to s0/s1 with its own v_readfirstlane_b32.
define amdgpu_ps i64 @uniform_v_to_s_i64(double inreg %a, double inreg %b) {
19+
; GFX11-LABEL: uniform_v_to_s_i64:
20+
; GFX11: ; %bb.0:
21+
; GFX11-NEXT: v_max_f64 v[0:1], s[0:1], s[2:3]
22+
; GFX11-NEXT: v_cmp_u_f64_e64 s0, s[0:1], s[2:3]
23+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
24+
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x7ff80000, s0
25+
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, 0, s0
26+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
27+
; GFX11-NEXT: v_readfirstlane_b32 s1, v1
28+
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
29+
; GFX11-NEXT: ; return to shader part epilog
30+
%max0 = call double @llvm.maximum.f64(double %a, double %b)
31+
%cast = bitcast double %max0 to i64
32+
ret i64 %cast
33+
}
34+
35+
; Test: llvm.maximum on two inreg <2 x float> arguments, bitcast to <2 x i32>
; and returned from an amdgpu_ps function.
; The intrinsic is spelled with the v2f32 overload suffix so that its name
; matches the <2 x float> operand and result types used in the call.
; The autogenerated assertions below expect each lane to be computed
; separately (v_max_f32 + v_cmp_o/v_cndmask) into v0 and v1, and each lane to
; be moved to s0/s1 with its own v_readfirstlane_b32.
define amdgpu_ps <2 x i32> @uniform_v_to_s_2_i32(<2 x float> inreg %a, <2 x float> inreg %b) {
36+
; GFX11-LABEL: uniform_v_to_s_2_i32:
37+
; GFX11: ; %bb.0:
38+
; GFX11-NEXT: v_max_f32_e64 v0, s0, s2
39+
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s2
40+
; GFX11-NEXT: v_max_f32_e64 v1, s1, s3
41+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
42+
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
43+
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s1, s3
44+
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
45+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
46+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v1, vcc_lo
47+
; GFX11-NEXT: v_readfirstlane_b32 s1, v1
48+
; GFX11-NEXT: ; return to shader part epilog
49+
%max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b)
50+
%cast = bitcast <2 x float> %max0 to <2 x i32>
51+
ret <2 x i32> %cast
52+
}
53+
54+
; Test: loads a float through the inreg pointer argument, takes
; llvm.maximum.f32 of it with 1.0, converts the result to i32 with fptoui and
; returns it as a ptr via inttoptr.
; The autogenerated assertions below expect the pointer to be copied from
; s0/s1 into v[0:1] for the flat_load_b32, the converted value to be moved to
; s0 with v_readfirstlane_b32, and s1 to be set to 0 with s_mov_b32.
; NOTE(review): the s1 = 0 write is presumably the upper half of the returned
; 64-bit pointer, zero because the pointer is built from an i32 - confirm.
define amdgpu_ps ptr @uniform_v_to_s_ptr(ptr inreg %x) {
55+
; GFX11-LABEL: uniform_v_to_s_ptr:
56+
; GFX11: ; %bb.0:
57+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
58+
; GFX11-NEXT: s_mov_b32 s1, 0
59+
; GFX11-NEXT: flat_load_b32 v0, v[0:1]
60+
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
61+
; GFX11-NEXT: v_max_f32_e32 v1, 1.0, v0
62+
; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v0
63+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
64+
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v1, vcc_lo
65+
; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
66+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
67+
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
68+
; GFX11-NEXT: ; return to shader part epilog
69+
%val = load float, ptr %x, align 4
70+
%max = call float @llvm.maximum.f32(float %val, float 1.0)
71+
%int = fptoui float %max to i32
72+
%ptr = inttoptr i32 %int to ptr
73+
ret ptr %ptr
74+
}

0 commit comments

Comments
 (0)