Skip to content

Commit 216a740

Browse files
Merge branch 'main' into instsimplify_maximumnum
2 parents fd1d020 + 4769e52 commit 216a740

File tree

15 files changed

+735
-343
lines changed

15 files changed

+735
-343
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2935,6 +2935,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
29352935
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
29362936
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
29372937
widenScalarDst(MI, WideTy);
2938+
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
29382939
widenScalarDst(MI, WideTy, 1);
29392940
Observer.changedInstr(MI);
29402941
return Legalized;
@@ -2972,6 +2973,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
29722973
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
29732974
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
29742975
widenScalarDst(MI, WideTy);
2976+
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
29752977
widenScalarDst(MI, WideTy, 1);
29762978
Observer.changedInstr(MI);
29772979
return Legalized;
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
2+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s
3+
4+
---
5+
name: test_sdivrem_s16
6+
body: |
7+
bb.0:
8+
liveins: $vgpr0, $vgpr1
9+
10+
; GFX8-LABEL: name: test_sdivrem_s16
11+
; GFX8: liveins: $vgpr0, $vgpr1
12+
; GFX8-NEXT: {{ $}}
13+
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
14+
; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
15+
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16
16+
; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16
17+
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
18+
; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
19+
; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
20+
; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
21+
; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
22+
; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
23+
; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
24+
; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
25+
; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
26+
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
27+
; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
28+
; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
29+
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
30+
; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
31+
; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
32+
; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
33+
; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
34+
; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
35+
; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
36+
; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
37+
; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
38+
; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
39+
; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
40+
; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
41+
; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
42+
; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
43+
; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
44+
; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
45+
; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
46+
; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[XOR1]]
47+
; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT1]]
48+
; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
49+
; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
50+
; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
51+
; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
52+
; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[ASHR]]
53+
; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SUB4]](s32)
54+
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB5]](s32)
55+
; GFX8-NEXT: S_NOP 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16)
56+
; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](s32)
57+
; GFX8-NEXT: $vgpr0 = COPY [[SUB5]](s32)
58+
%0:_(s32) = COPY $vgpr0
59+
%1:_(s32) = COPY $vgpr1
60+
%2:_(s16) = G_TRUNC %0
61+
%3:_(s16) = G_TRUNC %1
62+
%4:_(s16), %5:_(s16) = G_SDIVREM %2, %3
63+
S_NOP 0, implicit %4, implicit %5
64+
%6:_(s32) = G_ANYEXT %4
65+
%7:_(s32) = G_ANYEXT %5
66+
$vgpr0 = COPY %6
67+
$vgpr0 = COPY %7
68+
69+
...
70+
71+
---
72+
name: test_udivrem_s16
73+
body: |
74+
bb.0:
75+
liveins: $vgpr0, $vgpr1
76+
77+
; GFX8-LABEL: name: test_udivrem_s16
78+
; GFX8: liveins: $vgpr0, $vgpr1
79+
; GFX8-NEXT: {{ $}}
80+
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
81+
; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
82+
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
83+
; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
84+
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
85+
; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
86+
; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
87+
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
88+
; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
89+
; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
90+
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
91+
; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
92+
; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
93+
; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
94+
; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
95+
; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
96+
; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
97+
; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
98+
; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
99+
; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
100+
; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
101+
; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
102+
; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
103+
; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
104+
; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
105+
; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
106+
; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
107+
; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[AND1]]
108+
; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT1]]
109+
; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT2]](s32)
110+
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT3]](s32)
111+
; GFX8-NEXT: S_NOP 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16)
112+
; GFX8-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
113+
; GFX8-NEXT: $vgpr0 = COPY [[SELECT3]](s32)
114+
%0:_(s32) = COPY $vgpr0
115+
%1:_(s32) = COPY $vgpr1
116+
%2:_(s16) = G_TRUNC %0
117+
%3:_(s16) = G_TRUNC %1
118+
%4:_(s16), %5:_(s16) = G_UDIVREM %2, %3
119+
S_NOP 0, implicit %4, implicit %5
120+
%6:_(s32) = G_ANYEXT %4
121+
%7:_(s32) = G_ANYEXT %5
122+
$vgpr0 = COPY %6
123+
$vgpr0 = COPY %7
124+
125+
...

llvm/test/MC/AMDGPU/wave_any.s

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 6
12
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
23

34
v_cmp_ge_i32_e32 s0, v0
4-
// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d]
5+
// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d]
56

67
v_cmp_ge_i32_e32 vcc_lo, s0, v1
7-
// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d]
8+
// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d]
89

910
v_cmp_ge_i32_e32 vcc, s0, v2
10-
// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d]
11+
// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d]
1112

1213
v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD
1314
// GFX10: v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
@@ -16,10 +17,10 @@ v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD
1617
// GFX10: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
1718

1819
v_cmp_class_f32_e32 vcc_lo, s0, v0
19-
// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
20+
// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
2021

2122
v_cmp_class_f32_e32 vcc, s0, v0
22-
// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
23+
// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
2324

2425
v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD
2526
// GFX10: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06]
@@ -34,13 +35,13 @@ v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD
3435
// GFX10: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06]
3536

3637
v_cndmask_b32_e32 v1, v2, v3,
37-
// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
38+
// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
3839

3940
v_cndmask_b32_e32 v1, v2, v3, vcc_lo
40-
// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
41+
// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
4142

4243
v_cndmask_b32_e32 v1, v2, v3, vcc
43-
// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
44+
// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
4445

4546
v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
4647
// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
@@ -127,61 +128,61 @@ v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 ban
127128
// GFX10: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00]
128129

129130
v_add_co_u32 v0, s0, v0, v2
130-
// GFX10: v_add_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
131+
// GFX10: v_add_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
131132

132133
v_add_co_u32_e64 v0, s0, v0, v2
133-
// GFX10: v_add_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
134+
// GFX10: v_add_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
134135

135136
v_add_co_ci_u32_e64 v4, s0, v1, v5, s2
136-
// GFX10: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00]
137+
// GFX10: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00]
137138

138139
v_sub_co_u32 v0, s0, v0, v2
139-
// GFX10: v_sub_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
140+
// GFX10: v_sub_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
140141

141142
v_sub_co_u32_e64 v0, s0, v0, v2
142-
// GFX10: v_sub_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
143+
// GFX10: v_sub_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
143144

144145
v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2
145-
// GFX10: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00]
146+
// GFX10: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00]
146147

147148
v_subrev_co_u32 v0, s0, v0, v2
148-
// GFX10: v_subrev_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
149+
// GFX10: v_subrev_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
149150

150151
v_subrev_co_u32_e64 v0, s0, v0, v2
151-
// GFX10: v_subrev_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
152+
// GFX10: v_subrev_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
152153

153154
v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2
154155
// GFX10: v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00]
155156

156157
v_add_co_u32 v0, s[0:1], v0, v2
157-
// GFX10: v_add_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
158+
// GFX10: v_add_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
158159

159160
v_add_co_u32 v0, exec, v0, v2
160-
// GFX10: v_add_co_u32 v0, exec, v0, v2 ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00]
161+
// GFX10: v_add_co_u32 v0, exec, v0, v2 ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00]
161162

162163
v_add_co_u32 v0, exec_lo, v0, v2
163-
// GFX10: v_add_co_u32 v0, exec_lo, v0, v2 ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00]
164+
// GFX10: v_add_co_u32 v0, exec_lo, v0, v2 ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00]
164165

165166
v_add_co_u32_e64 v0, s[0:1], v0, v2
166-
// GFX10: v_add_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
167+
// GFX10: v_add_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
167168

168169
v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
169170
// GFX10: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00]
170171

171172
v_sub_co_u32 v0, s[0:1], v0, v2
172-
// GFX10: v_sub_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
173+
// GFX10: v_sub_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
173174

174175
v_sub_co_u32_e64 v0, s[0:1], v0, v2
175-
// GFX10: v_sub_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
176+
// GFX10: v_sub_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
176177

177178
v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
178179
// GFX10: v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00]
179180

180181
v_subrev_co_u32 v0, s[0:1], v0, v2
181-
// GFX10: v_subrev_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
182+
// GFX10: v_subrev_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
182183

183184
v_subrev_co_u32_e64 v0, s[0:1], v0, v2
184-
// GFX10: v_subrev_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
185+
// GFX10: v_subrev_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
185186

186187
v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
187188
// GFX10: v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00]
@@ -199,10 +200,10 @@ v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc
199200
// GFX10: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01]
200201

201202
v_div_scale_f32 v2, s2, v0, v0, v2
202-
// GFX10: v_div_scale_f32 v2, s2, v0, v0, v2 ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
203+
// GFX10: v_div_scale_f32 v2, s2, v0, v0, v2 ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
203204

204205
v_div_scale_f32 v2, s[2:3], v0, v0, v2
205-
// GFX10: v_div_scale_f32 v2, s[2:3], v0, v0, v2 ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
206+
// GFX10: v_div_scale_f32 v2, s[2:3], v0, v0, v2 ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
206207

207208
v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3]
208209
// GFX10: v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3] ; encoding: [0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04]
@@ -223,7 +224,7 @@ v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3]
223224
// GFX10: v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3] ; encoding: [0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04]
224225

225226
v_cmpx_neq_f32_e32 v0, v1
226-
// GFX10: v_cmpx_neq_f32_e32 v0, v1 ; encoding: [0x00,0x03,0x3a,0x7c]
227+
// GFX10: v_cmpx_neq_f32_e32 v0, v1 ; encoding: [0x00,0x03,0x3a,0x7c]
227228

228229
v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD
229230
// GFX10: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x3a,0x7c,0x00,0x00,0x05,0x06]
@@ -232,7 +233,7 @@ v_cmpx_eq_u32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
232233
// GFX10: v_cmpx_eq_u32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0xa5,0x7d,0x00,0x00,0x05,0x86]
233234

234235
v_cmpx_class_f32_e64 v0, 1
235-
// GFX10: v_cmpx_class_f32_e64 v0, 1 ; encoding: [0x7e,0x00,0x98,0xd4,0x00,0x03,0x01,0x00]
236+
// GFX10: v_cmpx_class_f32_e64 v0, 1 ; encoding: [0x7e,0x00,0x98,0xd4,0x00,0x03,0x01,0x00]
236237

237238
v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
238239
// GFX10: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x31,0x7d,0x00,0x00,0x05,0x86]

0 commit comments

Comments
 (0)