Skip to content

Commit 4027444

Browse files
committed
[GlobalISel][AMDGPU] Address comments
Change-Id: Ia82e785f936dae63180b62a297b5cd2a1d1b8bf3
1 parent ca16db8 commit 4027444

File tree

4 files changed

+128
-125
lines changed

4 files changed

+128
-125
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -119,18 +119,6 @@ static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
119119
};
120120
}
121121

122-
static LegalizeMutation breakCurrentEltsToSize32Or64(unsigned TypeIdx) {
123-
return [=](const LegalityQuery &Query) {
124-
const LLT Ty = Query.Types[TypeIdx];
125-
const LLT EltTy = Ty.getElementType();
126-
const int Size = Ty.getSizeInBits();
127-
const int EltSize = EltTy.getSizeInBits();
128-
const unsigned TargetEltSize = EltSize % 64 == 0 ? 64 : 32;
129-
const unsigned NewNumElts = (Size + (TargetEltSize - 1)) / TargetEltSize;
130-
return std::pair(TypeIdx, LLT::fixed_vector(NewNumElts, TargetEltSize));
131-
};
132-
}
133-
134122
// Increase the number of vector elements to reach the next multiple of 32-bit
135123
// type.
136124
static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
@@ -890,8 +878,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
890878
.fewerElementsIf(
891879
all(vectorWiderThan(0, 64), scalarOrEltNarrowerThan(0, 64)),
892880
fewerEltsToSize64Vector(0))
893-
.bitcastIf(all(vectorWiderThan(0, 64), scalarOrEltWiderThan(0, 64)),
894-
breakCurrentEltsToSize32Or64(0))
895881
.widenScalarToNextPow2(0)
896882
.scalarize(0);
897883

llvm/test/CodeGen/AMDGPU/GlobalISel/and.v2i128.ll

Lines changed: 60 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
44
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
55
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6-
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s
6+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
77

88
define <2 x i128> @v_and_v2i128(<2 x i128> %a, <2 x i128> %b) {
99
; GFX7-LABEL: v_and_v2i128:
@@ -57,6 +57,19 @@ define <2 x i128> @v_and_v2i128(<2 x i128> %a, <2 x i128> %b) {
5757
; GFX10-NEXT: v_and_b32_e32 v6, v6, v14
5858
; GFX10-NEXT: v_and_b32_e32 v7, v7, v15
5959
; GFX10-NEXT: s_setpc_b64 s[30:31]
60+
;
61+
; GFX11-LABEL: v_and_v2i128:
62+
; GFX11: ; %bb.0:
63+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64+
; GFX11-NEXT: v_and_b32_e32 v0, v0, v8
65+
; GFX11-NEXT: v_and_b32_e32 v1, v1, v9
66+
; GFX11-NEXT: v_and_b32_e32 v2, v2, v10
67+
; GFX11-NEXT: v_and_b32_e32 v3, v3, v11
68+
; GFX11-NEXT: v_and_b32_e32 v4, v4, v12
69+
; GFX11-NEXT: v_and_b32_e32 v5, v5, v13
70+
; GFX11-NEXT: v_and_b32_e32 v6, v6, v14
71+
; GFX11-NEXT: v_and_b32_e32 v7, v7, v15
72+
; GFX11-NEXT: s_setpc_b64 s[30:31]
6073
%and = and <2 x i128> %a, %b
6174
ret <2 x i128> %and
6275
}
@@ -65,53 +78,63 @@ define <2 x i128> @v_and_v2i128_inline_imm(<2 x i128> %a) {
6578
; GFX7-LABEL: v_and_v2i128_inline_imm:
6679
; GFX7: ; %bb.0:
6780
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68-
; GFX7-NEXT: s_mov_b64 s[4:5], 64
69-
; GFX7-NEXT: s_mov_b64 s[6:7], 0
70-
; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5]
71-
; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7]
72-
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
73-
; GFX7-NEXT: v_and_b32_e32 v1, s5, v1
74-
; GFX7-NEXT: v_and_b32_e32 v2, s6, v2
75-
; GFX7-NEXT: v_and_b32_e32 v3, s7, v3
76-
; GFX7-NEXT: v_and_b32_e32 v4, s4, v4
77-
; GFX7-NEXT: v_and_b32_e32 v5, s5, v5
78-
; GFX7-NEXT: v_and_b32_e32 v6, s6, v6
79-
; GFX7-NEXT: v_and_b32_e32 v7, s7, v7
81+
; GFX7-NEXT: v_and_b32_e32 v0, 64, v0
82+
; GFX7-NEXT: v_and_b32_e32 v4, 64, v4
83+
; GFX7-NEXT: v_mov_b32_e32 v1, 0
84+
; GFX7-NEXT: v_mov_b32_e32 v2, 0
85+
; GFX7-NEXT: v_mov_b32_e32 v3, 0
86+
; GFX7-NEXT: v_mov_b32_e32 v5, 0
87+
; GFX7-NEXT: v_mov_b32_e32 v6, 0
88+
; GFX7-NEXT: v_mov_b32_e32 v7, 0
8089
; GFX7-NEXT: s_setpc_b64 s[30:31]
8190
;
8291
; GFX9-LABEL: v_and_v2i128_inline_imm:
8392
; GFX9: ; %bb.0:
8493
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85-
; GFX9-NEXT: s_mov_b64 s[4:5], 64
86-
; GFX9-NEXT: s_mov_b64 s[6:7], 0
87-
; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5]
88-
; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7]
89-
; GFX9-NEXT: v_and_b32_e32 v0, s4, v0
90-
; GFX9-NEXT: v_and_b32_e32 v1, s5, v1
91-
; GFX9-NEXT: v_and_b32_e32 v2, s6, v2
92-
; GFX9-NEXT: v_and_b32_e32 v3, s7, v3
93-
; GFX9-NEXT: v_and_b32_e32 v4, s4, v4
94-
; GFX9-NEXT: v_and_b32_e32 v5, s5, v5
95-
; GFX9-NEXT: v_and_b32_e32 v6, s6, v6
96-
; GFX9-NEXT: v_and_b32_e32 v7, s7, v7
94+
; GFX9-NEXT: v_and_b32_e32 v0, 64, v0
95+
; GFX9-NEXT: v_and_b32_e32 v4, 64, v4
96+
; GFX9-NEXT: v_mov_b32_e32 v1, 0
97+
; GFX9-NEXT: v_mov_b32_e32 v2, 0
98+
; GFX9-NEXT: v_mov_b32_e32 v3, 0
99+
; GFX9-NEXT: v_mov_b32_e32 v5, 0
100+
; GFX9-NEXT: v_mov_b32_e32 v6, 0
101+
; GFX9-NEXT: v_mov_b32_e32 v7, 0
97102
; GFX9-NEXT: s_setpc_b64 s[30:31]
98103
;
99104
; GFX8-LABEL: v_and_v2i128_inline_imm:
100105
; GFX8: ; %bb.0:
101106
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102-
; GFX8-NEXT: s_mov_b64 s[4:5], 64
103-
; GFX8-NEXT: s_mov_b64 s[6:7], 0
104-
; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5]
105-
; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7]
106-
; GFX8-NEXT: v_and_b32_e32 v0, s4, v0
107-
; GFX8-NEXT: v_and_b32_e32 v1, s5, v1
108-
; GFX8-NEXT: v_and_b32_e32 v2, s6, v2
109-
; GFX8-NEXT: v_and_b32_e32 v3, s7, v3
110-
; GFX8-NEXT: v_and_b32_e32 v4, s4, v4
111-
; GFX8-NEXT: v_and_b32_e32 v5, s5, v5
112-
; GFX8-NEXT: v_and_b32_e32 v6, s6, v6
113-
; GFX8-NEXT: v_and_b32_e32 v7, s7, v7
107+
; GFX8-NEXT: v_and_b32_e32 v0, 64, v0
108+
; GFX8-NEXT: v_and_b32_e32 v4, 64, v4
109+
; GFX8-NEXT: v_mov_b32_e32 v1, 0
110+
; GFX8-NEXT: v_mov_b32_e32 v2, 0
111+
; GFX8-NEXT: v_mov_b32_e32 v3, 0
112+
; GFX8-NEXT: v_mov_b32_e32 v5, 0
113+
; GFX8-NEXT: v_mov_b32_e32 v6, 0
114+
; GFX8-NEXT: v_mov_b32_e32 v7, 0
114115
; GFX8-NEXT: s_setpc_b64 s[30:31]
116+
;
117+
; GFX10-LABEL: v_and_v2i128_inline_imm:
118+
; GFX10: ; %bb.0:
119+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120+
; GFX10-NEXT: v_and_b32_e32 v0, 64, v0
121+
; GFX10-NEXT: v_and_b32_e32 v4, 64, v4
122+
; GFX10-NEXT: v_mov_b32_e32 v1, 0
123+
; GFX10-NEXT: v_mov_b32_e32 v2, 0
124+
; GFX10-NEXT: v_mov_b32_e32 v3, 0
125+
; GFX10-NEXT: v_mov_b32_e32 v5, 0
126+
; GFX10-NEXT: v_mov_b32_e32 v6, 0
127+
; GFX10-NEXT: v_mov_b32_e32 v7, 0
128+
; GFX10-NEXT: s_setpc_b64 s[30:31]
129+
;
130+
; GFX11-LABEL: v_and_v2i128_inline_imm:
131+
; GFX11: ; %bb.0:
132+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133+
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 64, v0
134+
; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v4, 64, v4
135+
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 0
136+
; GFX11-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0
137+
; GFX11-NEXT: s_setpc_b64 s[30:31]
115138
%and = and <2 x i128> %a, <i128 64, i128 64>
116139
ret <2 x i128> %and
117140
}

llvm/test/CodeGen/AMDGPU/GlobalISel/or.v2i128.ll

Lines changed: 34 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
44
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
55
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6-
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s
6+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
77

88
define <2 x i128> @v_or_v2i128(<2 x i128> %a, <2 x i128> %b) {
99
; GFX7-LABEL: v_or_v2i128:
@@ -57,6 +57,19 @@ define <2 x i128> @v_or_v2i128(<2 x i128> %a, <2 x i128> %b) {
5757
; GFX10-NEXT: v_or_b32_e32 v6, v6, v14
5858
; GFX10-NEXT: v_or_b32_e32 v7, v7, v15
5959
; GFX10-NEXT: s_setpc_b64 s[30:31]
60+
;
61+
; GFX11-LABEL: v_or_v2i128:
62+
; GFX11: ; %bb.0:
63+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64+
; GFX11-NEXT: v_or_b32_e32 v0, v0, v8
65+
; GFX11-NEXT: v_or_b32_e32 v1, v1, v9
66+
; GFX11-NEXT: v_or_b32_e32 v2, v2, v10
67+
; GFX11-NEXT: v_or_b32_e32 v3, v3, v11
68+
; GFX11-NEXT: v_or_b32_e32 v4, v4, v12
69+
; GFX11-NEXT: v_or_b32_e32 v5, v5, v13
70+
; GFX11-NEXT: v_or_b32_e32 v6, v6, v14
71+
; GFX11-NEXT: v_or_b32_e32 v7, v7, v15
72+
; GFX11-NEXT: s_setpc_b64 s[30:31]
6073
%or = or <2 x i128> %a, %b
6174
ret <2 x i128> %or
6275
}
@@ -65,53 +78,37 @@ define <2 x i128> @v_or_v2i128_inline_imm(<2 x i128> %a) {
6578
; GFX7-LABEL: v_or_v2i128_inline_imm:
6679
; GFX7: ; %bb.0:
6780
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68-
; GFX7-NEXT: s_mov_b64 s[4:5], 64
69-
; GFX7-NEXT: s_mov_b64 s[6:7], 0
70-
; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5]
71-
; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7]
72-
; GFX7-NEXT: v_or_b32_e32 v0, s4, v0
73-
; GFX7-NEXT: v_or_b32_e32 v1, s5, v1
74-
; GFX7-NEXT: v_or_b32_e32 v2, s6, v2
75-
; GFX7-NEXT: v_or_b32_e32 v3, s7, v3
76-
; GFX7-NEXT: v_or_b32_e32 v4, s4, v4
77-
; GFX7-NEXT: v_or_b32_e32 v5, s5, v5
78-
; GFX7-NEXT: v_or_b32_e32 v6, s6, v6
79-
; GFX7-NEXT: v_or_b32_e32 v7, s7, v7
81+
; GFX7-NEXT: v_or_b32_e32 v0, 64, v0
82+
; GFX7-NEXT: v_or_b32_e32 v4, 64, v4
8083
; GFX7-NEXT: s_setpc_b64 s[30:31]
8184
;
8285
; GFX9-LABEL: v_or_v2i128_inline_imm:
8386
; GFX9: ; %bb.0:
8487
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85-
; GFX9-NEXT: s_mov_b64 s[4:5], 64
86-
; GFX9-NEXT: s_mov_b64 s[6:7], 0
87-
; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5]
88-
; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7]
89-
; GFX9-NEXT: v_or_b32_e32 v0, s4, v0
90-
; GFX9-NEXT: v_or_b32_e32 v1, s5, v1
91-
; GFX9-NEXT: v_or_b32_e32 v2, s6, v2
92-
; GFX9-NEXT: v_or_b32_e32 v3, s7, v3
93-
; GFX9-NEXT: v_or_b32_e32 v4, s4, v4
94-
; GFX9-NEXT: v_or_b32_e32 v5, s5, v5
95-
; GFX9-NEXT: v_or_b32_e32 v6, s6, v6
96-
; GFX9-NEXT: v_or_b32_e32 v7, s7, v7
88+
; GFX9-NEXT: v_or_b32_e32 v0, 64, v0
89+
; GFX9-NEXT: v_or_b32_e32 v4, 64, v4
9790
; GFX9-NEXT: s_setpc_b64 s[30:31]
9891
;
9992
; GFX8-LABEL: v_or_v2i128_inline_imm:
10093
; GFX8: ; %bb.0:
10194
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102-
; GFX8-NEXT: s_mov_b64 s[4:5], 64
103-
; GFX8-NEXT: s_mov_b64 s[6:7], 0
104-
; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5]
105-
; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7]
106-
; GFX8-NEXT: v_or_b32_e32 v0, s4, v0
107-
; GFX8-NEXT: v_or_b32_e32 v1, s5, v1
108-
; GFX8-NEXT: v_or_b32_e32 v2, s6, v2
109-
; GFX8-NEXT: v_or_b32_e32 v3, s7, v3
110-
; GFX8-NEXT: v_or_b32_e32 v4, s4, v4
111-
; GFX8-NEXT: v_or_b32_e32 v5, s5, v5
112-
; GFX8-NEXT: v_or_b32_e32 v6, s6, v6
113-
; GFX8-NEXT: v_or_b32_e32 v7, s7, v7
95+
; GFX8-NEXT: v_or_b32_e32 v0, 64, v0
96+
; GFX8-NEXT: v_or_b32_e32 v4, 64, v4
11497
; GFX8-NEXT: s_setpc_b64 s[30:31]
98+
;
99+
; GFX10-LABEL: v_or_v2i128_inline_imm:
100+
; GFX10: ; %bb.0:
101+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102+
; GFX10-NEXT: v_or_b32_e32 v0, 64, v0
103+
; GFX10-NEXT: v_or_b32_e32 v4, 64, v4
104+
; GFX10-NEXT: s_setpc_b64 s[30:31]
105+
;
106+
; GFX11-LABEL: v_or_v2i128_inline_imm:
107+
; GFX11: ; %bb.0:
108+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109+
; GFX11-NEXT: v_or_b32_e32 v0, 64, v0
110+
; GFX11-NEXT: v_or_b32_e32 v4, 64, v4
111+
; GFX11-NEXT: s_setpc_b64 s[30:31]
115112
%or = or <2 x i128> %a, <i128 64, i128 64>
116113
ret <2 x i128> %or
117114
}

llvm/test/CodeGen/AMDGPU/GlobalISel/xor.v2i128.ll

Lines changed: 34 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
44
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
55
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6-
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX10 %s
6+
; RUN: llc -global-isel=true -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
77

88
define <2 x i128> @v_xor_v2i128(<2 x i128> %a, <2 x i128> %b) {
99
; GFX7-LABEL: v_xor_v2i128:
@@ -57,6 +57,19 @@ define <2 x i128> @v_xor_v2i128(<2 x i128> %a, <2 x i128> %b) {
5757
; GFX10-NEXT: v_xor_b32_e32 v6, v6, v14
5858
; GFX10-NEXT: v_xor_b32_e32 v7, v7, v15
5959
; GFX10-NEXT: s_setpc_b64 s[30:31]
60+
;
61+
; GFX11-LABEL: v_xor_v2i128:
62+
; GFX11: ; %bb.0:
63+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64+
; GFX11-NEXT: v_xor_b32_e32 v0, v0, v8
65+
; GFX11-NEXT: v_xor_b32_e32 v1, v1, v9
66+
; GFX11-NEXT: v_xor_b32_e32 v2, v2, v10
67+
; GFX11-NEXT: v_xor_b32_e32 v3, v3, v11
68+
; GFX11-NEXT: v_xor_b32_e32 v4, v4, v12
69+
; GFX11-NEXT: v_xor_b32_e32 v5, v5, v13
70+
; GFX11-NEXT: v_xor_b32_e32 v6, v6, v14
71+
; GFX11-NEXT: v_xor_b32_e32 v7, v7, v15
72+
; GFX11-NEXT: s_setpc_b64 s[30:31]
6073
%xor = xor <2 x i128> %a, %b
6174
ret <2 x i128> %xor
6275
}
@@ -65,53 +78,37 @@ define <2 x i128> @v_xor_v2i128_inline_imm(<2 x i128> %a) {
6578
; GFX7-LABEL: v_xor_v2i128_inline_imm:
6679
; GFX7: ; %bb.0:
6780
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68-
; GFX7-NEXT: s_mov_b64 s[4:5], 64
69-
; GFX7-NEXT: s_mov_b64 s[6:7], 0
70-
; GFX7-NEXT: s_mov_b64 s[4:5], s[4:5]
71-
; GFX7-NEXT: s_mov_b64 s[6:7], s[6:7]
72-
; GFX7-NEXT: v_xor_b32_e32 v0, s4, v0
73-
; GFX7-NEXT: v_xor_b32_e32 v1, s5, v1
74-
; GFX7-NEXT: v_xor_b32_e32 v2, s6, v2
75-
; GFX7-NEXT: v_xor_b32_e32 v3, s7, v3
76-
; GFX7-NEXT: v_xor_b32_e32 v4, s4, v4
77-
; GFX7-NEXT: v_xor_b32_e32 v5, s5, v5
78-
; GFX7-NEXT: v_xor_b32_e32 v6, s6, v6
79-
; GFX7-NEXT: v_xor_b32_e32 v7, s7, v7
81+
; GFX7-NEXT: v_xor_b32_e32 v0, 64, v0
82+
; GFX7-NEXT: v_xor_b32_e32 v4, 64, v4
8083
; GFX7-NEXT: s_setpc_b64 s[30:31]
8184
;
8285
; GFX9-LABEL: v_xor_v2i128_inline_imm:
8386
; GFX9: ; %bb.0:
8487
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85-
; GFX9-NEXT: s_mov_b64 s[4:5], 64
86-
; GFX9-NEXT: s_mov_b64 s[6:7], 0
87-
; GFX9-NEXT: s_mov_b64 s[4:5], s[4:5]
88-
; GFX9-NEXT: s_mov_b64 s[6:7], s[6:7]
89-
; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0
90-
; GFX9-NEXT: v_xor_b32_e32 v1, s5, v1
91-
; GFX9-NEXT: v_xor_b32_e32 v2, s6, v2
92-
; GFX9-NEXT: v_xor_b32_e32 v3, s7, v3
93-
; GFX9-NEXT: v_xor_b32_e32 v4, s4, v4
94-
; GFX9-NEXT: v_xor_b32_e32 v5, s5, v5
95-
; GFX9-NEXT: v_xor_b32_e32 v6, s6, v6
96-
; GFX9-NEXT: v_xor_b32_e32 v7, s7, v7
88+
; GFX9-NEXT: v_xor_b32_e32 v0, 64, v0
89+
; GFX9-NEXT: v_xor_b32_e32 v4, 64, v4
9790
; GFX9-NEXT: s_setpc_b64 s[30:31]
9891
;
9992
; GFX8-LABEL: v_xor_v2i128_inline_imm:
10093
; GFX8: ; %bb.0:
10194
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102-
; GFX8-NEXT: s_mov_b64 s[4:5], 64
103-
; GFX8-NEXT: s_mov_b64 s[6:7], 0
104-
; GFX8-NEXT: s_mov_b64 s[4:5], s[4:5]
105-
; GFX8-NEXT: s_mov_b64 s[6:7], s[6:7]
106-
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v0
107-
; GFX8-NEXT: v_xor_b32_e32 v1, s5, v1
108-
; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2
109-
; GFX8-NEXT: v_xor_b32_e32 v3, s7, v3
110-
; GFX8-NEXT: v_xor_b32_e32 v4, s4, v4
111-
; GFX8-NEXT: v_xor_b32_e32 v5, s5, v5
112-
; GFX8-NEXT: v_xor_b32_e32 v6, s6, v6
113-
; GFX8-NEXT: v_xor_b32_e32 v7, s7, v7
95+
; GFX8-NEXT: v_xor_b32_e32 v0, 64, v0
96+
; GFX8-NEXT: v_xor_b32_e32 v4, 64, v4
11497
; GFX8-NEXT: s_setpc_b64 s[30:31]
98+
;
99+
; GFX10-LABEL: v_xor_v2i128_inline_imm:
100+
; GFX10: ; %bb.0:
101+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102+
; GFX10-NEXT: v_xor_b32_e32 v0, 64, v0
103+
; GFX10-NEXT: v_xor_b32_e32 v4, 64, v4
104+
; GFX10-NEXT: s_setpc_b64 s[30:31]
105+
;
106+
; GFX11-LABEL: v_xor_v2i128_inline_imm:
107+
; GFX11: ; %bb.0:
108+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109+
; GFX11-NEXT: v_xor_b32_e32 v0, 64, v0
110+
; GFX11-NEXT: v_xor_b32_e32 v4, 64, v4
111+
; GFX11-NEXT: s_setpc_b64 s[30:31]
115112
%xor = xor <2 x i128> %a, <i128 64, i128 64>
116113
ret <2 x i128> %xor
117114
}

0 commit comments

Comments
 (0)