Skip to content

Commit e1ee164

Browse files
author
Thorsten Schütt
committed
remove G_AND combines
1 parent 6bf97b2 commit e1ee164

File tree

12 files changed

+51
-120
lines changed

12 files changed

+51
-120
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp

Lines changed: 0 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,6 @@ bool CombinerHelper::matchCombineZextTrunc(const MachineInstr &ZextMI,
378378

379379
unsigned DstSize = DstTy.getScalarSizeInBits();
380380
unsigned MidSize = MRI.getType(Mid).getScalarSizeInBits();
381-
unsigned SrcSize = SrcTy.getScalarSizeInBits();
382381

383382
// Are the truncated bits known to be zero?
384383
if (DstTy == SrcTy &&
@@ -387,66 +386,5 @@ bool CombinerHelper::matchCombineZextTrunc(const MachineInstr &ZextMI,
387386
return true;
388387
}
389388

390-
// If the sizes are just right we can convert this into a logical
391-
// 'and', which will be much cheaper than the pair of casts.
392-
393-
// If we're actually extending zero bits, then if
394-
// SrcSize < DstSize: zext(Src & mask)
395-
// SrcSize == DstSize: Src & mask
396-
// SrcSize > DstSize: trunc(Src) & mask
397-
398-
if (DstSize == SrcSize) {
399-
// Src & mask.
400-
401-
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, {DstTy}}) ||
402-
!isConstantLegalOrBeforeLegalizer(DstTy))
403-
return false;
404-
405-
// build mask.
406-
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
407-
408-
MatchInfo = [=](MachineIRBuilder &B) {
409-
auto Mask = B.buildConstant(DstTy, AndValue);
410-
B.buildAnd(Dst, Src, Mask);
411-
};
412-
return true;
413-
}
414-
415-
// if (SrcSize < DstSize) {
416-
// // zext(Src & mask).
417-
//
418-
// if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, {SrcTy}}) ||
419-
// !isConstantLegalOrBeforeLegalizer(SrcTy) ||
420-
// !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {DstTy, SrcTy}}))
421-
// return false;
422-
//
423-
// APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
424-
//
425-
// MatchInfo = [=](MachineIRBuilder &B) {
426-
// auto Mask = B.buildConstant(SrcTy, AndValue);
427-
// auto And = B.buildAnd(SrcTy, Src, Mask);
428-
// B.buildZExt(Dst, And);
429-
// };
430-
// return true;
431-
// }
432-
433-
// if (SrcSize > DstSize) {
434-
// // trunc(Src) & mask.
435-
//
436-
// if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, {DstTy}}) ||
437-
// !isConstantLegalOrBeforeLegalizer(DstTy) ||
438-
// !isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}}))
439-
// return false;
440-
//
441-
// APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
442-
//
443-
// MatchInfo = [=](MachineIRBuilder &B) {
444-
// auto Mask = B.buildConstant(DstTy, AndValue);
445-
// auto Trunc = B.buildTrunc(DstTy, Src);
446-
// B.buildAnd(Dst, Trunc, Mask);
447-
// };
448-
// return true;
449-
// }
450-
451389
return false;
452390
}

llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ body: |
4949
; CHECK: liveins: $x0, $x1
5050
; CHECK-NEXT: {{ $}}
5151
; CHECK-NEXT: %arg1:_(s64) = COPY $x0
52-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
53-
; CHECK-NEXT: %zext:_(s64) = G_AND %arg1, [[C]]
52+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64)
53+
; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[TRUNC]](s32)
5454
; CHECK-NEXT: $x0 = COPY %zext(s64)
5555
; CHECK-NEXT: RET_ReallyLR implicit $x0
5656
%arg1:_(s64) = COPY $x0

llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@ body: |
1010
; CHECK: liveins: $w0, $w1
1111
; CHECK-NEXT: {{ $}}
1212
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
13-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
14-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
15-
; CHECK-NEXT: $x1 = COPY [[AND]](s64)
13+
; CHECK-NEXT: $x1 = COPY [[COPY]](s64)
1614
%0:_(s64) = COPY $x0
1715
%2:_(s32) = nuw G_TRUNC %0
1816
%3:_(s64) = G_ZEXT %2
@@ -27,9 +25,9 @@ body: |
2725
; CHECK: liveins: $w0, $w1
2826
; CHECK-NEXT: {{ $}}
2927
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
30-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
31-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
32-
; CHECK-NEXT: $x1 = COPY [[AND]](s64)
28+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = nsw G_TRUNC [[COPY]](s64)
29+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
30+
; CHECK-NEXT: $x1 = COPY [[ZEXT]](s64)
3331
%0:_(s64) = COPY $x0
3432
%2:_(s32) = nsw G_TRUNC %0
3533
%3:_(s64) = G_ZEXT %2
@@ -44,9 +42,9 @@ body: |
4442
; CHECK: liveins: $w0, $w1
4543
; CHECK-NEXT: {{ $}}
4644
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
47-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
48-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
49-
; CHECK-NEXT: $x1 = COPY [[AND]](s64)
45+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
46+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
47+
; CHECK-NEXT: $x1 = COPY [[ZEXT]](s64)
5048
%0:_(s64) = COPY $x0
5149
%2:_(s32) = G_TRUNC %0
5250
%3:_(s64) = G_ZEXT %2
@@ -302,10 +300,7 @@ body: |
302300
; CHECK-NEXT: {{ $}}
303301
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
304302
; CHECK-NEXT: %sv0:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[COPY]](s64)
305-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
306-
; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C]](s64)
307-
; CHECK-NEXT: %z:_(<vscale x 2 x s64>) = G_AND %sv0, [[SPLAT_VECTOR]]
308-
; CHECK-NEXT: $z0 = COPY %z(<vscale x 2 x s64>)
303+
; CHECK-NEXT: $z0 = COPY %sv0(<vscale x 2 x s64>)
309304
%0:_(s64) = COPY $x0
310305
%1:_(s64) = COPY $x1
311306
%sv0:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %0:_(s64)

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,9 @@ body: |
165165
; CHECK-NEXT: {{ $}}
166166
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
167167
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
168-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
169-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXTLOAD]], [[C]]
170-
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
168+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SEXTLOAD]](s32)
169+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
170+
; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32)
171171
; CHECK-NEXT: $w1 = COPY [[SEXTLOAD]](s32)
172172
%0:_(p0) = COPY $x0
173173
%1:_(s8) = G_LOAD %0 :: (load (s8))

llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -453,8 +453,8 @@ define amdgpu_ps i32 @s_bswap_v2i16(<2 x i16> inreg %src) {
453453
; GFX7-NEXT: s_lshl_b32 s2, s1, 8
454454
; GFX7-NEXT: s_bfe_u32 s1, s1, 0x80008
455455
; GFX7-NEXT: s_or_b32 s1, s1, s2
456-
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
457-
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
456+
; GFX7-NEXT: s_and_b32 s1, 0xffff, s1
457+
; GFX7-NEXT: s_and_b32 s0, 0xffff, s0
458458
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
459459
; GFX7-NEXT: s_or_b32 s0, s0, s1
460460
; GFX7-NEXT: ; return to shader part epilog

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -413,19 +413,19 @@ body: |
413413
; GFX6: liveins: $vgpr0
414414
; GFX6-NEXT: {{ $}}
415415
; GFX6-NEXT: %argument:_(s32) = COPY $vgpr0
416+
; GFX6-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32)
416417
; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
417-
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
418-
; GFX6-NEXT: %extend:_(s32) = G_AND %argument, [[C]]
418+
; GFX6-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16)
419419
; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
420420
; GFX6-NEXT: $vgpr0 = COPY %shl(s32)
421421
;
422422
; GFX9-LABEL: name: do_not_shl_s32_by_16_from_zext_s16
423423
; GFX9: liveins: $vgpr0
424424
; GFX9-NEXT: {{ $}}
425425
; GFX9-NEXT: %argument:_(s32) = COPY $vgpr0
426+
; GFX9-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32)
426427
; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
427-
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
428-
; GFX9-NEXT: %extend:_(s32) = G_AND %argument, [[C]]
428+
; GFX9-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16)
429429
; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
430430
; GFX9-NEXT: $vgpr0 = COPY %shl(s32)
431431
%argument:_(s32) = COPY $vgpr0

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -285,19 +285,19 @@ body: |
285285
; GFX6: liveins: $vgpr0
286286
; GFX6-NEXT: {{ $}}
287287
; GFX6-NEXT: %argument:_(s32) = COPY $vgpr0
288+
; GFX6-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32)
288289
; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
289-
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
290-
; GFX6-NEXT: %extend:_(s32) = G_AND %argument, [[C]]
290+
; GFX6-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16)
291291
; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
292292
; GFX6-NEXT: $vgpr0 = COPY %shl(s32)
293293
;
294294
; GFX9-LABEL: name: do_not_shl_s32_by_16_from_zext_s16
295295
; GFX9: liveins: $vgpr0
296296
; GFX9-NEXT: {{ $}}
297297
; GFX9-NEXT: %argument:_(s32) = COPY $vgpr0
298+
; GFX9-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32)
298299
; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
299-
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
300-
; GFX9-NEXT: %extend:_(s32) = G_AND %argument, [[C]]
300+
; GFX9-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16)
301301
; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
302302
; GFX9-NEXT: $vgpr0 = COPY %shl(s32)
303303
%argument:_(s32) = COPY $vgpr0

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-zext-trunc.mir

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ body: |
3636
; GCN: liveins: $vgpr0
3737
; GCN-NEXT: {{ $}}
3838
; GCN-NEXT: %var:_(s32) = COPY $vgpr0
39-
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
40-
; GCN-NEXT: %zext:_(s32) = G_AND %var, [[C]]
39+
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC %var(s32)
40+
; GCN-NEXT: %zext:_(s32) = G_ZEXT [[TRUNC]](s16)
4141
; GCN-NEXT: $vgpr0 = COPY %zext(s32)
4242
%var:_(s32) = COPY $vgpr0
4343
%cFFFFF:_(s32) = G_CONSTANT i32 1048575
@@ -136,9 +136,8 @@ body: |
136136
; GCN-NEXT: %c7FFF:_(s32) = G_CONSTANT i32 32767
137137
; GCN-NEXT: %c:_(<2 x s32>) = G_BUILD_VECTOR %cFFFFF(s32), %c7FFF(s32)
138138
; GCN-NEXT: %low_bits:_(<2 x s32>) = G_AND %var, %c
139-
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
140-
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
141-
; GCN-NEXT: %zext:_(<2 x s32>) = G_AND %low_bits, [[BUILD_VECTOR]]
139+
; GCN-NEXT: %trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s32>)
140+
; GCN-NEXT: %zext:_(<2 x s32>) = G_ZEXT %trunc(<2 x s16>)
142141
; GCN-NEXT: $vgpr0_vgpr1 = COPY %zext(<2 x s32>)
143142
%var:_(<2 x s32>) = COPY $vgpr0_vgpr1
144143
%cFFFFF:_(s32) = G_CONSTANT i32 1048575
@@ -234,8 +233,8 @@ body: |
234233
; GCN: liveins: $vgpr0
235234
; GCN-NEXT: {{ $}}
236235
; GCN-NEXT: %var:_(s64) = COPY $vgpr0_vgpr1
237-
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
238-
; GCN-NEXT: %zext:_(s64) = G_AND %var, [[C]]
236+
; GCN-NEXT: %trunc:_(s16) = G_TRUNC %var(s64)
237+
; GCN-NEXT: %zext:_(s64) = G_ZEXT %trunc(s16)
239238
; GCN-NEXT: $vgpr0_vgpr1 = COPY %zext(s64)
240239
%var:_(s64) = COPY $vgpr0_vgpr1
241240
%trunc:_(s16) = G_TRUNC %var(s64)

llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3893,8 +3893,8 @@ define amdgpu_ps i32 @s_fshl_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <
38933893
; GFX6-NEXT: s_and_b32 s3, 0xffff, s4
38943894
; GFX6-NEXT: s_lshr_b32 s2, s2, s3
38953895
; GFX6-NEXT: s_or_b32 s1, s1, s2
3896-
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
3897-
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
3896+
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
3897+
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
38983898
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
38993899
; GFX6-NEXT: s_or_b32 s0, s0, s1
39003900
; GFX6-NEXT: ; return to shader part epilog
@@ -4498,12 +4498,12 @@ define amdgpu_ps i48 @s_fshl_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <
44984498
; GFX6-NEXT: s_bfe_u32 s3, s5, 0xf0001
44994499
; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
45004500
; GFX6-NEXT: s_lshr_b32 s3, s3, s4
4501-
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
4501+
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
45024502
; GFX6-NEXT: s_or_b32 s2, s2, s3
4503-
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
4503+
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
45044504
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
45054505
; GFX6-NEXT: s_or_b32 s0, s0, s1
4506-
; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
4506+
; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
45074507
; GFX6-NEXT: ; return to shader part epilog
45084508
;
45094509
; GFX8-LABEL: s_fshl_v3i16:
@@ -4856,13 +4856,13 @@ define amdgpu_ps <2 x i32> @s_fshl_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %
48564856
; GFX6-NEXT: s_bfe_u32 s4, s7, 0xf0001
48574857
; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
48584858
; GFX6-NEXT: s_lshr_b32 s4, s4, s5
4859-
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
4859+
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
48604860
; GFX6-NEXT: s_or_b32 s3, s3, s4
4861-
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
4861+
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
48624862
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
48634863
; GFX6-NEXT: s_or_b32 s0, s0, s1
4864-
; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
4865-
; GFX6-NEXT: s_and_b32 s2, s3, 0xffff
4864+
; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
4865+
; GFX6-NEXT: s_and_b32 s2, 0xffff, s3
48664866
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
48674867
; GFX6-NEXT: s_or_b32 s1, s1, s2
48684868
; GFX6-NEXT: ; return to shader part epilog

llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3614,8 +3614,8 @@ define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <
36143614
; GFX6-NEXT: s_and_b32 s3, 0xffff, s4
36153615
; GFX6-NEXT: s_lshr_b32 s2, s2, s3
36163616
; GFX6-NEXT: s_or_b32 s1, s1, s2
3617-
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
3618-
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
3617+
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
3618+
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
36193619
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
36203620
; GFX6-NEXT: s_or_b32 s0, s0, s1
36213621
; GFX6-NEXT: ; return to shader part epilog
@@ -4342,12 +4342,12 @@ define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <
43424342
; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
43434343
; GFX6-NEXT: s_lshl_b32 s2, s2, s5
43444344
; GFX6-NEXT: s_lshr_b32 s3, s3, s4
4345-
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
4345+
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
43464346
; GFX6-NEXT: s_or_b32 s2, s2, s3
4347-
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
4347+
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
43484348
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
43494349
; GFX6-NEXT: s_or_b32 s0, s0, s1
4350-
; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
4350+
; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
43514351
; GFX6-NEXT: ; return to shader part epilog
43524352
;
43534353
; GFX8-LABEL: s_fshr_v3i16:
@@ -4752,8 +4752,8 @@ define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %
47524752
; GFX6-NEXT: s_and_b32 s5, 0xffff, s8
47534753
; GFX6-NEXT: s_lshr_b32 s4, s4, s5
47544754
; GFX6-NEXT: s_or_b32 s1, s1, s4
4755-
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
4756-
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
4755+
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
4756+
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
47574757
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
47584758
; GFX6-NEXT: s_or_b32 s0, s0, s1
47594759
; GFX6-NEXT: s_lshl_b32 s1, s2, 1
@@ -4784,8 +4784,8 @@ define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %
47844784
; GFX6-NEXT: s_and_b32 s4, 0xffff, s5
47854785
; GFX6-NEXT: s_lshr_b32 s3, s3, s4
47864786
; GFX6-NEXT: s_or_b32 s2, s2, s3
4787-
; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
4788-
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
4787+
; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
4788+
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
47894789
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
47904790
; GFX6-NEXT: s_or_b32 s1, s1, s2
47914791
; GFX6-NEXT: ; return to shader part epilog

0 commit comments

Comments
 (0)