Skip to content

Commit d65be16

Browse files
[AArch64][GlobalISel] Add combine for build_vector(unmerge, unmerge, undef, undef) (#165539)
This PR adds a new combine to the `post-legalizer-combiner` pass. The new combine checks for vectors being unmerged and subsequently padded with `G_IMPLICIT_DEF` values by building a new vector. If such a case is found, the vector being unmerged is instead just concatenated with a `G_IMPLICIT_DEF` that is as wide as the vector being unmerged. This removes unnecessary `mov` instructions in a few places.
1 parent 38811be commit d65be16

File tree

6 files changed

+131
-91
lines changed

6 files changed

+131
-91
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,12 @@ class CombinerHelper {
647647
bool matchRotateOutOfRange(MachineInstr &MI) const;
648648
void applyRotateOutOfRange(MachineInstr &MI) const;
649649

650+
/// Match a G_BUILD_VECTOR whose first half of sources are, in order, all of
/// the results of a single G_UNMERGE_VALUES and whose second half of sources
/// are G_IMPLICIT_DEF. On success \p UnmergeSrc holds the source register of
/// that G_UNMERGE_VALUES.
bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
651+
Register &UnmergeSrc) const;
652+
/// Replace the G_BUILD_VECTOR \p MI with a G_CONCAT_VECTORS of \p UnmergeSrc
/// and an undef value of the same type, then erase \p MI.
void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
653+
MachineIRBuilder &B,
654+
Register &UnmergeSrc) const;
655+
650656
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const;
651657
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const;
652658

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -921,6 +921,15 @@ def merge_of_x_and_zero : GICombineRule <
921921
[{ return Helper.matchMergeXAndZero(*${MI}, ${matchinfo}); }]),
922922
(apply [{ Helper.applyBuildFn(*${MI}, ${matchinfo}); }])>;
923923

924+
// Transform build_vector(unmerge(src, 0), ... unmerge(src, n), undef, ..., undef)
925+
// => concat_vectors(src, undef)
// The match pattern below only anchors on any G_BUILD_VECTOR; the structural
// checks are done in Helper.matchCombineBuildUnmerge, which hands the unmerge
// source register to the apply step through $unmergeSrc.
926+
def combine_build_unmerge : GICombineRule<
927+
(defs root:$root, register_matchinfo:$unmergeSrc),
928+
(match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
929+
[{ return Helper.matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]),
930+
(apply [{ Helper.applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }])
931+
>;
932+
924933
def merge_combines: GICombineGroup<[
925934
unmerge_anyext_build_vector,
926935
unmerge_merge,
@@ -930,7 +939,8 @@ def merge_combines: GICombineGroup<[
930939
unmerge_dead_to_trunc,
931940
unmerge_zext_to_zext,
932941
merge_of_x_and_undef,
933-
merge_of_x_and_zero
942+
merge_of_x_and_zero,
943+
combine_build_unmerge
934944
]>;
935945

936946
// Under certain conditions, transform:

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3463,6 +3463,88 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
34633463
isConstTrueVal(TLI, Cst, IsVector, IsFP);
34643464
}
34653465

3466+
// This pattern aims to match the following shape to avoid extra mov
3467+
// instructions
3468+
// G_BUILD_VECTOR(
3469+
// G_UNMERGE_VALUES(src, 0)
3470+
// G_UNMERGE_VALUES(src, 1)
3471+
// G_IMPLICIT_DEF
3472+
// G_IMPLICIT_DEF
3473+
// )
3474+
// ->
3475+
// G_CONCAT_VECTORS(
3476+
// src,
3477+
// undef
3478+
// )
3479+
bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
3480+
MachineRegisterInfo &MRI,
3481+
Register &UnmergeSrc) const {
3482+
auto &BV = cast<GBuildVector>(MI);
3483+
3484+
unsigned BuildUseCount = BV.getNumSources();
3485+
if (BuildUseCount % 2 != 0)
3486+
return false;
3487+
3488+
unsigned NumUnmerge = BuildUseCount / 2;
3489+
3490+
auto *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
3491+
3492+
// Check the first operand is an unmerge and has the correct number of
3493+
// operands
3494+
if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge)
3495+
return false;
3496+
3497+
UnmergeSrc = Unmerge->getSourceReg();
3498+
3499+
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3500+
LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);
3501+
3502+
// Ensure we only generate legal instructions post-legalizer
3503+
if (!IsPreLegalize &&
3504+
!isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}}))
3505+
return false;
3506+
3507+
// Check that all of the operands before the midpoint come from the same
3508+
// unmerge and are in the same order as they are used in the build_vector
3509+
for (unsigned I = 0; I < NumUnmerge; ++I) {
3510+
auto MaybeUnmergeReg = BV.getSourceReg(I);
3511+
auto *LoopUnmerge = getOpcodeDef<GUnmerge>(MaybeUnmergeReg, MRI);
3512+
3513+
if (!LoopUnmerge || LoopUnmerge != Unmerge)
3514+
return false;
3515+
3516+
if (LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg)
3517+
return false;
3518+
}
3519+
3520+
// Check that all of the unmerged values are used
3521+
if (Unmerge->getNumDefs() != NumUnmerge)
3522+
return false;
3523+
3524+
// Check that all of the operands after the mid point are undefs.
3525+
for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
3526+
auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI);
3527+
3528+
if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
3529+
return false;
3530+
}
3531+
3532+
return true;
3533+
}
3534+
3535+
// Rewrite the matched G_BUILD_VECTOR \p MI as a G_CONCAT_VECTORS of
// \p UnmergeSrc followed by an undef value of the same type as \p UnmergeSrc.
// The new instructions are inserted at \p MI, which is erased afterwards.
void CombinerHelper::applyCombineBuildUnmerge(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              MachineIRBuilder &B,
                                              Register &UnmergeSrc) const {
  assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
  B.setInstrAndDebugLoc(MI);

  // Materialise the padding operand first so it precedes the concat.
  const LLT SrcTy = MRI.getType(UnmergeSrc);
  auto Padding = B.buildUndef(SrcTy);
  Register PaddingReg = Padding.getReg(0);

  B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, PaddingReg});

  MI.eraseFromParent();
}
3547+
34663548
// This combine tries to reduce the number of scalarised G_TRUNC instructions by
34673549
// using vector truncates instead
34683550
//
@@ -8426,4 +8508,4 @@ bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
84268508
}
84278509

84288510
return false;
8429-
}
8511+
}

llvm/test/CodeGen/AArch64/fptrunc.ll

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -345,19 +345,11 @@ entry:
345345
}
346346

347347
define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) {
348-
; CHECK-SD-LABEL: fptrunc_v2f32_v2f16:
349-
; CHECK-SD: // %bb.0: // %entry
350-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
351-
; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s
352-
; CHECK-SD-NEXT: ret
353-
;
354-
; CHECK-GI-LABEL: fptrunc_v2f32_v2f16:
355-
; CHECK-GI: // %bb.0: // %entry
356-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
357-
; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
358-
; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
359-
; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s
360-
; CHECK-GI-NEXT: ret
348+
; CHECK-LABEL: fptrunc_v2f32_v2f16:
349+
; CHECK: // %bb.0: // %entry
350+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
351+
; CHECK-NEXT: fcvtn v0.4h, v0.4s
352+
; CHECK-NEXT: ret
361353
entry:
362354
%c = fptrunc <2 x float> %a to <2 x half>
363355
ret <2 x half> %c

llvm/test/CodeGen/AArch64/itofp.ll

Lines changed: 12 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5763,18 +5763,14 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
57635763
; CHECK-NOFP16-GI: // %bb.0: // %entry
57645764
; CHECK-NOFP16-GI-NEXT: scvtf v0.2d, v0.2d
57655765
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
5766-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
5767-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
5768-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
5766+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
57695767
; CHECK-NOFP16-GI-NEXT: ret
57705768
;
57715769
; CHECK-FP16-GI-LABEL: stofp_v2i64_v2f16:
57725770
; CHECK-FP16-GI: // %bb.0: // %entry
57735771
; CHECK-FP16-GI-NEXT: scvtf v0.2d, v0.2d
57745772
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
5775-
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
5776-
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
5777-
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
5773+
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
57785774
; CHECK-FP16-GI-NEXT: ret
57795775
entry:
57805776
%c = sitofp <2 x i64> %a to <2 x half>
@@ -5808,18 +5804,14 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
58085804
; CHECK-NOFP16-GI: // %bb.0: // %entry
58095805
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2d, v0.2d
58105806
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
5811-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
5812-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
5813-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
5807+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
58145808
; CHECK-NOFP16-GI-NEXT: ret
58155809
;
58165810
; CHECK-FP16-GI-LABEL: utofp_v2i64_v2f16:
58175811
; CHECK-FP16-GI: // %bb.0: // %entry
58185812
; CHECK-FP16-GI-NEXT: ucvtf v0.2d, v0.2d
58195813
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
5820-
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
5821-
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
5822-
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
5814+
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
58235815
; CHECK-FP16-GI-NEXT: ret
58245816
entry:
58255817
%c = uitofp <2 x i64> %a to <2 x half>
@@ -6232,17 +6224,13 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) {
62326224
; CHECK-NOFP16-GI-LABEL: stofp_v2i32_v2f16:
62336225
; CHECK-NOFP16-GI: // %bb.0: // %entry
62346226
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
6235-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6236-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6237-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6227+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
62386228
; CHECK-NOFP16-GI-NEXT: ret
62396229
;
62406230
; CHECK-FP16-GI-LABEL: stofp_v2i32_v2f16:
62416231
; CHECK-FP16-GI: // %bb.0: // %entry
62426232
; CHECK-FP16-GI-NEXT: scvtf v0.2s, v0.2s
6243-
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
6244-
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
6245-
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
6233+
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
62466234
; CHECK-FP16-GI-NEXT: ret
62476235
entry:
62486236
%c = sitofp <2 x i32> %a to <2 x half>
@@ -6267,17 +6255,13 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) {
62676255
; CHECK-NOFP16-GI-LABEL: utofp_v2i32_v2f16:
62686256
; CHECK-NOFP16-GI: // %bb.0: // %entry
62696257
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
6270-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6271-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6272-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6258+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
62736259
; CHECK-NOFP16-GI-NEXT: ret
62746260
;
62756261
; CHECK-FP16-GI-LABEL: utofp_v2i32_v2f16:
62766262
; CHECK-FP16-GI: // %bb.0: // %entry
62776263
; CHECK-FP16-GI-NEXT: ucvtf v0.2s, v0.2s
6278-
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
6279-
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
6280-
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
6264+
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
62816265
; CHECK-FP16-GI-NEXT: ret
62826266
entry:
62836267
%c = uitofp <2 x i32> %a to <2 x half>
@@ -6480,9 +6464,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) {
64806464
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #16
64816465
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #16
64826466
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
6483-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6484-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6485-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6467+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
64866468
; CHECK-NOFP16-GI-NEXT: ret
64876469
entry:
64886470
%c = sitofp <2 x i16> %a to <2 x half>
@@ -6509,9 +6491,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) {
65096491
; CHECK-NOFP16-GI-NEXT: movi d1, #0x00ffff0000ffff
65106492
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
65116493
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
6512-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6513-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6514-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6494+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
65156495
; CHECK-NOFP16-GI-NEXT: ret
65166496
entry:
65176497
%c = uitofp <2 x i16> %a to <2 x half>
@@ -6766,9 +6746,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
67666746
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #24
67676747
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #24
67686748
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
6769-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6770-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6771-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6749+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
67726750
; CHECK-NOFP16-GI-NEXT: ret
67736751
;
67746752
; CHECK-FP16-GI-LABEL: stofp_v2i8_v2f16:
@@ -6817,9 +6795,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
68176795
; CHECK-NOFP16-GI-NEXT: movi d1, #0x0000ff000000ff
68186796
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
68196797
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
6820-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6821-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6822-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6798+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
68236799
; CHECK-NOFP16-GI-NEXT: ret
68246800
;
68256801
; CHECK-FP16-GI-LABEL: utofp_v2i8_v2f16:

llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll

Lines changed: 14 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -762,25 +762,13 @@ define void @undef_hi3_v4f16(half %arg0) {
762762
}
763763

764764
define void @undef_hi2_v4i16(<2 x i16> %arg0) {
765-
; GFX8-SDAG-LABEL: undef_hi2_v4i16:
766-
; GFX8-SDAG: ; %bb.0:
767-
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
768-
; GFX8-SDAG-NEXT: ;;#ASMSTART
769-
; GFX8-SDAG-NEXT: ; use v[0:1]
770-
; GFX8-SDAG-NEXT: ;;#ASMEND
771-
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
772-
;
773-
; GFX8-GISEL-LABEL: undef_hi2_v4i16:
774-
; GFX8-GISEL: ; %bb.0:
775-
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
776-
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
777-
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
778-
; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
779-
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0
780-
; GFX8-GISEL-NEXT: ;;#ASMSTART
781-
; GFX8-GISEL-NEXT: ; use v[0:1]
782-
; GFX8-GISEL-NEXT: ;;#ASMEND
783-
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
765+
; GFX8-LABEL: undef_hi2_v4i16:
766+
; GFX8: ; %bb.0:
767+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
768+
; GFX8-NEXT: ;;#ASMSTART
769+
; GFX8-NEXT: ; use v[0:1]
770+
; GFX8-NEXT: ;;#ASMEND
771+
; GFX8-NEXT: s_setpc_b64 s[30:31]
784772
;
785773
; GFX9-LABEL: undef_hi2_v4i16:
786774
; GFX9: ; %bb.0:
@@ -803,25 +791,13 @@ define void @undef_hi2_v4i16(<2 x i16> %arg0) {
803791
}
804792

805793
define void @undef_hi2_v4f16(<2 x half> %arg0) {
806-
; GFX8-SDAG-LABEL: undef_hi2_v4f16:
807-
; GFX8-SDAG: ; %bb.0:
808-
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
809-
; GFX8-SDAG-NEXT: ;;#ASMSTART
810-
; GFX8-SDAG-NEXT: ; use v[0:1]
811-
; GFX8-SDAG-NEXT: ;;#ASMEND
812-
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
813-
;
814-
; GFX8-GISEL-LABEL: undef_hi2_v4f16:
815-
; GFX8-GISEL: ; %bb.0:
816-
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
817-
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
818-
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
819-
; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
820-
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0
821-
; GFX8-GISEL-NEXT: ;;#ASMSTART
822-
; GFX8-GISEL-NEXT: ; use v[0:1]
823-
; GFX8-GISEL-NEXT: ;;#ASMEND
824-
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
794+
; GFX8-LABEL: undef_hi2_v4f16:
795+
; GFX8: ; %bb.0:
796+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
797+
; GFX8-NEXT: ;;#ASMSTART
798+
; GFX8-NEXT: ; use v[0:1]
799+
; GFX8-NEXT: ;;#ASMEND
800+
; GFX8-NEXT: s_setpc_b64 s[30:31]
825801
;
826802
; GFX9-LABEL: undef_hi2_v4f16:
827803
; GFX9: ; %bb.0:
@@ -842,5 +818,3 @@ define void @undef_hi2_v4f16(<2 x half> %arg0) {
842818
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
843819
ret void
844820
}
845-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
846-
; GFX8: {{.*}}

0 commit comments

Comments
 (0)