Skip to content

Commit 1565efb

Browse files
author
Thorsten Schütt
committed
[GlobalISel] Combine G_MERGE_VALUES of x and undef
into zext x

; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s32), [[DEF]](s32)

Please continue padding merge values.

// %bits_8_15:_(s8) = G_IMPLICIT_DEF
// %0:_(s16) = G_MERGE_VALUES %bits_0_7:(s8), %bits_8_15:(s8)

%bits_8_15 is defined by G_IMPLICIT_DEF (undef). Its value is undefined, so we can pick an arbitrary value. For optimization, we pick zero.

// %0:_(s16) = G_ZEXT %bits_0_7:(s8)

The upper bits of %0 are zero and the lower bits come from %bits_0_7.
1 parent f427004 commit 1565efb

File tree

11 files changed

+222
-117
lines changed

11 files changed

+222
-117
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -925,6 +925,9 @@ class CombinerHelper {
925925
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI,
926926
BuildFnTy &MatchInfo);
927927

928+
/// Match merge_values(x, undef) -> zext(x).
/// On a successful match, \p MatchInfo receives the function that builds the
/// replacement G_ZEXT.
929+
bool matchMergeXAndUndef(const MachineInstr &MI, BuildFnTy &MatchInfo);
930+
928931
private:
929932
/// Checks for legality of an indexed variant of \p LdSt.
930933
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -856,14 +856,23 @@ def unmerge_zext_to_zext : GICombineRule<
856856
(apply [{ Helper.applyCombineUnmergeZExtToZExt(*${d}); }])
857857
>;
858858

859+
/// Transform G_MERGE_VALUES(x, undef) -> G_ZEXT(x).
/// The pattern matches a G_MERGE_VALUES with two sources whose second source
/// ($undef) is defined by a G_IMPLICIT_DEF. Helper.matchMergeXAndUndef decides
/// whether the rule fires and fills in $matchinfo, which is then handed to
/// Helper.applyBuildFn.
860+
def merge_of_x_and_undef : GICombineRule <
861+
(defs root:$root, build_fn_matchinfo:$matchinfo),
862+
(match (G_IMPLICIT_DEF $undef),
863+
(G_MERGE_VALUES $root, $x, $undef):$MI,
864+
[{ return Helper.matchMergeXAndUndef(*${MI}, ${matchinfo}); }]),
865+
(apply [{ Helper.applyBuildFn(*${MI}, ${matchinfo}); }])>;
866+
859867
def merge_combines: GICombineGroup<[
860868
unmerge_anyext_build_vector,
861869
unmerge_merge,
862870
merge_unmerge,
863871
unmerge_cst,
864872
unmerge_undef,
865873
unmerge_dead_to_trunc,
866-
unmerge_zext_to_zext
874+
unmerge_zext_to_zext,
875+
merge_of_x_and_undef
867876
]>;
868877

869878
// Under certain conditions, transform:

llvm/lib/CodeGen/GlobalISel/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMGlobalISel
66
GlobalISel.cpp
77
Combiner.cpp
88
CombinerHelper.cpp
9+
CombinerHelperArtifacts.cpp
910
CombinerHelperCasts.cpp
1011
CombinerHelperCompares.cpp
1112
CombinerHelperVectorOps.cpp
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
//===- CombinerHelperArtifacts.cpp-----------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements CombinerHelper for legalization artifacts.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
//
13+
// G_MERGE_VALUES
14+
//
15+
//===----------------------------------------------------------------------===//
16+
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
17+
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
18+
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19+
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
20+
#include "llvm/CodeGen/GlobalISel/Utils.h"
21+
#include "llvm/CodeGen/LowLevelTypeUtils.h"
22+
#include "llvm/CodeGen/MachineOperand.h"
23+
#include "llvm/CodeGen/MachineRegisterInfo.h"
24+
#include "llvm/CodeGen/TargetOpcodes.h"
25+
#include "llvm/Support/Casting.h"
26+
27+
#define DEBUG_TYPE "gi-combiner"
28+
29+
using namespace llvm;
30+
31+
/// Match a G_MERGE_VALUES of (x, undef) and prepare its replacement by a
/// G_ZEXT of x.
///
/// \param MI the G_MERGE_VALUES instruction; it is cast to GMerge.
/// \param MatchInfo on success, set to a callback that builds the G_ZEXT of
///        source 0 into the merge's destination register.
/// \returns true if the rewrite should be applied; false if source 1 has more
///          than one non-debug use or the G_ZEXT is not legal (and we are not
///          before the legalizer).
///
/// NOTE(review): this function does not itself check that source 1 is defined
/// by G_IMPLICIT_DEF or that the merge has exactly two sources; it presumably
/// relies on the merge_of_x_and_undef pattern in Combine.td for both — confirm.
bool CombinerHelper::matchMergeXAndUndef(const MachineInstr &MI,
32+
BuildFnTy &MatchInfo) {
33+
const GMerge *Merge = cast<GMerge>(&MI);
34+
35+
Register Dst = Merge->getReg(0);
36+
// Source 1 supplies the upper bits and is the operand expected to be undef.
Register Undef = Merge->getSourceReg(1);
37+
LLT DstTy = MRI.getType(Dst);
38+
LLT SrcTy = MRI.getType(Merge->getSourceReg(0));
39+
40+
// Example of the rewrite:
41+
// %bits_8_15:_(s8) = G_IMPLICIT_DEF
42+
// %0:_(s16) = G_MERGE_VALUES %bits_0_7:(s8), %bits_8_15:(s8)
43+
//
44+
// ->
45+
//
46+
// %0:_(s16) = G_ZEXT %bits_0_7:(s8)
47+
//
48+
49+
// Bail out if the undef register has other (non-debug) users, or if a G_ZEXT
// from SrcTy to DstTy is not legal and we are not before the legalizer.
// Presumably the single-use check keeps the combine from firing when the
// G_IMPLICIT_DEF would stay live anyway — TODO confirm intent.
if (!MRI.hasOneNonDBGUse(Undef) ||
50+
!isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {DstTy, SrcTy}}))
51+
return false;
52+
53+
// The callback copies Dst and the Merge pointer ([=] capture).
// NOTE(review): assumes MI is still alive when the callback runs — confirm.
MatchInfo = [=](MachineIRBuilder &B) {
54+
B.buildZExt(Dst, Merge->getSourceReg(0));
55+
};
56+
return true;
57+
}

llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ body: |
1010
bb.1:
1111
; CHECK-LABEL: name: test_combine_unmerge_merge
1212
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
13-
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
13+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1414
; CHECK-NEXT: $w0 = COPY [[DEF]](s32)
15-
; CHECK-NEXT: $w1 = COPY [[DEF1]](s32)
15+
; CHECK-NEXT: $w1 = COPY [[C]](s32)
1616
%0:_(s32) = G_IMPLICIT_DEF
1717
%1:_(s32) = G_IMPLICIT_DEF
1818
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
@@ -115,9 +115,11 @@ body: |
115115
bb.1:
116116
; CHECK-LABEL: name: test_combine_unmerge_bitcast_merge
117117
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
118-
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
119-
; CHECK-NEXT: $w0 = COPY [[DEF]](s32)
120-
; CHECK-NEXT: $w1 = COPY [[DEF1]](s32)
118+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[DEF]](s32)
119+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[ZEXT]](s64)
120+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
121+
; CHECK-NEXT: $w0 = COPY [[UV]](s32)
122+
; CHECK-NEXT: $w1 = COPY [[UV1]](s32)
121123
%0:_(s32) = G_IMPLICIT_DEF
122124
%1:_(s32) = G_IMPLICIT_DEF
123125
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
@@ -136,9 +138,8 @@ body: |
136138
bb.1:
137139
; CHECK-LABEL: name: test_combine_unmerge_merge_incompatible_types
138140
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
139-
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
140-
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32)
141-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[MV]](s64)
141+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[DEF]](s32)
142+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ZEXT]](s64)
142143
; CHECK-NEXT: $h0 = COPY [[UV]](s16)
143144
; CHECK-NEXT: $h1 = COPY [[UV1]](s16)
144145
; CHECK-NEXT: $h2 = COPY [[UV2]](s16)
@@ -539,3 +540,36 @@ body: |
539540
$q0 = COPY %un1(s128)
540541
$q1 = COPY %un2(s128)
541542
...
543+
544+
# Check that we zext the merge
545+
---
546+
name: test_merge_undef
547+
body: |
548+
bb.1:
549+
; CHECK-LABEL: name: test_merge_undef
550+
; CHECK: %opaque:_(s64) = COPY $x0
551+
; CHECK-NEXT: %me:_(s128) = G_ZEXT %opaque(s64)
552+
; CHECK-NEXT: $q0 = COPY %me(s128)
553+
%opaque:_(s64) = COPY $x0
554+
%def:_(s64) = G_IMPLICIT_DEF
555+
%me:_(s128) = G_MERGE_VALUES %opaque(s64), %def
556+
$q0 = COPY %me(s128)
557+
...
558+
559+
# Check that we don't zext the merge, multi-use
560+
---
561+
name: test_merge_undef_multi_use
562+
body: |
563+
bb.1:
564+
; CHECK-LABEL: name: test_merge_undef_multi_use
565+
; CHECK: %opaque:_(s64) = COPY $x0
566+
; CHECK-NEXT: %def:_(s64) = G_IMPLICIT_DEF
567+
; CHECK-NEXT: %me:_(s128) = G_MERGE_VALUES %opaque(s64), %def(s64)
568+
; CHECK-NEXT: $q0 = COPY %me(s128)
569+
; CHECK-NEXT: $x0 = COPY %def(s64)
570+
%opaque:_(s64) = COPY $x0
571+
%def:_(s64) = G_IMPLICIT_DEF
572+
%me:_(s128) = G_MERGE_VALUES %opaque(s64), %def
573+
$q0 = COPY %me(s128)
574+
$x0 = COPY %def(s64)
575+
...

llvm/test/CodeGen/AArch64/bswap.ll

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -45,25 +45,14 @@ define i64 @bswap_i16_to_i64_anyext(i16 %a) {
4545

4646
; The zext here is optimised to an any_extend during isel..
4747
define i128 @bswap_i16_to_i128_anyext(i16 %a) {
48-
; CHECK-SD-LABEL: bswap_i16_to_i128_anyext:
49-
; CHECK-SD: // %bb.0:
50-
; CHECK-SD-NEXT: mov w8, w0
51-
; CHECK-SD-NEXT: mov x0, xzr
52-
; CHECK-SD-NEXT: rev w8, w8
53-
; CHECK-SD-NEXT: lsr w8, w8, #16
54-
; CHECK-SD-NEXT: lsl x1, x8, #48
55-
; CHECK-SD-NEXT: ret
56-
;
57-
; CHECK-GI-LABEL: bswap_i16_to_i128_anyext:
58-
; CHECK-GI: // %bb.0:
59-
; CHECK-GI-NEXT: mov w8, w0
60-
; CHECK-GI-NEXT: mov x0, xzr
61-
; CHECK-GI-NEXT: rev w8, w8
62-
; CHECK-GI-NEXT: lsr w8, w8, #16
63-
; CHECK-GI-NEXT: bfi x8, x8, #32, #32
64-
; CHECK-GI-NEXT: and x8, x8, #0xffff
65-
; CHECK-GI-NEXT: lsl x1, x8, #48
66-
; CHECK-GI-NEXT: ret
48+
; CHECK-LABEL: bswap_i16_to_i128_anyext:
49+
; CHECK: // %bb.0:
50+
; CHECK-NEXT: mov w8, w0
51+
; CHECK-NEXT: mov x0, xzr
52+
; CHECK-NEXT: rev w8, w8
53+
; CHECK-NEXT: lsr w8, w8, #16
54+
; CHECK-NEXT: lsl x1, x8, #48
55+
; CHECK-NEXT: ret
6756
%3 = call i16 @llvm.bswap.i16(i16 %a)
6857
%4 = zext i16 %3 to i128
6958
%5 = shl i128 %4, 112

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1884,22 +1884,22 @@ define amdgpu_ps i65 @s_ashr_i65(i65 inreg %value, i65 inreg %amount) {
18841884
define amdgpu_ps i65 @s_ashr_i65_33(i65 inreg %value) {
18851885
; GCN-LABEL: s_ashr_i65_33:
18861886
; GCN: ; %bb.0:
1887-
; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1888-
; GCN-NEXT: s_lshr_b32 s0, s1, 1
1889-
; GCN-NEXT: s_mov_b32 s1, 0
1890-
; GCN-NEXT: s_lshl_b64 s[4:5], s[2:3], 31
1891-
; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1892-
; GCN-NEXT: s_ashr_i32 s2, s3, 1
1887+
; GCN-NEXT: s_mov_b32 s3, 0
1888+
; GCN-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
1889+
; GCN-NEXT: s_lshr_b32 s2, s1, 1
1890+
; GCN-NEXT: s_lshl_b64 s[0:1], s[4:5], 31
1891+
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1892+
; GCN-NEXT: s_ashr_i32 s2, s5, 1
18931893
; GCN-NEXT: ; return to shader part epilog
18941894
;
18951895
; GFX10PLUS-LABEL: s_ashr_i65_33:
18961896
; GFX10PLUS: ; %bb.0:
1897-
; GFX10PLUS-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1898-
; GFX10PLUS-NEXT: s_lshr_b32 s0, s1, 1
1899-
; GFX10PLUS-NEXT: s_mov_b32 s1, 0
1900-
; GFX10PLUS-NEXT: s_lshl_b64 s[4:5], s[2:3], 31
1901-
; GFX10PLUS-NEXT: s_ashr_i32 s2, s3, 1
1902-
; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1897+
; GFX10PLUS-NEXT: s_mov_b32 s3, 0
1898+
; GFX10PLUS-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
1899+
; GFX10PLUS-NEXT: s_lshr_b32 s2, s1, 1
1900+
; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[4:5], 31
1901+
; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1902+
; GFX10PLUS-NEXT: s_ashr_i32 s2, s5, 1
19031903
; GFX10PLUS-NEXT: ; return to shader part epilog
19041904
%result = ashr i65 %value, 33
19051905
ret i65 %result

0 commit comments

Comments
 (0)