Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1765,7 +1765,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// 32-bit amount.
const LLT ValTy = Query.Types[0];
const LLT AmountTy = Query.Types[1];
return ValTy.getSizeInBits() <= 16 &&
return ValTy.isScalar() && ValTy.getSizeInBits() <= 16 &&
AmountTy.getSizeInBits() < 16;
}, changeTo(1, S16));
Shifts.maxScalarIf(typeIs(0, S16), 1, S16);
Expand Down
174 changes: 174 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,176 @@ body: |

...

# test_ashr_v4s1_v4s1: legalization of G_ASHR with <4 x s1> value and amount.
#
# Input: the low 4 bits of $vgpr0 (value) and $vgpr1 (shift amount) are each
# truncated to s4 and bitcast to <4 x s1>; the vector G_ASHR result is bitcast
# back to s4, zero-extended to s32 and returned in $vgpr0.
#
# Expected output (all prefixes): the operation is expanded per element.
# Elements 1..3 of each operand are extracted with G_LSHR by 1, 2 and 3, each
# value element is sign-extended from bit 0 with G_SEXT_INREG ..., 1, each
# amount element is masked with 1, and the four results are masked, shifted
# back into position, OR-ed together and finally masked with 15.
#   - SI performs the per-element G_ASHR on s32.
#   - VI and GFX9PLUS perform the per-element G_ASHR on s16.
#
# NOTE(review): presumably the regression test for restricting the 16-bit
# shift-amount rule in AMDGPULegalizerInfo to scalar value types -- confirm
# against the accompanying legalizer change.
---
name: test_ashr_v4s1_v4s1
body: |
bb.0:
liveins: $vgpr0, $vgpr1

; SI-LABEL: name: test_ashr_v4s1_v4s1
; SI: liveins: $vgpr0, $vgpr1
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32)
; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1
; SI-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[AND]](s32)
; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C]]
; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1
; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[AND1]](s32)
; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C]]
; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 1
; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND2]](s32)
; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C]]
; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 1
; SI-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[AND3]](s32)
; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32)
; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C]]
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY2]](s32)
; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC1]]
; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C]]
; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[COPY3]](s32)
; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC2]]
; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C]]
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY4]](s32)
; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC3]]
; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
; SI-NEXT: $vgpr0 = COPY [[AND8]](s32)
;
; VI-LABEL: name: test_ashr_v4s1_v4s1
; VI: liveins: $vgpr0, $vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32)
; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32)
; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[AND]](s16)
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
; VI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1
; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32)
; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC3]], [[AND1]](s16)
; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C4]]
; VI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 1
; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG2]](s32)
; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC5]], [[AND2]](s16)
; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C4]]
; VI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 1
; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG3]](s32)
; VI-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC7]], [[AND3]](s16)
; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C4]]
; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C4]]
; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16)
; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL]]
; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[ASHR2]], [[C4]]
; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND6]], [[C5]](s16)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[SHL1]]
; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[ASHR3]], [[C4]]
; VI-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C6]](s16)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[SHL2]]
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
; VI-NEXT: $vgpr0 = COPY [[AND8]](s32)
;
; GFX9PLUS-LABEL: name: test_ashr_v4s1_v4s1
; GFX9PLUS: liveins: $vgpr0, $vgpr1
; GFX9PLUS-NEXT: {{ $}}
; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
; GFX9PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
; GFX9PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
; GFX9PLUS-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32)
; GFX9PLUS-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
; GFX9PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
; GFX9PLUS-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1
; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32)
; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[AND]](s16)
; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX9PLUS-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
; GFX9PLUS-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1
; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32)
; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC3]], [[AND1]](s16)
; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
; GFX9PLUS-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C4]]
; GFX9PLUS-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 1
; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG2]](s32)
; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC5]], [[AND2]](s16)
; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
; GFX9PLUS-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C4]]
; GFX9PLUS-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 1
; GFX9PLUS-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG3]](s32)
; GFX9PLUS-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC7]], [[AND3]](s16)
; GFX9PLUS-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C4]]
; GFX9PLUS-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C4]]
; GFX9PLUS-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16)
; GFX9PLUS-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL]]
; GFX9PLUS-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[ASHR2]], [[C4]]
; GFX9PLUS-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
; GFX9PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND6]], [[C5]](s16)
; GFX9PLUS-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[SHL1]]
; GFX9PLUS-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[ASHR3]], [[C4]]
; GFX9PLUS-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
; GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C6]](s16)
; GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[SHL2]]
; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16)
; GFX9PLUS-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
; GFX9PLUS-NEXT: $vgpr0 = COPY [[AND8]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s4) = G_TRUNC %0
%3:_(s4) = G_TRUNC %1
%4:_(<4 x s1>) = G_BITCAST %2
%5:_(<4 x s1>) = G_BITCAST %3
%6:_(<4 x s1>) = G_ASHR %4, %5
%7:_(s4) = G_BITCAST %6
%8:_(s32) = G_ZEXT %7
$vgpr0 = COPY %8
...

---
name: test_ashr_v4s16_v4s16
body: |
Expand Down Expand Up @@ -2258,3 +2428,7 @@ body: |
%6:_(s96) = G_ANYEXT %5
$vgpr0_vgpr1_vgpr2 = COPY %6
...
## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
# GFX9PLUS: {{.*}}
# SI: {{.*}}
# VI: {{.*}}