Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -839,8 +839,11 @@ class LLVM_ABI TargetLoweringBase {
/// Return true if the variant with 2 variable shifts is preferred.
/// Return false if there is no preference.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
// By default, let's assume that no one prefers shifts.
return false;
// By default, let's assume that no one prefers shifts for vectors
EVT VT = X.getValueType();

// Prefer shifts for legal types
return isOperationLegal(ISD::SHL, VT);
}

/// Return true if it is profitable to fold a pair of shifts into a mask.
Expand Down
47 changes: 29 additions & 18 deletions llvm/test/CodeGen/AMDGPU/extract-lowbits.ll
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,16 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
; SI-NEXT: v_and_b32_e32 v0, v1, v0
; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
Comment on lines +106 to +107
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a neutral change at best

; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_c0:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
; VI-NEXT: v_lshrrev_b32_e64 v1, v1, -1
; VI-NEXT: v_and_b32_e32 v0, v1, v0
; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
Expand All @@ -121,12 +121,23 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
}

define i32 @bzhi32_c0_clamp(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_c0_clamp:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v1, 31, v1
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
; SI-LABEL: bzhi32_c0_clamp:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_and_b32_e32 v1, 31, v1
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
Comment on lines +126 to +130
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regression, broke a BFE pattern?

; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_c0_clamp:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_and_b32_e32 v1, 31, v1
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%low5bits = and i32 %numlowbits, 31
%numhighbits = sub i32 32, %low5bits
%mask = lshr i32 -1, %numhighbits
Expand All @@ -139,16 +150,16 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
; SI-NEXT: v_and_b32_e32 v0, v1, v0
; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_c1_indexzext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_sub_u16_e32 v1, 32, v1
; VI-NEXT: v_lshrrev_b32_e64 v1, v1, -1
; VI-NEXT: v_and_b32_e32 v0, v1, v0
; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
Expand All @@ -162,16 +173,16 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
; SI-NEXT: v_and_b32_e32 v0, v0, v1
; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_c4_commutative:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
; VI-NEXT: v_lshrrev_b32_e64 v1, v1, -1
; VI-NEXT: v_and_b32_e32 v0, v0, v1
; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
Expand Down
14 changes: 6 additions & 8 deletions llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, ptr add
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
Expand All @@ -279,17 +279,16 @@ define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, ptr add
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: LSHR * T0.W, literal.x, PV.W,
; EG-NEXT: -1(nan), 0(0.000000e+00)
; EG-NEXT: AND_INT T0.X, PV.W, KC0[2].Y,
; EG-NEXT: LSHL * T1.W, KC0[2].Y, PV.W,
; EG-NEXT: LSHR T0.X, PV.W, T0.W,
; EG-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: bzhi32_c1_indexzext:
; CM: ; %bb.0:
; CM-NEXT: ALU 0, @8, KC0[], KC1[]
; CM-NEXT: TEX 0 @6
; CM-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
Expand All @@ -302,9 +301,8 @@ define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, ptr add
; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; CM-NEXT: AND_INT * T0.W, PV.W, literal.x,
; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; CM-NEXT: LSHR * T0.W, literal.x, PV.W,
; CM-NEXT: -1(nan), 0(0.000000e+00)
; CM-NEXT: AND_INT * T0.X, PV.W, KC0[2].Y,
; CM-NEXT: LSHL * T1.W, KC0[2].Y, PV.W,
; CM-NEXT: LSHR * T0.X, PV.W, T0.W,
; CM-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%numhighbits = sub i8 32, %numlowbits
Expand Down