Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2408,8 +2408,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
case AMDGPU::OPERAND_REG_IMM_V2BF16:
// If the symbol INV2PI is used as the operand, the value is set to
// 0x3fc45f306dc9c882 in parseImm().
if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
Literal == 0x3fc45f306725feed) {
(Literal == 0x3fc45f306725feed || Literal == 0x3fc45f306dc9c882)) {
// This is the 1/(2*pi) which is going to be truncated to bf16 with the
// loss of precision. The constant represents idiomatic fp32 value of
// 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
Expand Down Expand Up @@ -3227,6 +3229,10 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}

/// Returns true when \p Tok is an identifier token spelled exactly "INV2PI".
static bool isInv2PiToken(const AsmToken &Tok) {
  // Anything that is not an identifier can never name the constant.
  if (!Tok.is(AsmToken::Identifier))
    return false;
  return Tok.getIdentifier() == "INV2PI";
}

ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
bool HasSP3AbsModifier, LitModifier Lit) {
// TODO: add syntactic sugar for 1/(2*PI)
Expand All @@ -3253,11 +3259,12 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,

const auto& Tok = getToken();
const auto& NextTok = peekToken();
bool IsReal = Tok.is(AsmToken::Real);
bool IsReal = Tok.is(AsmToken::Real) || isInv2PiToken(Tok);
SMLoc S = getLoc();
bool Negate = false;

if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
if (!IsReal && Tok.is(AsmToken::Minus) &&
(NextTok.is(AsmToken::Real) || isInv2PiToken(NextTok))) {
lex();
IsReal = true;
Negate = true;
Expand All @@ -3272,6 +3279,10 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
// optional sign.

StringRef Num = getTokenStr();
if (Num == "INV2PI")
// Substituting the double-precision literal below for INV2PI works for all
// operand types except bf16; addLiteralImmOperand() specifically checks for
// the value produced here to handle the bf16 case.
Num = "0.15915494309189532";
lex();

APFloat RealVal(APFloat::IEEEdouble());
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
else if (Imm == 0xC400)
O << "-4.0";
else if (Imm == 0x3118 && STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
O << "0.15915494";
O << "INV2PI";
else
return false;

Expand All @@ -550,7 +550,7 @@ static bool printImmediateBFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
else if (Imm == 0xC080)
O << "-4.0";
else if (Imm == 0x3E22 && STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
O << "0.15915494";
O << "INV2PI";
else
return false;

Expand Down Expand Up @@ -648,7 +648,7 @@ bool AMDGPUInstPrinter::printImmediateFloat32(uint32_t Imm,
O << "-4.0";
else if (Imm == 0x3e22f983 &&
STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
O << "0.15915494";
O << "INV2PI";
else
return false;

Expand Down Expand Up @@ -699,7 +699,7 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
O << "-4.0";
else if (Imm == 0x3fc45f306dc9c882 &&
STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
O << "0.15915494309189532";
O << "INV2PI";
else
printLiteral64(Imm, O, IsFP);
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.writelane.ll
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,12 @@ define amdgpu_ps float @test_writelane_imminv2pi_s_v(i32 inreg %lane, i32 %vdst.
;
; GFX8-LABEL: test_writelane_imminv2pi_s_v:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_writelane_b32 v0, 0.15915494, s2
; GFX8-NEXT: v_writelane_b32 v0, INV2PI, s2
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: test_writelane_imminv2pi_s_v:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_writelane_b32 v0, 0.15915494, s2
; GFX10-NEXT: v_writelane_b32 v0, INV2PI, s2
; GFX10-NEXT: ; return to shader part epilog
%writelane = call i32 @llvm.amdgcn.writelane(i32 bitcast (float 0x3FC45F3060000000 to i32), i32 %lane, i32 %vdst.in)
%writelane.cast = bitcast i32 %writelane to float
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ define void @materialize_not_neg4.0_i32(ptr addrspace(1) %out) {

; GCN-LABEL: {{^}}materialize_not_inv2pi_i32:
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0xc1dd067c
; VI: v_not_b32_e32 v{{[0-9]+}}, 0.15915494
; VI: v_not_b32_e32 v{{[0-9]+}}, INV2PI
define void @materialize_not_inv2pi_i32(ptr addrspace(1) %out) {
store i32 -1042479492, ptr addrspace(1) %out
ret void
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1470,7 +1470,7 @@ define half @v_fneg_inv2pi_minnum_f16(half %a) #0 {
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_max_f16_e32 v0, v0, v0
; VI-NEXT: v_min_f16_e32 v0, 0.15915494, v0
; VI-NEXT: v_min_f16_e32 v0, INV2PI, v0
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -1479,23 +1479,23 @@ define half @v_fneg_inv2pi_minnum_f16(half %a) #0 {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_min_f16_e32 v0, 0.15915494, v0
; GFX11-NEXT: v_min_f16_e32 v0, INV2PI, v0
; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_inv2pi_minnum_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, 0.15915494, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, INV2PI, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-NSZ-TRUE16-LABEL: v_fneg_inv2pi_minnum_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, 0.15915494, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, INV2PI, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half 0xH3118, half %a)
Expand All @@ -1516,7 +1516,7 @@ define half @v_fneg_neg_inv2pi_minnum_f16(half %a) #0 {
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_max_f16_e32 v0, v0, v0
; VI-NEXT: v_min_f16_e32 v0, 0.15915494, v0
; VI-NEXT: v_min_f16_e32 v0, INV2PI, v0
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -1525,23 +1525,23 @@ define half @v_fneg_neg_inv2pi_minnum_f16(half %a) #0 {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_min_f16_e32 v0, 0.15915494, v0
; GFX11-NEXT: v_min_f16_e32 v0, INV2PI, v0
; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_neg_inv2pi_minnum_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, 0.15915494, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, INV2PI, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-NSZ-TRUE16-LABEL: v_fneg_neg_inv2pi_minnum_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, 0.15915494, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, INV2PI, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half 0xH3118, half %a)
Expand Down Expand Up @@ -1650,7 +1650,7 @@ define half @v_fneg_inv2pi_minnum_foldable_use_f16(half %a, half %b) #0 {
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_max_f16_e32 v0, v0, v0
; VI-NEXT: v_min_f16_e32 v0, 0.15915494, v0
; VI-NEXT: v_min_f16_e32 v0, INV2PI, v0
; VI-NEXT: v_mul_f16_e64 v0, -v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
Expand All @@ -1659,23 +1659,23 @@ define half @v_fneg_inv2pi_minnum_foldable_use_f16(half %a, half %b) #0 {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_max_f16_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_min_f16_e32 v0, 0.15915494, v0
; GFX11-NEXT: v_min_f16_e32 v0, INV2PI, v0
; GFX11-NEXT: v_mul_f16_e64 v0, -v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11-SAFE-TRUE16-LABEL: v_fneg_inv2pi_minnum_foldable_use_f16:
; GFX11-SAFE-TRUE16: ; %bb.0:
; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, 0.15915494, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_min_f16_e32 v0.l, INV2PI, v0.l
; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
; GFX11-NSZ-TRUE16-LABEL: v_fneg_inv2pi_minnum_foldable_use_f16:
; GFX11-NSZ-TRUE16: ; %bb.0:
; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-TRUE16-NEXT: v_max_f16_e32 v0.l, v0.l, v0.l
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, 0.15915494, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_min_f16_e32 v0.l, INV2PI, v0.l
; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e64 v0.l, -v0.l, v1.l
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
%min = call half @llvm.minnum.f16(half 0xH3118, half %a)
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1857,7 +1857,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: v_mul_f32_e32 v2, 1.0, v3
; VI-NEXT: v_min_f32_e32 v2, 0.15915494, v2
; VI-NEXT: v_min_f32_e32 v2, INV2PI, v2
; VI-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
Expand Down Expand Up @@ -1905,7 +1905,7 @@ define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(ptr addrspace(1) %out, p
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: v_mul_f32_e32 v2, -1.0, v3
; VI-NEXT: v_max_f32_e32 v2, 0.15915494, v2
; VI-NEXT: v_max_f32_e32 v2, INV2PI, v2
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -1953,7 +1953,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: v_max_f16_e32 v2, v3, v3
; VI-NEXT: v_min_f16_e32 v2, 0.15915494, v2
; VI-NEXT: v_min_f16_e32 v2, INV2PI, v2
; VI-NEXT: v_xor_b32_e32 v2, 0x8000, v2
; VI-NEXT: flat_store_short v[0:1], v2
; VI-NEXT: s_endpgm
Expand Down Expand Up @@ -2002,7 +2002,7 @@ define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(ptr addrspace(1) %out, p
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: v_max_f16_e64 v2, -v3, -v3
; VI-NEXT: v_max_f16_e32 v2, 0.15915494, v2
; VI-NEXT: v_max_f16_e32 v2, INV2PI, v2
; VI-NEXT: flat_store_short v[0:1], v2
; VI-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -2051,7 +2051,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
; VI-NEXT: v_min_f64 v[0:1], v[0:1], 0.15915494309189532
; VI-NEXT: v_min_f64 v[0:1], v[0:1], INV2PI
; VI-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
Expand Down Expand Up @@ -2101,7 +2101,7 @@ define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(ptr addrspace(1) %out, p
; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; VI-NEXT: v_max_f64 v[0:1], v[0:1], 0.15915494309189532
; VI-NEXT: v_max_f64 v[0:1], v[0:1], INV2PI
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -2235,7 +2235,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(ptr addrspace(1
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: v_mul_f32_e32 v2, 1.0, v4
; VI-NEXT: v_min_f32_e32 v2, 0.15915494, v2
; VI-NEXT: v_min_f32_e32 v2, INV2PI, v2
; VI-NEXT: v_mul_f32_e64 v2, -v2, v3
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
Expand Down
Loading