Skip to content

Commit 25b80bd

Browse files
committed
Clang formatter
1 parent 4bd860b commit 25b80bd

File tree

3 files changed

+96
-91
lines changed

3 files changed

+96
-91
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2328,13 +2328,13 @@ class AMDGPUWaveReduce<LLVMType data_ty = llvm_anyint_ty> : Intrinsic<
23282328
[IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree, ImmArg<ArgIndex<1>>]>;
23292329

23302330
multiclass AMDGPUWaveReduceOps<list<string> Operations> {
2331-
foreach Op = Operations in {
2332-
def Op : AMDGPUWaveReduce;
2333-
}
2331+
foreach Op = Operations in { def Op : AMDGPUWaveReduce; }
23342332
}
23352333

2336-
defvar Operations = ["umin", "min", "umax", "max", "uadd", "add", "usub", "sub", "and", "or", "xor"];
2337-
defm int_amdgcn_wave_reduce_ : AMDGPUWaveReduceOps<Operations>;
2334+
defvar Operations = [
2335+
"umin", "min", "umax", "max", "uadd", "add", "usub", "sub", "and", "or", "xor"
2336+
];
2337+
defm int_amdgcn_wave_reduce_ : AMDGPUWaveReduceOps<Operations>;
23382338

23392339
def int_amdgcn_readfirstlane :
23402340
Intrinsic<[llvm_any_ty], [LLVMMatchType<0>],

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 88 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -4940,26 +4940,26 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
49404940
return LoopBB;
49414941
}
49424942

4943-
static uint32_t getInitialValueForWaveReduction(unsigned Opc){
4944-
switch(Opc){
4945-
case AMDGPU::S_MIN_U32:
4946-
return std::numeric_limits<uint32_t>::max();
4947-
case AMDGPU::S_MIN_I32:
4948-
return std::numeric_limits<int32_t>::max();
4949-
case AMDGPU::S_MAX_U32:
4950-
return std::numeric_limits<u_int32_t>::lowest();
4951-
case AMDGPU::S_MAX_I32:
4952-
return std::numeric_limits<int32_t>::min();
4953-
case AMDGPU::S_ADD_I32:
4954-
case AMDGPU::S_SUB_I32:
4955-
case AMDGPU::S_OR_B32:
4956-
case AMDGPU::S_XOR_B32:
4957-
return 0x00000000;
4958-
case AMDGPU::S_AND_B32:
4959-
return 0xFFFFFFFF;
4960-
default:
4961-
llvm_unreachable("Unexpected opcode in getInitialValueForWaveReduction");
4962-
}
4943+
static uint32_t getInitialValueForWaveReduction(unsigned Opc) {
4944+
switch (Opc) {
4945+
case AMDGPU::S_MIN_U32:
4946+
return std::numeric_limits<uint32_t>::max();
4947+
case AMDGPU::S_MIN_I32:
4948+
return std::numeric_limits<int32_t>::max();
4949+
case AMDGPU::S_MAX_U32:
4950+
return std::numeric_limits<uint32_t>::min();
4951+
case AMDGPU::S_MAX_I32:
4952+
return std::numeric_limits<int32_t>::min();
4953+
case AMDGPU::S_ADD_I32:
4954+
case AMDGPU::S_SUB_I32:
4955+
case AMDGPU::S_OR_B32:
4956+
case AMDGPU::S_XOR_B32:
4957+
return std::numeric_limits<uint32_t>::min();
4958+
case AMDGPU::S_AND_B32:
4959+
return std::numeric_limits<uint32_t>::max();
4960+
default:
4961+
llvm_unreachable("Unexpected opcode in getInitialValueForWaveReduction");
4962+
}
49634963
}
49644964

49654965
static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
@@ -4977,72 +4977,77 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
49774977
Register DstReg = MI.getOperand(0).getReg();
49784978
MachineBasicBlock *RetBB = nullptr;
49794979
if (isSGPR) {
4980-
switch(Opc){
4981-
case AMDGPU::S_MIN_U32:
4982-
case AMDGPU::S_MIN_I32:
4983-
case AMDGPU::S_MAX_U32:
4984-
case AMDGPU::S_MAX_I32:
4985-
case AMDGPU::S_AND_B32:
4986-
case AMDGPU::S_OR_B32:{
4987-
// Idempotent operations.
4988-
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B32), DstReg).addReg(SrcReg);
4989-
RetBB = &BB;
4990-
break;
4991-
}
4992-
case AMDGPU::S_XOR_B32:
4993-
case AMDGPU::S_ADD_I32:
4994-
case AMDGPU::S_SUB_I32:{
4995-
const TargetRegisterClass *WaveMaskRegClass = TRI->getWaveMaskRegClass();
4996-
const TargetRegisterClass *DstRegClass = MRI.getRegClass(DstReg);
4997-
Register ExecMask = MRI.createVirtualRegister(WaveMaskRegClass);
4998-
Register ActiveLanes = MRI.createVirtualRegister(DstRegClass);
4999-
5000-
bool IsWave32 = ST.isWave32();
5001-
unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5002-
unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
5003-
unsigned CountReg = IsWave32 ? AMDGPU::S_BCNT1_I32_B32 : AMDGPU::S_BCNT1_I32_B64;
5004-
5005-
auto Exec =
5006-
BuildMI(BB, MI, DL, TII->get(MovOpc), ExecMask).addReg(ExecReg);
5007-
5008-
auto NewAccumulator = BuildMI(BB, MI, DL, TII->get(CountReg), ActiveLanes)
5009-
.addReg(Exec->getOperand(0).getReg());
5010-
5011-
switch(Opc){
5012-
case AMDGPU::S_XOR_B32:{
5013-
// Performing an XOR operation on a uniform value
5014-
// depends on the parity of the number of active lanes.
5015-
// For even parity, the result will be 0, for odd
5016-
// parity the result will be the same as the input value.
5017-
Register ParityRegister = MRI.createVirtualRegister(DstRegClass);
5018-
5019-
auto ParityReg = BuildMI(BB, MI, DL, TII->get(AMDGPU::S_AND_B32), ParityRegister)
4980+
switch (Opc) {
4981+
case AMDGPU::S_MIN_U32:
4982+
case AMDGPU::S_MIN_I32:
4983+
case AMDGPU::S_MAX_U32:
4984+
case AMDGPU::S_MAX_I32:
4985+
case AMDGPU::S_AND_B32:
4986+
case AMDGPU::S_OR_B32: {
4987+
// Idempotent operations.
4988+
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B32), DstReg).addReg(SrcReg);
4989+
RetBB = &BB;
4990+
break;
4991+
}
4992+
case AMDGPU::S_XOR_B32:
4993+
case AMDGPU::S_ADD_I32:
4994+
case AMDGPU::S_SUB_I32: {
4995+
const TargetRegisterClass *WaveMaskRegClass = TRI->getWaveMaskRegClass();
4996+
const TargetRegisterClass *DstRegClass = MRI.getRegClass(DstReg);
4997+
Register ExecMask = MRI.createVirtualRegister(WaveMaskRegClass);
4998+
Register ActiveLanes = MRI.createVirtualRegister(DstRegClass);
4999+
5000+
bool IsWave32 = ST.isWave32();
5001+
unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5002+
unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
5003+
unsigned CountReg =
5004+
IsWave32 ? AMDGPU::S_BCNT1_I32_B32 : AMDGPU::S_BCNT1_I32_B64;
5005+
5006+
auto Exec =
5007+
BuildMI(BB, MI, DL, TII->get(MovOpc), ExecMask).addReg(ExecReg);
5008+
5009+
auto NewAccumulator = BuildMI(BB, MI, DL, TII->get(CountReg), ActiveLanes)
5010+
.addReg(Exec->getOperand(0).getReg());
5011+
5012+
switch (Opc) {
5013+
case AMDGPU::S_XOR_B32: {
5014+
// Performing an XOR operation on a uniform value
5015+
// depends on the parity of the number of active lanes.
5016+
// For even parity, the result will be 0, for odd
5017+
// parity the result will be the same as the input value.
5018+
Register ParityRegister = MRI.createVirtualRegister(DstRegClass);
5019+
5020+
auto ParityReg =
5021+
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_AND_B32), ParityRegister)
50205022
.addReg(NewAccumulator->getOperand(0).getReg())
50215023
.addImm(1);
5022-
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
5023-
.addReg(SrcReg)
5024-
.addReg(ParityReg->getOperand(0).getReg()) ;
5025-
break;
5026-
}
5027-
case AMDGPU::S_SUB_I32:{
5028-
Register NegatedVal = MRI.createVirtualRegister(DstRegClass);
5029-
5030-
// Take the negation of the source operand.
5031-
auto InvertedValReg = BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), NegatedVal).addImm(-1).addReg(SrcReg);
5032-
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
5033-
.addReg(InvertedValReg->getOperand(0).getReg())
5034-
.addReg(NewAccumulator->getOperand(0).getReg());
5035-
break;
5036-
}
5037-
case AMDGPU::S_ADD_I32:{
5038-
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
5039-
.addReg(SrcReg)
5040-
.addReg(NewAccumulator->getOperand(0).getReg());
5041-
break;
5042-
}
5043-
}
5044-
RetBB = &BB;
5024+
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
5025+
.addReg(SrcReg)
5026+
.addReg(ParityReg->getOperand(0).getReg());
5027+
break;
5028+
}
5029+
case AMDGPU::S_SUB_I32: {
5030+
Register NegatedVal = MRI.createVirtualRegister(DstRegClass);
5031+
5032+
// Take the negation of the source operand.
5033+
auto InvertedValReg =
5034+
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), NegatedVal)
5035+
.addImm(-1)
5036+
.addReg(SrcReg);
5037+
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
5038+
.addReg(InvertedValReg->getOperand(0).getReg())
5039+
.addReg(NewAccumulator->getOperand(0).getReg());
5040+
break;
5041+
}
5042+
case AMDGPU::S_ADD_I32: {
5043+
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
5044+
.addReg(SrcReg)
5045+
.addReg(NewAccumulator->getOperand(0).getReg());
5046+
break;
50455047
}
5048+
}
5049+
RetBB = &BB;
5050+
}
50465051
}
50475052
} else {
50485053
// TODO: Implement DPP Strategy and switch based on immediate strategy

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ def : GCNPat <(vt (int_amdgcn_set_inactive vt:$src, vt:$inactive)),
277277
def : GCNPat<(i32 (int_amdgcn_set_inactive_chain_arg i32:$src, i32:$inactive)),
278278
(V_SET_INACTIVE_B32 0, VGPR_32:$src, 0, VGPR_32:$inactive, (IMPLICIT_DEF))>;
279279

280-
// clang-format off
280+
// clang-format off
281281
defvar int_amdgcn_wave_reduce_ = "int_amdgcn_wave_reduce_";
282282
multiclass
283283
AMDGPUWaveReducePseudoGenerator<string Op, string DataType, string Size> {
@@ -288,9 +288,9 @@ multiclass
288288
[(set i32 : $sdst, (!cast<AMDGPUWaveReduce>(int_amdgcn_wave_reduce_ #Op) i32 : $src, i32 : $strategy))]> {}
289289
}
290290
}
291-
// clang-format on
291+
// clang-format on
292292

293-
// Input list : [Operation_name,
293+
// Input list : [Operation_name,
294294
// type - Signed(I)/Unsigned(U)/Float(F)/Bitwise(B),
295295
// Size_in_bits]
296296
defvar Operations = [

0 commit comments

Comments
 (0)