Skip to content
This repository was archived by the owner on Sep 15, 2025. It is now read-only.

Commit 857e6d3

Browse files
SC llvm team
authored and committed
Merged main:bb7143f6669345825c214b26fbe336857f4bf523 into amd-gfx:a7f5b4f85e9d
Local branch amd-gfx a7f5b4f Merged main:ba976971898d74df38d155c55e008c898120d1e4 into amd-gfx:77eaf56bc521
Remote branch main bb7143f AMDGPU: Avoid creating unnecessary block split in atomic expansion (llvm#102440)
2 parents a7f5b4f + bb7143f commit 857e6d3

File tree

16 files changed

+63
-234
lines changed

16 files changed

+63
-234
lines changed

libc/benchmarks/gpu/LibcGpuBenchmark.cpp

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -126,12 +126,14 @@ void print_header() {
126126
LIBC_NAMESPACE::printf("Running Suite: %-10s\n",
127127
benchmarks[0]->get_suite_name().data());
128128
LIBC_NAMESPACE::printf("%s", RESET);
129-
LIBC_NAMESPACE::printf(
129+
cpp::string titles =
130130
"Benchmark | Cycles | Min | Max | "
131-
"Iterations | Time / Iteration | Stddev | Threads |\n");
132-
LIBC_NAMESPACE::printf(
133-
"---------------------------------------------------------------------"
134-
"--------------------------------\n");
131+
"Iterations | Time / Iteration | Stddev | Threads |\n";
132+
LIBC_NAMESPACE::printf(titles.data());
133+
134+
cpp::string separator(titles.size(), '-');
135+
separator[titles.size() - 1] = '\n';
136+
LIBC_NAMESPACE::printf(separator.data());
135137
}
136138

137139
void Benchmark::run_benchmarks() {

libc/benchmarks/gpu/src/math/sin_benchmark.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,8 @@ BENCH(NvSinVeryLarge, LIBC_NAMESPACE::__nv_sin, 30, 1000);
4848
#endif
4949

5050
#ifdef AMDGPU_MATH_FOUND
51-
BENCH(AmdgpuSin, LIBC_NAMESPACE::__ocml_sin_f64, -1023, 1023);
52-
BENCH(AmdgpuSinTwoPi, LIBC_NAMESPACE::__ocml_sin_f64, -10, 3);
53-
BENCH(AmdgpuSinTwoPow30, LIBC_NAMESPACE::__ocml_sin_f64, 0, 30);
54-
BENCH(AmdgpuSinVeryLarge, LIBC_NAMESPACE::__ocml_sin_f64, 30, 1000);
51+
BENCH(AmdSin, LIBC_NAMESPACE::__ocml_sin_f64, -1023, 1023);
52+
BENCH(AmdSinTwoPi, LIBC_NAMESPACE::__ocml_sin_f64, -10, 3);
53+
BENCH(AmdSinTwoPow30, LIBC_NAMESPACE::__ocml_sin_f64, 0, 30);
54+
BENCH(AmdSinVeryLarge, LIBC_NAMESPACE::__ocml_sin_f64, 30, 1000);
5555
#endif

libc/newhdrgen/yaml_to_classes.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -190,7 +190,15 @@ def add_function_to_yaml(yaml_file, function_details):
190190
if new_function.attributes:
191191
function_dict["attributes"] = new_function.attributes
192192

193-
yaml_data["functions"].append(function_dict)
193+
insert_index = 0
194+
for i, func in enumerate(yaml_data["functions"]):
195+
if func["name"] > new_function.name:
196+
insert_index = i
197+
break
198+
else:
199+
insert_index = len(yaml_data["functions"])
200+
201+
yaml_data["functions"].insert(insert_index, function_dict)
194202

195203
class IndentYamlListDumper(yaml.Dumper):
196204
def increase_indent(self, flow=False, indentless=False):

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3843,10 +3843,6 @@ class TargetLowering : public TargetLoweringBase {
38433843
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
38443844
virtual unsigned getJumpTableEncoding() const;
38453845

3846-
virtual MVT getJumpTableRegTy(const DataLayout &DL) const {
3847-
return getPointerTy(DL);
3848-
}
3849-
38503846
virtual const MCExpr *
38513847
LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/,
38523848
const MachineBasicBlock * /*MBB*/, unsigned /*uid*/,

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 507869
19+
#define LLVM_MAIN_REVISION 507873
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2977,7 +2977,7 @@ void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
29772977
// Emit the code for the jump table
29782978
assert(JT.SL && "Should set SDLoc for SelectionDAG!");
29792979
assert(JT.Reg != -1U && "Should lower JT Header first!");
2980-
EVT PTy = DAG.getTargetLoweringInfo().getJumpTableRegTy(DAG.getDataLayout());
2980+
EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
29812981
SDValue Index = DAG.getCopyFromReg(getControlRoot(), *JT.SL, JT.Reg, PTy);
29822982
SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
29832983
SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, *JT.SL, MVT::Other,
@@ -3005,13 +3005,12 @@ void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
30053005
// This value may be smaller or larger than the target's pointer type, and
30063006
// therefore require extension or truncating.
30073007
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3008-
SwitchOp =
3009-
DAG.getZExtOrTrunc(Sub, dl, TLI.getJumpTableRegTy(DAG.getDataLayout()));
3008+
SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
30103009

30113010
unsigned JumpTableReg =
3012-
FuncInfo.CreateReg(TLI.getJumpTableRegTy(DAG.getDataLayout()));
3013-
SDValue CopyTo =
3014-
DAG.getCopyToReg(getControlRoot(), dl, JumpTableReg, SwitchOp);
3011+
FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
3012+
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
3013+
JumpTableReg, SwitchOp);
30153014
JT.Reg = JumpTableReg;
30163015

30173016
if (!JTH.FallthroughUnreachable) {

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -16655,9 +16655,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
1665516655
//
1665616656
// With this expansion we produce the following code:
1665716657
// [...]
16658-
// br label %atomicrmw.check.shared
16659-
//
16660-
// atomicrmw.check.shared:
1666116658
// %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %addr)
1666216659
// br i1 %is.shared, label %atomicrmw.shared, label %atomicrmw.check.private
1666316660
//
@@ -16700,8 +16697,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
1670016697
Function *F = BB->getParent();
1670116698
BasicBlock *ExitBB =
1670216699
BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
16703-
BasicBlock *CheckSharedBB =
16704-
BasicBlock::Create(Ctx, "atomicrmw.check.shared", F, ExitBB);
1670516700
BasicBlock *SharedBB = BasicBlock::Create(Ctx, "atomicrmw.shared", F, ExitBB);
1670616701
BasicBlock *CheckPrivateBB =
1670716702
BasicBlock::Create(Ctx, "atomicrmw.check.private", F, ExitBB);
@@ -16728,9 +16723,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
1672816723

1672916724
std::prev(BB->end())->eraseFromParent();
1673016725
Builder.SetInsertPoint(BB);
16731-
Builder.CreateBr(CheckSharedBB);
16732-
16733-
Builder.SetInsertPoint(CheckSharedBB);
1673416726
CallInst *IsShared = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_shared, {},
1673516727
{Addr}, nullptr, "is.shared");
1673616728
Builder.CreateCondBr(IsShared, SharedBB, CheckPrivateBB);

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 3 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@
2525
#include "llvm/CodeGen/Analysis.h"
2626
#include "llvm/CodeGen/ISDOpcodes.h"
2727
#include "llvm/CodeGen/MachineFunction.h"
28-
#include "llvm/CodeGen/MachineJumpTableInfo.h"
2928
#include "llvm/CodeGen/MachineMemOperand.h"
3029
#include "llvm/CodeGen/SelectionDAG.h"
3130
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -583,7 +582,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
583582
setOperationAction(ISD::ROTR, MVT::i8, Expand);
584583
setOperationAction(ISD::BSWAP, MVT::i16, Expand);
585584

586-
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
585+
// Indirect branch is not supported.
586+
// This also disables Jump Table creation.
587+
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
587588
setOperationAction(ISD::BRIND, MVT::Other, Expand);
588589

589590
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
@@ -944,9 +945,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
944945
MAKE_CASE(NVPTXISD::Dummy)
945946
MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED)
946947
MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED)
947-
MAKE_CASE(NVPTXISD::BrxEnd)
948-
MAKE_CASE(NVPTXISD::BrxItem)
949-
MAKE_CASE(NVPTXISD::BrxStart)
950948
MAKE_CASE(NVPTXISD::Tex1DFloatS32)
951949
MAKE_CASE(NVPTXISD::Tex1DFloatFloat)
952950
MAKE_CASE(NVPTXISD::Tex1DFloatFloatLevel)
@@ -2787,8 +2785,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
27872785
return LowerFP_ROUND(Op, DAG);
27882786
case ISD::FP_EXTEND:
27892787
return LowerFP_EXTEND(Op, DAG);
2790-
case ISD::BR_JT:
2791-
return LowerBR_JT(Op, DAG);
27922788
case ISD::VAARG:
27932789
return LowerVAARG(Op, DAG);
27942790
case ISD::VASTART:
@@ -2814,41 +2810,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
28142810
}
28152811
}
28162812

2817-
SDValue NVPTXTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
2818-
SDLoc DL(Op);
2819-
SDValue Chain = Op.getOperand(0);
2820-
const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2821-
SDValue Index = Op.getOperand(2);
2822-
2823-
unsigned JId = JT->getIndex();
2824-
MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2825-
ArrayRef<MachineBasicBlock *> MBBs = MJTI->getJumpTables()[JId].MBBs;
2826-
2827-
SDValue IdV = DAG.getConstant(JId, DL, MVT::i32);
2828-
2829-
// Generate BrxStart node
2830-
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2831-
Chain = DAG.getNode(NVPTXISD::BrxStart, DL, VTs, Chain, IdV);
2832-
2833-
// Generate BrxItem nodes
2834-
assert(!MBBs.empty());
2835-
for (MachineBasicBlock *MBB : MBBs.drop_back())
2836-
Chain = DAG.getNode(NVPTXISD::BrxItem, DL, VTs, Chain.getValue(0),
2837-
DAG.getBasicBlock(MBB), Chain.getValue(1));
2838-
2839-
// Generate BrxEnd nodes
2840-
SDValue EndOps[] = {Chain.getValue(0), DAG.getBasicBlock(MBBs.back()), Index,
2841-
IdV, Chain.getValue(1)};
2842-
SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, VTs, EndOps);
2843-
2844-
return BrxEnd;
2845-
}
2846-
2847-
// This will prevent AsmPrinter from trying to print the jump tables itself.
2848-
unsigned NVPTXTargetLowering::getJumpTableEncoding() const {
2849-
return MachineJumpTableInfo::EK_Inline;
2850-
}
2851-
28522813
// This function is almost a copy of SelectionDAG::expandVAArg().
28532814
// The only diff is that this one produces loads from local address space.
28542815
SDValue NVPTXTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {

llvm/lib/Target/NVPTX/NVPTXISelLowering.h

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -62,9 +62,6 @@ enum NodeType : unsigned {
6262
BFI,
6363
PRMT,
6464
DYNAMIC_STACKALLOC,
65-
BrxStart,
66-
BrxItem,
67-
BrxEnd,
6865
Dummy,
6966

7067
LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
@@ -583,11 +580,6 @@ class NVPTXTargetLowering : public TargetLowering {
583580
return true;
584581
}
585582

586-
// The default is the same as pointer type, but brx.idx only accepts i32
587-
MVT getJumpTableRegTy(const DataLayout &) const override { return MVT::i32; }
588-
589-
unsigned getJumpTableEncoding() const override;
590-
591583
bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
592584

593585
// The default is to transform llvm.ctlz(x, false) (where false indicates that
@@ -645,8 +637,6 @@ class NVPTXTargetLowering : public TargetLowering {
645637

646638
SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
647639

648-
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
649-
650640
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
651641
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
652642

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 0 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -3880,44 +3880,6 @@ def DYNAMIC_STACKALLOC64 :
38803880
[(set Int64Regs:$ptr, (dyn_alloca Int64Regs:$size, (i32 timm:$align)))]>,
38813881
Requires<[hasPTX<73>, hasSM<52>]>;
38823882

3883-
3884-
//
3885-
// BRX
3886-
//
3887-
3888-
def SDTBrxStartProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
3889-
def SDTBrxItemProfile : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
3890-
def SDTBrxEndProfile : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisInt<1>, SDTCisInt<2>]>;
3891-
3892-
def brx_start :
3893-
SDNode<"NVPTXISD::BrxStart", SDTBrxStartProfile,
3894-
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
3895-
def brx_item :
3896-
SDNode<"NVPTXISD::BrxItem", SDTBrxItemProfile,
3897-
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
3898-
def brx_end :
3899-
SDNode<"NVPTXISD::BrxEnd", SDTBrxEndProfile,
3900-
[SDNPHasChain, SDNPInGlue, SDNPSideEffect]>;
3901-
3902-
let isTerminator = 1, isBranch = 1, isIndirectBranch = 1 in {
3903-
3904-
def BRX_START :
3905-
NVPTXInst<(outs), (ins i32imm:$id),
3906-
"$$L_brx_$id: .branchtargets",
3907-
[(brx_start (i32 imm:$id))]>;
3908-
3909-
def BRX_ITEM :
3910-
NVPTXInst<(outs), (ins brtarget:$target),
3911-
"\t$target,",
3912-
[(brx_item bb:$target)]>;
3913-
3914-
def BRX_END :
3915-
NVPTXInst<(outs), (ins brtarget:$target, Int32Regs:$val, i32imm:$id),
3916-
"\t$target;\n\tbrx.idx \t$val, $$L_brx_$id;",
3917-
[(brx_end bb:$target, (i32 Int32Regs:$val), (i32 imm:$id))]>;
3918-
}
3919-
3920-
39213883
include "NVPTXIntrinsics.td"
39223884

39233885
//-----------------------------------

0 commit comments

Comments
 (0)