Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion llvm/lib/Transforms/Utils/Local.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3303,6 +3303,13 @@ static void combineMetadata(Instruction *K, const Instruction *J,
bool DoesKMove, bool AAOnly = false) {
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
K->getAllMetadataOtherThanDebugLoc(Metadata);

const auto IsAMDGPUMD = [=](unsigned Kind) {
return Kind ==
K->getContext().getMDKindID("amdgpu.no.fine.grained.memory") ||
Kind == K->getContext().getMDKindID("amdgpu.no.remote.memory") ||
Kind == K->getContext().getMDKindID("amdgpu.ignore.denormal.mode");
};
for (const auto &MD : Metadata) {
unsigned Kind = MD.first;
MDNode *JMD = J->getMetadata(Kind);
Expand All @@ -3311,7 +3318,10 @@ static void combineMetadata(Instruction *K, const Instruction *J,
// TODO: Assert that this switch is exhaustive for fixed MD kinds.
switch (Kind) {
default:
K->setMetadata(Kind, nullptr); // Remove unknown metadata
if (K->isAtomic() && IsAMDGPUMD(Kind))
break; // Preserve AMDGPU atomic metadata.
else
K->setMetadata(Kind, nullptr); // Remove unknown metadata
break;
case LLVMContext::MD_dbg:
llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
Expand Down Expand Up @@ -3446,6 +3456,17 @@ static void combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(LLVMContext::MD_prof,
MDNode::getMergedProfMetadata(KProf, JProf, K, J));
}

// Preserve AMDGPU atomic metadata from J, if present. K might already be
// carrying this but overwriting should cause no issue.
if (K->isAtomic()) {
if (auto *JMD = J->getMetadata("amdgpu.no.fine.grained.memory"))
K->setMetadata("amdgpu.no.fine.grained.memory", JMD);
if (auto *JMD = J->getMetadata("amdgpu.no.remote.memory"))
K->setMetadata("amdgpu.no.remote.memory", JMD);
if (auto *JMD = J->getMetadata("amdgpu.ignore.denormal.mode"))
K->setMetadata("amdgpu.ignore.denormal.mode", JMD);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't follow this part, it should be intersection

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I misunderstood how these worked / assumed they are per-module global settings. I'll add @yxsamliu to the review since I know you and him worked on this. Intersection seems correct, however I wonder if we should actually prevent sinking / combining altogether if it results in removal of the metadata, since it could incur a pretty significant performance penalty / atomics with different MD aren't really the same instruction to begin with. Thoughts?

}

void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
Expand Down
107 changes: 107 additions & 0 deletions llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
;; Test to ensure that AMDGPU atomic related metadata is not dropped when
;; instructions are sunk. Currently the metadata from the first instruction
;; is kept, which prevents full loss of optimisation information.

; RUN: opt < %s -passes=simplifycfg -sink-common-insts -S | FileCheck %s

define amdgpu_kernel void @both(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
define amdgpu_kernel void @both(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
define void @both(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) {

; CHECK-LABEL: define amdgpu_kernel void @both(
; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
; CHECK-NEXT: br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
; CHECK: [[IF_END_SINK_SPLIT]]:
; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: br label %[[IF_END]]
; CHECK: [[IF_END]]:
; CHECK-NEXT: ret void
;
entry:
%p.global = addrspacecast ptr %p to ptr addrspace(1)
br i1 %pred0, label %for.body, label %for.body1

for.body:
%0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
br label %if.end

for.body1:
br i1 %pred1, label %if.then, label %if.end

if.then:
%1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
br label %if.end

if.end:
ret void
}

define amdgpu_kernel void @from(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
; CHECK-LABEL: define amdgpu_kernel void @from(
; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
; CHECK-NEXT: br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
; CHECK: [[IF_END_SINK_SPLIT]]:
; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it valid to preserve the metadata if it's only present on one of the branches?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe I might've gotten this wrong actually, and it probably is not valid to do this - I'll elaborate a bit when replying to @arsenm's point about the intersection.

; CHECK-NEXT: br label %[[IF_END]]
; CHECK: [[IF_END]]:
; CHECK-NEXT: ret void
;
entry:
%p.global = addrspacecast ptr %p to ptr addrspace(1)
br i1 %pred0, label %for.body, label %for.body1

for.body:
%0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
br label %if.end

for.body1:
br i1 %pred1, label %if.then, label %if.end

if.then:
%1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8
br label %if.end

if.end:
ret void
}

define amdgpu_kernel void @to(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
; CHECK-LABEL: define amdgpu_kernel void @to(
; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
; CHECK-NEXT: br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
; CHECK: [[IF_END_SINK_SPLIT]]:
; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: br label %[[IF_END]]
; CHECK: [[IF_END]]:
; CHECK-NEXT: ret void
;
entry:
%p.global = addrspacecast ptr %p to ptr addrspace(1)
br i1 %pred0, label %for.body, label %for.body1

for.body:
%0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8
br label %if.end

for.body1:
br i1 %pred1, label %if.then, label %if.end

if.then:
%1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
br label %if.end

if.end:
ret void
}

!0 = !{}
;.
; CHECK: [[META0]] = !{}
;.