-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[NVPTX] Do not mark move of global address as cheap enabling more CSE #153730
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NVPTX] Do not mark move of global address as cheap enabling more CSE #153730
Conversation
|
@llvm/pr-subscribers-backend-nvptx Author: Alex MacLean (AlexMaclean) ChangesFull diff: https://github.com/llvm/llvm-project/pull/153730.diff 2 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index ebb5e32f5e6fc..18b16b350fa0c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1510,18 +1510,22 @@ let hasSideEffects = false in {
"mov.b64 \t$d, __local_depot$num;">;
}
-
-// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
-let hasSideEffects = false, isAsCheapAsAMove = true in {
- let isMoveReg = true in
+let hasSideEffects = false in {
+ let isMoveReg = true, isAsCheapAsAMove = true in
class MOVr<RegisterClass RC, string OpStr> :
BasicNVPTXInst<(outs RC:$dst), (ins RC:$src), "mov." # OpStr>;
- let isMoveImm = true in
+ let isMoveImm = true, isAsCheapAsAMove = true in
class MOVi<RegTyInfo t, string suffix> :
BasicNVPTXInst<(outs t.RC:$dst), (ins t.Imm:$src),
"mov." # suffix,
[(set t.Ty:$dst, t.ImmNode:$src)]>;
+
+ // We don't want to set isAsCheapAsAMove to true for these instructions as
+ // this would prevent CSE and resulted in regressions.
+ class MovSymInst<RegTyInfo t> :
+ BasicNVPTXInst<(outs t.RC:$dst), (ins Operand<t.Ty>:$src),
+ "mov.b" # t.Size>;
}
def MOV_B1_r : MOVr<B1, "pred">;
@@ -1539,6 +1543,9 @@ def MOV_BF16_i : MOVi<BF16RT, "b16">;
def MOV_F32_i : MOVi<F32RT, "b32">;
def MOV_F64_i : MOVi<F64RT, "b64">;
+def MOV_B32_sym : MovSymInst<I32RT>;
+def MOV_B64_sym : MovSymInst<I64RT>;
+
def to_tglobaladdr : SDNodeXForm<globaladdr, [{
return CurDAG->getTargetGlobalAddress(N->getGlobal(), SDLoc(N),
@@ -1555,11 +1562,11 @@ def to_tframeindex : SDNodeXForm<frameindex, [{
return CurDAG->getTargetFrameIndex(N->getIndex(), N->getValueType(0));
}]>;
-def : Pat<(i32 globaladdr:$dst), (MOV_B32_i (to_tglobaladdr $dst))>;
-def : Pat<(i64 globaladdr:$dst), (MOV_B64_i (to_tglobaladdr $dst))>;
+def : Pat<(i32 globaladdr:$dst), (MOV_B32_sym (to_tglobaladdr $dst))>;
+def : Pat<(i64 globaladdr:$dst), (MOV_B64_sym (to_tglobaladdr $dst))>;
-def : Pat<(i32 externalsym:$dst), (MOV_B32_i (to_texternsym $dst))>;
-def : Pat<(i64 externalsym:$dst), (MOV_B64_i (to_texternsym $dst))>;
+def : Pat<(i32 externalsym:$dst), (MOV_B32_sym (to_texternsym $dst))>;
+def : Pat<(i64 externalsym:$dst), (MOV_B64_sym (to_texternsym $dst))>;
//---- Copy Frame Index ----
def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
diff --git a/llvm/test/CodeGen/NVPTX/cse-mov-sym.ll b/llvm/test/CodeGen/NVPTX/cse-mov-sym.ll
new file mode 100644
index 0000000000000..31ecce5a66b64
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/cse-mov-sym.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s | FileCheck %s
+
+target triple = "nvptx64-nvidia-cuda"
+
+@global_smem = external addrspace(3) global [0 x i8], align 16
+
+
+;; Confirm the mov.b64 of global_smem is CSE'd. We need to make things a bit
+;; complex with a loop to make this interesting.
+define i32 @test_mov_sym(i32 %offset1, i32 %offset2, i1 %cond) {
+; CHECK-LABEL: test_mov_sym(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<4>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<8>;
+; CHECK-NEXT: .reg .b64 %rd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: ld.param.b8 %rs1, [test_mov_sym_param_2];
+; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
+; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
+; CHECK-NEXT: ld.param.b32 %r4, [test_mov_sym_param_0];
+; CHECK-NEXT: cvt.s64.s32 %rd1, %r4;
+; CHECK-NEXT: mov.b64 %rd2, global_smem;
+; CHECK-NEXT: add.s64 %rd3, %rd2, %rd1;
+; CHECK-NEXT: ld.shared.b32 %r7, [%rd3];
+; CHECK-NEXT: not.pred %p2, %p1;
+; CHECK-NEXT: @%p2 bra $L__BB0_4;
+; CHECK-NEXT: // %bb.1: // %if1.preheader
+; CHECK-NEXT: ld.param.b32 %r5, [test_mov_sym_param_1];
+; CHECK-NEXT: setp.ne.b32 %p3, %r4, %r5;
+; CHECK-NEXT: $L__BB0_2: // %if1
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: @%p3 bra $L__BB0_2;
+; CHECK-NEXT: // %bb.3: // %if2
+; CHECK-NEXT: cvt.s64.s32 %rd4, %r5;
+; CHECK-NEXT: add.s64 %rd6, %rd2, %rd4;
+; CHECK-NEXT: ld.shared.b32 %r6, [%rd6];
+; CHECK-NEXT: add.s32 %r7, %r7, %r6;
+; CHECK-NEXT: $L__BB0_4: // %end
+; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
+; CHECK-NEXT: ret;
+entry:
+ %gep = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 %offset1
+ %val = load i32, ptr addrspace(3) %gep
+ br i1 %cond, label %if1, label %end
+if1:
+ %cond2 = icmp eq i32 %offset1, %offset2
+ br i1 %cond2, label %if2, label %if1
+if2:
+ %gep2 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 %offset2
+ %val2 = load i32, ptr addrspace(3) %gep2
+ %add = add i32 %val, %val2
+ br label %end
+end:
+ %ret = phi i32 [ %add, %if2 ], [ %val, %entry ]
+ ret i32 %ret
+}
|
ThomasRaoux
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
| [(set t.Ty:$dst, t.ImmNode:$src)]>; | ||
|
|
||
| // We don't want to set isAsCheapAsAMove to true for these instructions as | ||
| // this would prevent CSE and resulted in regressions. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd mention the thread discussing the context of the issue.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added!
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/22683 Here is the relevant piece of the build log for the reference |
No description provided.