-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[llvm][NVPTX] Strip unneeded '+0' in PTX load/store #113017
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Remove the extraneous '+0' immediate offset part in PTX load/stores, to improve readability of output PTX code.
|
@llvm/pr-subscribers-debuginfo @llvm/pr-subscribers-clang Author: Youngsuk Kim (JOE1994) ChangesRemove the extraneous '+0' immediate offset part in PTX load/stores, to improve readability of output PTX code. Patch is 474.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113017.diff 66 Files Affected:
diff --git a/clang/test/CodeGenCUDA/bf16.cu b/clang/test/CodeGenCUDA/bf16.cu
index 3c443420dbd36a..f794b83239f14a 100644
--- a/clang/test/CodeGenCUDA/bf16.cu
+++ b/clang/test/CodeGenCUDA/bf16.cu
@@ -25,7 +25,7 @@ __device__ void test_arg(__bf16 *out, __bf16 in) {
__device__ __bf16 test_ret( __bf16 in) {
// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z8test_retDF16b_param_0];
return in;
-// CHECK: st.param.b16 [func_retval0+0], %[[R]]
+// CHECK: st.param.b16 [func_retval0], %[[R]]
// CHECK: ret;
}
@@ -35,15 +35,15 @@ __device__ __bf16 external_func( __bf16 in);
// CHECK: .param .align 2 .b8 _Z9test_callDF16b_param_0[2]
__device__ __bf16 test_call( __bf16 in) {
// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0];
-// CHECK: st.param.b16 [param0+0], %[[R]];
+// CHECK: st.param.b16 [param0], %[[R]];
// CHECK: .param .align 2 .b8 retval0[2];
// CHECK: call.uni (retval0),
// CHECK-NEXT: _Z13external_funcDF16b,
// CHECK-NEXT: (
// CHECK-NEXT: param0
// CHECK-NEXT );
-// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0+0];
+// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0];
return external_func(in);
-// CHECK: st.param.b16 [func_retval0+0], %[[RET]]
+// CHECK: st.param.b16 [func_retval0], %[[RET]]
// CHECK: ret;
}
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 7d6442a611125f..3bda3b72674276 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -363,6 +363,14 @@ void NVPTXInstPrinter::printMemOperand(const MCInst *MI, int OpNum,
}
}
+void NVPTXInstPrinter::printOffseti32imm(const MCInst *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ if (auto &Op = MI->getOperand(OpNum); Op.isImm() && Op.getImm() == 0)
+ return; // don't print '+0'
+ O << "+";
+ printOperand(MI, OpNum, O);
+}
+
void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier) {
const MCOperand &Op = MI->getOperand(OpNum);
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index e6954f861cd10e..e8a4a6dbdd5324 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -45,6 +45,8 @@ class NVPTXInstPrinter : public MCInstPrinter {
const char *Modifier = nullptr);
void printMemOperand(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier = nullptr);
+ void printOffseti32imm(const MCInst *MI, int OpNum, raw_ostream &O,
+ const char *Modifier = nullptr);
void printProtoIdent(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier = nullptr);
void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O,
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 8b34ce4f1001c1..b5478b8f09ceb4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1934,6 +1934,10 @@ def MmaCode : Operand<i32> {
let PrintMethod = "printMmaCode";
}
+def Offseti32imm : Operand<i32> {
+ let PrintMethod = "printOffseti32imm";
+}
+
def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
@@ -2482,21 +2486,21 @@ def ProxyReg :
let mayLoad = true in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
- !strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"),
+ NVPTXInst<(outs regclass:$dst), (ins Offseti32imm:$b),
+ !strconcat("ld.param", opstr, " \t$dst, [retval0$b];"),
[]>;
class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
+ NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins Offseti32imm:$b),
!strconcat("ld.param.v2", opstr,
- " \t{{$dst, $dst2}}, [retval0+$b];"), []>;
+ " \t{{$dst, $dst2}}, [retval0$b];"), []>;
class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
- (ins i32imm:$b),
+ (ins Offseti32imm:$b),
!strconcat("ld.param.v4", opstr,
- " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
+ " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0$b];"),
[]>;
}
@@ -2512,8 +2516,8 @@ let mayStore = true in {
if !or(support_imm, !isa<NVPTXRegClass>(op)) then
def _ # !if(!isa<NVPTXRegClass>(op), "r", "i")
: NVPTXInst<(outs),
- (ins op:$val, i32imm:$a, i32imm:$b),
- "st.param" # opstr # " \t[param$a+$b], $val;",
+ (ins op:$val, i32imm:$a, Offseti32imm:$b),
+ "st.param" # opstr # " \t[param$a$b], $val;",
[]>;
}
@@ -2524,8 +2528,8 @@ let mayStore = true in {
# !if(!isa<NVPTXRegClass>(op2), "r", "i")
: NVPTXInst<(outs),
(ins op1:$val1, op2:$val2,
- i32imm:$a, i32imm:$b),
- "st.param.v2" # opstr # " \t[param$a+$b], {{$val1, $val2}};",
+ i32imm:$a, Offseti32imm:$b),
+ "st.param.v2" # opstr # " \t[param$a$b], {{$val1, $val2}};",
[]>;
}
@@ -2541,29 +2545,29 @@ let mayStore = true in {
: NVPTXInst<(outs),
(ins op1:$val1, op2:$val2, op3:$val3, op4:$val4,
- i32imm:$a, i32imm:$b),
+ i32imm:$a, Offseti32imm:$b),
"st.param.v4" # opstr #
- " \t[param$a+$b], {{$val1, $val2, $val3, $val4}};",
+ " \t[param$a$b], {{$val1, $val2, $val3, $val4}};",
[]>;
}
class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
- !strconcat("st.param", opstr, " \t[func_retval0+$a], $val;"),
+ NVPTXInst<(outs), (ins regclass:$val, Offseti32imm:$a),
+ !strconcat("st.param", opstr, " \t[func_retval0$a], $val;"),
[]>;
class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
+ NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, Offseti32imm:$a),
!strconcat("st.param.v2", opstr,
- " \t[func_retval0+$a], {{$val, $val2}};"),
+ " \t[func_retval0$a], {{$val, $val2}};"),
[]>;
class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs),
(ins regclass:$val, regclass:$val2, regclass:$val3,
- regclass:$val4, i32imm:$a),
+ regclass:$val4, Offseti32imm:$a),
!strconcat("st.param.v4", opstr,
- " \t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
+ " \t[func_retval0$a], {{$val, $val2, $val3, $val4}};"),
[]>;
}
@@ -2827,21 +2831,21 @@ multiclass LD<NVPTXRegClass regclass> {
def _ari : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr+$offset];", []>;
+ "\t$dst, [$addr$offset];", []>;
def _ari_64 : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr+$offset];", []>;
+ "\t$dst, [$addr$offset];", []>;
def _asi : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr+$offset];", []>;
+ "\t$dst, [$addr$offset];", []>;
}
let mayLoad=1, hasSideEffects=0 in {
@@ -2876,23 +2880,23 @@ multiclass ST<NVPTXRegClass regclass> {
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr,
- i32imm:$offset),
+ Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr+$offset], $src;", []>;
+ " \t[$addr$offset], $src;", []>;
def _ari_64 : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr,
- i32imm:$offset),
+ Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr+$offset], $src;", []>;
+ " \t[$addr$offset], $src;", []>;
def _asi : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr,
- i32imm:$offset),
+ Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr+$offset], $src;", []>;
+ " \t[$addr$offset], $src;", []>;
}
let mayStore=1, hasSideEffects=0 in {
@@ -2929,21 +2933,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
def _v2_ari : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
def _v2_ari_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
def _v2_asi : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
def _v4_avar : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
@@ -2965,21 +2969,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
def _v4_ari : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
def _v4_ari_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
def _v4_asi : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
}
let mayLoad=1, hasSideEffects=0 in {
defm LDV_i8 : LD_VEC<Int16Regs>;
@@ -3016,23 +3020,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- Int32Regs:$addr, i32imm:$offset),
+ Int32Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2}};", []>;
+ "\t[$addr$offset], {{$src1, $src2}};", []>;
def _v2_ari_64 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- Int64Regs:$addr, i32imm:$offset),
+ Int64Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2}};", []>;
+ "\t[$addr$offset], {{$src1, $src2}};", []>;
def _v2_asi : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- imem:$addr, i32imm:$offset),
+ imem:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2}};", []>;
+ "\t[$addr$offset], {{$src1, $src2}};", []>;
def _v4_avar : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
@@ -3058,23 +3062,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
+ "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
def _v4_ari_64 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
+ "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
def _v4_asi : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}"
- "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
+ "$fromWidth \t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
}
let mayStore=1, hasSideEffects=0 in {
@@ -3903,4 +3907,4 @@ def atomic_thread_fence_seq_cst_cta :
Requires<[hasPTX<60>, hasSM<70>]>;
def atomic_thread_fence_acq_rel_cta :
NVPTXInst<(outs), (ins), "fence.acq_rel.cta;", []>,
- Requires<[hasPTX<60>, hasSM<70>]>;
\ No newline at end of file
+ Requires<[hasPTX<60>, hasSM<70>]>;
diff --git a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
index bc58a700cb9828..028fab7ae54d6a 100644
--- a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
+++ b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
@@ -19,7 +19,7 @@ define i32 @f(ptr %p) {
; ENABLED-NEXT: ld.param.u64 %rd1, [f_param_0];
; ENABLED-NEXT: ld.v2.u32 {%r1, %r2}, [%rd1];
; ENABLED-NEXT: add.s32 %r3, %r1, %r2;
-; ENABLED-NEXT: st.param.b32 [func_retval0+0], %r3;
+; ENABLED-NEXT: st.param.b32 [func_retval0], %r3;
; ENABLED-NEXT: ret;
;
; DISABLED-LABEL: f(
@@ -32,7 +32,7 @@ define i32 @f(ptr %p) {
; DISABLED-NEXT: ld.u32 %r1, [%rd1];
; DISABLED-NEXT: ld.u32 %r2, [%rd1+4];
; DISABLED-NEXT: add.s32 %r3, %r1, %r2;
-; DISABLED-NEXT: st.param.b32 [func_retval0+0], %r3;
+; DISABLED-NEXT: st.param.b32 [func_retval0], %r3;
; DISABLED-NEXT: ret;
%p.1 = getelementptr i32, ptr %p, i32 1
%v0 = load i32, ptr %p, align 8
@@ -68,7 +68,7 @@ define half @fh(ptr %p) {
; ENABLED-NEXT: cvt.f32.f16 %f11, %rs5;
; ENABLED-NEXT: add.rn.f32 %f12, %f10, %f11;
; ENABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12;
-; ENABLED-NEXT: st.param.b16 [func_retval0+0], %rs9;
+; ENABLED-NEXT: st.param.b16 [func_retval0], %rs9;
; ENABLED-NEXT: ret;
;
; DISABLED-LABEL: fh(
@@ -100,7 +100,7 @@ define half @fh(ptr %p) {
; DISABLED-NEXT: cvt.f32.f16 %f11, %rs5;
; DISABLED-NEXT: add.rn.f32 %f12, %f10, %f11;
; DISABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12;
-; DISABLED-NEXT: st.param.b16 [func_retval0+0], %rs9;
+; DISABLED-NEXT: st.param.b16 [func_retval0], %rs9;
; DISABLED-NEXT: ret;
%p.1 = getelementptr half, ptr %p, i32 1
%p.2 = getelementptr half, ptr %p, i32 2
@@ -132,7 +132,7 @@ define float @ff(ptr %p) {
; ENABLED-NEXT: add.rn.f32 %f7, %f3, %f4;
; ENABLED-NEXT: add.rn.f32 %f8, %f6, %f7;
; ENABLED-NEXT: add.rn.f32 %f9, %f8, %f5;
-; ENABLED-NEXT: st.param.f32 [func_retval0+0], %f9;
+; ENABLED-NEXT: st.param.f32 [func_retval0], %f9;
; ENABLED-NEXT: ret;
;
; DISABLED-LABEL: ff(
@@ -151,7 +151,7 @@ define float @ff(ptr %p) {
; DISABLED-NEXT: add.rn.f32 %f7, %f3, %f4;
; DISABLED-NEXT: add.rn.f32 %f8, %f6, %f7;
; DISABLED-NEXT: add.rn.f32 %f9, %f8, %f5;
-; DISABLED-NEXT: st.param.f32 [func_retval0+0], %f9;
+; DISABLED-NEXT: st.param.f32 [func_retval0], %f9;
; DISABLED-NEXT: ret;
%p.1 = getelementptr float, ptr %p, i32 1
%p.2 = getelementptr float, ptr %p, i32 2
diff --git a/llvm/test/CodeGen/NVPTX/activemask.ll b/llvm/test/CodeGen/NVPTX/activemask.ll
index 1496b2ebdd4427..e1d169d17c60e9 100644
--- a/llvm/test/CodeGen/NVPTX/activemask.ll
+++ b/llvm/test/CodeGen/NVPTX/activemask.ll
@@ -6,7 +6,7 @@ declare i32 @llvm.nvvm.activemask()
; CHECK-LABEL: activemask(
;
; CHECK: ac...
[truncated]
|
|
@llvm/pr-subscribers-backend-nvptx Author: Youngsuk Kim (JOE1994) ChangesRemove the extraneous '+0' immediate offset part in PTX load/stores, to improve readability of output PTX code. Patch is 474.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113017.diff 66 Files Affected:
diff --git a/clang/test/CodeGenCUDA/bf16.cu b/clang/test/CodeGenCUDA/bf16.cu
index 3c443420dbd36a..f794b83239f14a 100644
--- a/clang/test/CodeGenCUDA/bf16.cu
+++ b/clang/test/CodeGenCUDA/bf16.cu
@@ -25,7 +25,7 @@ __device__ void test_arg(__bf16 *out, __bf16 in) {
__device__ __bf16 test_ret( __bf16 in) {
// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z8test_retDF16b_param_0];
return in;
-// CHECK: st.param.b16 [func_retval0+0], %[[R]]
+// CHECK: st.param.b16 [func_retval0], %[[R]]
// CHECK: ret;
}
@@ -35,15 +35,15 @@ __device__ __bf16 external_func( __bf16 in);
// CHECK: .param .align 2 .b8 _Z9test_callDF16b_param_0[2]
__device__ __bf16 test_call( __bf16 in) {
// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0];
-// CHECK: st.param.b16 [param0+0], %[[R]];
+// CHECK: st.param.b16 [param0], %[[R]];
// CHECK: .param .align 2 .b8 retval0[2];
// CHECK: call.uni (retval0),
// CHECK-NEXT: _Z13external_funcDF16b,
// CHECK-NEXT: (
// CHECK-NEXT: param0
// CHECK-NEXT );
-// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0+0];
+// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0];
return external_func(in);
-// CHECK: st.param.b16 [func_retval0+0], %[[RET]]
+// CHECK: st.param.b16 [func_retval0], %[[RET]]
// CHECK: ret;
}
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 7d6442a611125f..3bda3b72674276 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -363,6 +363,14 @@ void NVPTXInstPrinter::printMemOperand(const MCInst *MI, int OpNum,
}
}
+void NVPTXInstPrinter::printOffseti32imm(const MCInst *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ if (auto &Op = MI->getOperand(OpNum); Op.isImm() && Op.getImm() == 0)
+ return; // don't print '+0'
+ O << "+";
+ printOperand(MI, OpNum, O);
+}
+
void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier) {
const MCOperand &Op = MI->getOperand(OpNum);
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index e6954f861cd10e..e8a4a6dbdd5324 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -45,6 +45,8 @@ class NVPTXInstPrinter : public MCInstPrinter {
const char *Modifier = nullptr);
void printMemOperand(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier = nullptr);
+ void printOffseti32imm(const MCInst *MI, int OpNum, raw_ostream &O,
+ const char *Modifier = nullptr);
void printProtoIdent(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier = nullptr);
void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O,
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 8b34ce4f1001c1..b5478b8f09ceb4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1934,6 +1934,10 @@ def MmaCode : Operand<i32> {
let PrintMethod = "printMmaCode";
}
+def Offseti32imm : Operand<i32> {
+ let PrintMethod = "printOffseti32imm";
+}
+
def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
@@ -2482,21 +2486,21 @@ def ProxyReg :
let mayLoad = true in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
- !strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"),
+ NVPTXInst<(outs regclass:$dst), (ins Offseti32imm:$b),
+ !strconcat("ld.param", opstr, " \t$dst, [retval0$b];"),
[]>;
class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
+ NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins Offseti32imm:$b),
!strconcat("ld.param.v2", opstr,
- " \t{{$dst, $dst2}}, [retval0+$b];"), []>;
+ " \t{{$dst, $dst2}}, [retval0$b];"), []>;
class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
- (ins i32imm:$b),
+ (ins Offseti32imm:$b),
!strconcat("ld.param.v4", opstr,
- " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
+ " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0$b];"),
[]>;
}
@@ -2512,8 +2516,8 @@ let mayStore = true in {
if !or(support_imm, !isa<NVPTXRegClass>(op)) then
def _ # !if(!isa<NVPTXRegClass>(op), "r", "i")
: NVPTXInst<(outs),
- (ins op:$val, i32imm:$a, i32imm:$b),
- "st.param" # opstr # " \t[param$a+$b], $val;",
+ (ins op:$val, i32imm:$a, Offseti32imm:$b),
+ "st.param" # opstr # " \t[param$a$b], $val;",
[]>;
}
@@ -2524,8 +2528,8 @@ let mayStore = true in {
# !if(!isa<NVPTXRegClass>(op2), "r", "i")
: NVPTXInst<(outs),
(ins op1:$val1, op2:$val2,
- i32imm:$a, i32imm:$b),
- "st.param.v2" # opstr # " \t[param$a+$b], {{$val1, $val2}};",
+ i32imm:$a, Offseti32imm:$b),
+ "st.param.v2" # opstr # " \t[param$a$b], {{$val1, $val2}};",
[]>;
}
@@ -2541,29 +2545,29 @@ let mayStore = true in {
: NVPTXInst<(outs),
(ins op1:$val1, op2:$val2, op3:$val3, op4:$val4,
- i32imm:$a, i32imm:$b),
+ i32imm:$a, Offseti32imm:$b),
"st.param.v4" # opstr #
- " \t[param$a+$b], {{$val1, $val2, $val3, $val4}};",
+ " \t[param$a$b], {{$val1, $val2, $val3, $val4}};",
[]>;
}
class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
- !strconcat("st.param", opstr, " \t[func_retval0+$a], $val;"),
+ NVPTXInst<(outs), (ins regclass:$val, Offseti32imm:$a),
+ !strconcat("st.param", opstr, " \t[func_retval0$a], $val;"),
[]>;
class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
+ NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, Offseti32imm:$a),
!strconcat("st.param.v2", opstr,
- " \t[func_retval0+$a], {{$val, $val2}};"),
+ " \t[func_retval0$a], {{$val, $val2}};"),
[]>;
class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs),
(ins regclass:$val, regclass:$val2, regclass:$val3,
- regclass:$val4, i32imm:$a),
+ regclass:$val4, Offseti32imm:$a),
!strconcat("st.param.v4", opstr,
- " \t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
+ " \t[func_retval0$a], {{$val, $val2, $val3, $val4}};"),
[]>;
}
@@ -2827,21 +2831,21 @@ multiclass LD<NVPTXRegClass regclass> {
def _ari : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
- i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr+$offset];", []>;
+ "\t$dst, [$addr$offset];", []>;
def _ari_64 : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr+$offset];", []>;
+ "\t$dst, [$addr$offset];", []>;
def _asi : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr+$offset];", []>;
+ "\t$dst, [$addr$offset];", []>;
}
let mayLoad=1, hasSideEffects=0 in {
@@ -2876,23 +2880,23 @@ multiclass ST<NVPTXRegClass regclass> {
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr,
- i32imm:$offset),
+ Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr+$offset], $src;", []>;
+ " \t[$addr$offset], $src;", []>;
def _ari_64 : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr,
- i32imm:$offset),
+ Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr+$offset], $src;", []>;
+ " \t[$addr$offset], $src;", []>;
def _asi : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr,
- i32imm:$offset),
+ Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
- " \t[$addr+$offset], $src;", []>;
+ " \t[$addr$offset], $src;", []>;
}
let mayStore=1, hasSideEffects=0 in {
@@ -2929,21 +2933,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
def _v2_ari : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
def _v2_ari_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
def _v2_asi : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2}}, [$addr$offset];", []>;
def _v4_avar : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
@@ -2965,21 +2969,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
def _v4_ari : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
def _v4_ari_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
def _v4_asi : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
}
let mayLoad=1, hasSideEffects=0 in {
defm LDV_i8 : LD_VEC<Int16Regs>;
@@ -3016,23 +3020,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- Int32Regs:$addr, i32imm:$offset),
+ Int32Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2}};", []>;
+ "\t[$addr$offset], {{$src1, $src2}};", []>;
def _v2_ari_64 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- Int64Regs:$addr, i32imm:$offset),
+ Int64Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2}};", []>;
+ "\t[$addr$offset], {{$src1, $src2}};", []>;
def _v2_asi : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
- imem:$addr, i32imm:$offset),
+ imem:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2}};", []>;
+ "\t[$addr$offset], {{$src1, $src2}};", []>;
def _v4_avar : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
@@ -3058,23 +3062,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
+ "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
def _v4_ari_64 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
- "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
+ "\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
def _v4_asi : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
- LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
+ LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}"
- "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
+ "$fromWidth \t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
}
let mayStore=1, hasSideEffects=0 in {
@@ -3903,4 +3907,4 @@ def atomic_thread_fence_seq_cst_cta :
Requires<[hasPTX<60>, hasSM<70>]>;
def atomic_thread_fence_acq_rel_cta :
NVPTXInst<(outs), (ins), "fence.acq_rel.cta;", []>,
- Requires<[hasPTX<60>, hasSM<70>]>;
\ No newline at end of file
+ Requires<[hasPTX<60>, hasSM<70>]>;
diff --git a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
index bc58a700cb9828..028fab7ae54d6a 100644
--- a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
+++ b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
@@ -19,7 +19,7 @@ define i32 @f(ptr %p) {
; ENABLED-NEXT: ld.param.u64 %rd1, [f_param_0];
; ENABLED-NEXT: ld.v2.u32 {%r1, %r2}, [%rd1];
; ENABLED-NEXT: add.s32 %r3, %r1, %r2;
-; ENABLED-NEXT: st.param.b32 [func_retval0+0], %r3;
+; ENABLED-NEXT: st.param.b32 [func_retval0], %r3;
; ENABLED-NEXT: ret;
;
; DISABLED-LABEL: f(
@@ -32,7 +32,7 @@ define i32 @f(ptr %p) {
; DISABLED-NEXT: ld.u32 %r1, [%rd1];
; DISABLED-NEXT: ld.u32 %r2, [%rd1+4];
; DISABLED-NEXT: add.s32 %r3, %r1, %r2;
-; DISABLED-NEXT: st.param.b32 [func_retval0+0], %r3;
+; DISABLED-NEXT: st.param.b32 [func_retval0], %r3;
; DISABLED-NEXT: ret;
%p.1 = getelementptr i32, ptr %p, i32 1
%v0 = load i32, ptr %p, align 8
@@ -68,7 +68,7 @@ define half @fh(ptr %p) {
; ENABLED-NEXT: cvt.f32.f16 %f11, %rs5;
; ENABLED-NEXT: add.rn.f32 %f12, %f10, %f11;
; ENABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12;
-; ENABLED-NEXT: st.param.b16 [func_retval0+0], %rs9;
+; ENABLED-NEXT: st.param.b16 [func_retval0], %rs9;
; ENABLED-NEXT: ret;
;
; DISABLED-LABEL: fh(
@@ -100,7 +100,7 @@ define half @fh(ptr %p) {
; DISABLED-NEXT: cvt.f32.f16 %f11, %rs5;
; DISABLED-NEXT: add.rn.f32 %f12, %f10, %f11;
; DISABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12;
-; DISABLED-NEXT: st.param.b16 [func_retval0+0], %rs9;
+; DISABLED-NEXT: st.param.b16 [func_retval0], %rs9;
; DISABLED-NEXT: ret;
%p.1 = getelementptr half, ptr %p, i32 1
%p.2 = getelementptr half, ptr %p, i32 2
@@ -132,7 +132,7 @@ define float @ff(ptr %p) {
; ENABLED-NEXT: add.rn.f32 %f7, %f3, %f4;
; ENABLED-NEXT: add.rn.f32 %f8, %f6, %f7;
; ENABLED-NEXT: add.rn.f32 %f9, %f8, %f5;
-; ENABLED-NEXT: st.param.f32 [func_retval0+0], %f9;
+; ENABLED-NEXT: st.param.f32 [func_retval0], %f9;
; ENABLED-NEXT: ret;
;
; DISABLED-LABEL: ff(
@@ -151,7 +151,7 @@ define float @ff(ptr %p) {
; DISABLED-NEXT: add.rn.f32 %f7, %f3, %f4;
; DISABLED-NEXT: add.rn.f32 %f8, %f6, %f7;
; DISABLED-NEXT: add.rn.f32 %f9, %f8, %f5;
-; DISABLED-NEXT: st.param.f32 [func_retval0+0], %f9;
+; DISABLED-NEXT: st.param.f32 [func_retval0], %f9;
; DISABLED-NEXT: ret;
%p.1 = getelementptr float, ptr %p, i32 1
%p.2 = getelementptr float, ptr %p, i32 2
diff --git a/llvm/test/CodeGen/NVPTX/activemask.ll b/llvm/test/CodeGen/NVPTX/activemask.ll
index 1496b2ebdd4427..e1d169d17c60e9 100644
--- a/llvm/test/CodeGen/NVPTX/activemask.ll
+++ b/llvm/test/CodeGen/NVPTX/activemask.ll
@@ -6,7 +6,7 @@ declare i32 @llvm.nvvm.activemask()
; CHECK-LABEL: activemask(
;
; CHECK: ac...
[truncated]
|
Artem-B
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM overall, with a minor style nit.
| if (auto &Op = MI->getOperand(OpNum); Op.isImm() && Op.getImm() == 0) | ||
| return; // don't print '+0' | ||
| O << "+"; | ||
| printOperand(MI, OpNum, O); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: No need for the early return here. Also, instead of just ignoring non-immediate operands, we should probablly assert it, as that should never happen.
auto &Op = MI->getOperand(OpNum);
assert(Op.isImm() && "Invalid operand");
if (Op.getImm() != 0)) {
O << "+";
printOperand(MI, OpNum, O);
}
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/7305 Here is the relevant piece of the build log for the reference |
Remove the extraneous '+0' immediate offset part in PTX load/stores, to improve readability of output PTX code.