Skip to content

Commit 0f0a96b

Browse files
authored
[llvm][NVPTX] Strip unneeded '+0' in PTX load/store (#113017)
Remove the extraneous '+0' immediate offset part in PTX load/stores, to improve readability of output PTX code.
1 parent 5aec88f commit 0f0a96b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+1225
-1209
lines changed

clang/test/CodeGenCUDA/bf16.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ __device__ void test_arg(__bf16 *out, __bf16 in) {
2525
__device__ __bf16 test_ret( __bf16 in) {
2626
// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z8test_retDF16b_param_0];
2727
return in;
28-
// CHECK: st.param.b16 [func_retval0+0], %[[R]]
28+
// CHECK: st.param.b16 [func_retval0], %[[R]]
2929
// CHECK: ret;
3030
}
3131

@@ -35,15 +35,15 @@ __device__ __bf16 external_func( __bf16 in);
3535
// CHECK: .param .align 2 .b8 _Z9test_callDF16b_param_0[2]
3636
__device__ __bf16 test_call( __bf16 in) {
3737
// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0];
38-
// CHECK: st.param.b16 [param0+0], %[[R]];
38+
// CHECK: st.param.b16 [param0], %[[R]];
3939
// CHECK: .param .align 2 .b8 retval0[2];
4040
// CHECK: call.uni (retval0),
4141
// CHECK-NEXT: _Z13external_funcDF16b,
4242
// CHECK-NEXT: (
4343
// CHECK-NEXT: param0
4444
// CHECK-NEXT );
45-
// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0+0];
45+
// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0];
4646
return external_func(in);
47-
// CHECK: st.param.b16 [func_retval0+0], %[[RET]]
47+
// CHECK: st.param.b16 [func_retval0], %[[RET]]
4848
// CHECK: ret;
4949
}

llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,16 @@ void NVPTXInstPrinter::printMemOperand(const MCInst *MI, int OpNum,
363363
}
364364
}
365365

366+
void NVPTXInstPrinter::printOffseti32imm(const MCInst *MI, int OpNum,
367+
raw_ostream &O, const char *Modifier) {
368+
auto &Op = MI->getOperand(OpNum);
369+
assert(Op.isImm() && "Invalid operand");
370+
if (Op.getImm() != 0) {
371+
O << "+";
372+
printOperand(MI, OpNum, O);
373+
}
374+
}
375+
366376
void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum,
367377
raw_ostream &O, const char *Modifier) {
368378
const MCOperand &Op = MI->getOperand(OpNum);

llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ class NVPTXInstPrinter : public MCInstPrinter {
4545
const char *Modifier = nullptr);
4646
void printMemOperand(const MCInst *MI, int OpNum,
4747
raw_ostream &O, const char *Modifier = nullptr);
48+
void printOffseti32imm(const MCInst *MI, int OpNum, raw_ostream &O,
49+
const char *Modifier = nullptr);
4850
void printProtoIdent(const MCInst *MI, int OpNum,
4951
raw_ostream &O, const char *Modifier = nullptr);
5052
void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O,

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 59 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1934,6 +1934,10 @@ def MmaCode : Operand<i32> {
19341934
let PrintMethod = "printMmaCode";
19351935
}
19361936

1937+
def Offseti32imm : Operand<i32> {
1938+
let PrintMethod = "printOffseti32imm";
1939+
}
1940+
19371941
def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
19381942
def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
19391943

@@ -2482,21 +2486,21 @@ def ProxyReg :
24822486

24832487
let mayLoad = true in {
24842488
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
2485-
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
2486-
!strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"),
2489+
NVPTXInst<(outs regclass:$dst), (ins Offseti32imm:$b),
2490+
!strconcat("ld.param", opstr, " \t$dst, [retval0$b];"),
24872491
[]>;
24882492

24892493
class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
2490-
NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
2494+
NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins Offseti32imm:$b),
24912495
!strconcat("ld.param.v2", opstr,
2492-
" \t{{$dst, $dst2}}, [retval0+$b];"), []>;
2496+
" \t{{$dst, $dst2}}, [retval0$b];"), []>;
24932497

24942498
class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
24952499
NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
24962500
regclass:$dst4),
2497-
(ins i32imm:$b),
2501+
(ins Offseti32imm:$b),
24982502
!strconcat("ld.param.v4", opstr,
2499-
" \t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
2503+
" \t{{$dst, $dst2, $dst3, $dst4}}, [retval0$b];"),
25002504
[]>;
25012505
}
25022506

@@ -2512,8 +2516,8 @@ let mayStore = true in {
25122516
if !or(support_imm, !isa<NVPTXRegClass>(op)) then
25132517
def _ # !if(!isa<NVPTXRegClass>(op), "r", "i")
25142518
: NVPTXInst<(outs),
2515-
(ins op:$val, i32imm:$a, i32imm:$b),
2516-
"st.param" # opstr # " \t[param$a+$b], $val;",
2519+
(ins op:$val, i32imm:$a, Offseti32imm:$b),
2520+
"st.param" # opstr # " \t[param$a$b], $val;",
25172521
[]>;
25182522
}
25192523

@@ -2524,8 +2528,8 @@ let mayStore = true in {
25242528
# !if(!isa<NVPTXRegClass>(op2), "r", "i")
25252529
: NVPTXInst<(outs),
25262530
(ins op1:$val1, op2:$val2,
2527-
i32imm:$a, i32imm:$b),
2528-
"st.param.v2" # opstr # " \t[param$a+$b], {{$val1, $val2}};",
2531+
i32imm:$a, Offseti32imm:$b),
2532+
"st.param.v2" # opstr # " \t[param$a$b], {{$val1, $val2}};",
25292533
[]>;
25302534
}
25312535

@@ -2541,29 +2545,29 @@ let mayStore = true in {
25412545

25422546
: NVPTXInst<(outs),
25432547
(ins op1:$val1, op2:$val2, op3:$val3, op4:$val4,
2544-
i32imm:$a, i32imm:$b),
2548+
i32imm:$a, Offseti32imm:$b),
25452549
"st.param.v4" # opstr #
2546-
" \t[param$a+$b], {{$val1, $val2, $val3, $val4}};",
2550+
" \t[param$a$b], {{$val1, $val2, $val3, $val4}};",
25472551
[]>;
25482552
}
25492553

25502554
class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
2551-
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
2552-
!strconcat("st.param", opstr, " \t[func_retval0+$a], $val;"),
2555+
NVPTXInst<(outs), (ins regclass:$val, Offseti32imm:$a),
2556+
!strconcat("st.param", opstr, " \t[func_retval0$a], $val;"),
25532557
[]>;
25542558

25552559
class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
2556-
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
2560+
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, Offseti32imm:$a),
25572561
!strconcat("st.param.v2", opstr,
2558-
" \t[func_retval0+$a], {{$val, $val2}};"),
2562+
" \t[func_retval0$a], {{$val, $val2}};"),
25592563
[]>;
25602564

25612565
class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
25622566
NVPTXInst<(outs),
25632567
(ins regclass:$val, regclass:$val2, regclass:$val3,
2564-
regclass:$val4, i32imm:$a),
2568+
regclass:$val4, Offseti32imm:$a),
25652569
!strconcat("st.param.v4", opstr,
2566-
" \t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
2570+
" \t[func_retval0$a], {{$val, $val2, $val3, $val4}};"),
25672571
[]>;
25682572
}
25692573

@@ -2827,21 +2831,21 @@ multiclass LD<NVPTXRegClass regclass> {
28272831
def _ari : NVPTXInst<
28282832
(outs regclass:$dst),
28292833
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2830-
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2834+
i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
28312835
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2832-
"\t$dst, [$addr+$offset];", []>;
2836+
"\t$dst, [$addr$offset];", []>;
28332837
def _ari_64 : NVPTXInst<
28342838
(outs regclass:$dst),
28352839
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2836-
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2840+
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
28372841
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2838-
"\t$dst, [$addr+$offset];", []>;
2842+
"\t$dst, [$addr$offset];", []>;
28392843
def _asi : NVPTXInst<
28402844
(outs regclass:$dst),
28412845
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2842-
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2846+
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
28432847
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2844-
"\t$dst, [$addr+$offset];", []>;
2848+
"\t$dst, [$addr$offset];", []>;
28452849
}
28462850

28472851
let mayLoad=1, hasSideEffects=0 in {
@@ -2876,23 +2880,23 @@ multiclass ST<NVPTXRegClass regclass> {
28762880
(outs),
28772881
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
28782882
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr,
2879-
i32imm:$offset),
2883+
Offseti32imm:$offset),
28802884
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
2881-
" \t[$addr+$offset], $src;", []>;
2885+
" \t[$addr$offset], $src;", []>;
28822886
def _ari_64 : NVPTXInst<
28832887
(outs),
28842888
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
28852889
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr,
2886-
i32imm:$offset),
2890+
Offseti32imm:$offset),
28872891
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
2888-
" \t[$addr+$offset], $src;", []>;
2892+
" \t[$addr$offset], $src;", []>;
28892893
def _asi : NVPTXInst<
28902894
(outs),
28912895
(ins regclass:$src, LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp,
28922896
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr,
2893-
i32imm:$offset),
2897+
Offseti32imm:$offset),
28942898
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
2895-
" \t[$addr+$offset], $src;", []>;
2899+
" \t[$addr$offset], $src;", []>;
28962900
}
28972901

28982902
let mayStore=1, hasSideEffects=0 in {
@@ -2929,21 +2933,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
29292933
def _v2_ari : NVPTXInst<
29302934
(outs regclass:$dst1, regclass:$dst2),
29312935
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2932-
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2936+
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
29332937
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2934-
"\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
2938+
"\t{{$dst1, $dst2}}, [$addr$offset];", []>;
29352939
def _v2_ari_64 : NVPTXInst<
29362940
(outs regclass:$dst1, regclass:$dst2),
29372941
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2938-
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2942+
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
29392943
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2940-
"\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
2944+
"\t{{$dst1, $dst2}}, [$addr$offset];", []>;
29412945
def _v2_asi : NVPTXInst<
29422946
(outs regclass:$dst1, regclass:$dst2),
29432947
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2944-
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2948+
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
29452949
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2946-
"\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
2950+
"\t{{$dst1, $dst2}}, [$addr$offset];", []>;
29472951
def _v4_avar : NVPTXInst<
29482952
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
29492953
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
@@ -2965,21 +2969,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
29652969
def _v4_ari : NVPTXInst<
29662970
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
29672971
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2968-
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2972+
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
29692973
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2970-
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
2974+
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
29712975
def _v4_ari_64 : NVPTXInst<
29722976
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
29732977
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2974-
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2978+
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
29752979
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2976-
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
2980+
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
29772981
def _v4_asi : NVPTXInst<
29782982
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
29792983
(ins LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
2980-
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2984+
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
29812985
"ld${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
2982-
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
2986+
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr$offset];", []>;
29832987
}
29842988
let mayLoad=1, hasSideEffects=0 in {
29852989
defm LDV_i8 : LD_VEC<Int16Regs>;
@@ -3016,23 +3020,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
30163020
(outs),
30173021
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
30183022
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
3019-
Int32Regs:$addr, i32imm:$offset),
3023+
Int32Regs:$addr, Offseti32imm:$offset),
30203024
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
3021-
"\t[$addr+$offset], {{$src1, $src2}};", []>;
3025+
"\t[$addr$offset], {{$src1, $src2}};", []>;
30223026
def _v2_ari_64 : NVPTXInst<
30233027
(outs),
30243028
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
30253029
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
3026-
Int64Regs:$addr, i32imm:$offset),
3030+
Int64Regs:$addr, Offseti32imm:$offset),
30273031
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
3028-
"\t[$addr+$offset], {{$src1, $src2}};", []>;
3032+
"\t[$addr$offset], {{$src1, $src2}};", []>;
30293033
def _v2_asi : NVPTXInst<
30303034
(outs),
30313035
(ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$scope,
30323036
LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
3033-
imem:$addr, i32imm:$offset),
3037+
imem:$addr, Offseti32imm:$offset),
30343038
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
3035-
"\t[$addr+$offset], {{$src1, $src2}};", []>;
3039+
"\t[$addr$offset], {{$src1, $src2}};", []>;
30363040
def _v4_avar : NVPTXInst<
30373041
(outs),
30383042
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
@@ -3058,23 +3062,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
30583062
(outs),
30593063
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
30603064
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
3061-
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
3065+
LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, Offseti32imm:$offset),
30623066
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
3063-
"\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
3067+
"\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
30643068
def _v4_ari_64 : NVPTXInst<
30653069
(outs),
30663070
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
30673071
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
3068-
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
3072+
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, Offseti32imm:$offset),
30693073
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
3070-
"\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
3074+
"\t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
30713075
def _v4_asi : NVPTXInst<
30723076
(outs),
30733077
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
30743078
LdStCode:$sem, LdStCode:$scope, LdStCode:$addsp, LdStCode:$Vec,
3075-
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
3079+
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, Offseti32imm:$offset),
30763080
"st${sem:sem}${scope:scope}${addsp:addsp}${Vec:vec}.${Sign:sign}"
3077-
"$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
3081+
"$fromWidth \t[$addr$offset], {{$src1, $src2, $src3, $src4}};", []>;
30783082
}
30793083

30803084
let mayStore=1, hasSideEffects=0 in {
@@ -3903,4 +3907,4 @@ def atomic_thread_fence_seq_cst_cta :
39033907
Requires<[hasPTX<60>, hasSM<70>]>;
39043908
def atomic_thread_fence_acq_rel_cta :
39053909
NVPTXInst<(outs), (ins), "fence.acq_rel.cta;", []>,
3906-
Requires<[hasPTX<60>, hasSM<70>]>;
3910+
Requires<[hasPTX<60>, hasSM<70>]>;

llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ define i32 @f(ptr %p) {
1919
; ENABLED-NEXT: ld.param.u64 %rd1, [f_param_0];
2020
; ENABLED-NEXT: ld.v2.u32 {%r1, %r2}, [%rd1];
2121
; ENABLED-NEXT: add.s32 %r3, %r1, %r2;
22-
; ENABLED-NEXT: st.param.b32 [func_retval0+0], %r3;
22+
; ENABLED-NEXT: st.param.b32 [func_retval0], %r3;
2323
; ENABLED-NEXT: ret;
2424
;
2525
; DISABLED-LABEL: f(
@@ -32,7 +32,7 @@ define i32 @f(ptr %p) {
3232
; DISABLED-NEXT: ld.u32 %r1, [%rd1];
3333
; DISABLED-NEXT: ld.u32 %r2, [%rd1+4];
3434
; DISABLED-NEXT: add.s32 %r3, %r1, %r2;
35-
; DISABLED-NEXT: st.param.b32 [func_retval0+0], %r3;
35+
; DISABLED-NEXT: st.param.b32 [func_retval0], %r3;
3636
; DISABLED-NEXT: ret;
3737
%p.1 = getelementptr i32, ptr %p, i32 1
3838
%v0 = load i32, ptr %p, align 8
@@ -68,7 +68,7 @@ define half @fh(ptr %p) {
6868
; ENABLED-NEXT: cvt.f32.f16 %f11, %rs5;
6969
; ENABLED-NEXT: add.rn.f32 %f12, %f10, %f11;
7070
; ENABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12;
71-
; ENABLED-NEXT: st.param.b16 [func_retval0+0], %rs9;
71+
; ENABLED-NEXT: st.param.b16 [func_retval0], %rs9;
7272
; ENABLED-NEXT: ret;
7373
;
7474
; DISABLED-LABEL: fh(
@@ -100,7 +100,7 @@ define half @fh(ptr %p) {
100100
; DISABLED-NEXT: cvt.f32.f16 %f11, %rs5;
101101
; DISABLED-NEXT: add.rn.f32 %f12, %f10, %f11;
102102
; DISABLED-NEXT: cvt.rn.f16.f32 %rs9, %f12;
103-
; DISABLED-NEXT: st.param.b16 [func_retval0+0], %rs9;
103+
; DISABLED-NEXT: st.param.b16 [func_retval0], %rs9;
104104
; DISABLED-NEXT: ret;
105105
%p.1 = getelementptr half, ptr %p, i32 1
106106
%p.2 = getelementptr half, ptr %p, i32 2
@@ -132,7 +132,7 @@ define float @ff(ptr %p) {
132132
; ENABLED-NEXT: add.rn.f32 %f7, %f3, %f4;
133133
; ENABLED-NEXT: add.rn.f32 %f8, %f6, %f7;
134134
; ENABLED-NEXT: add.rn.f32 %f9, %f8, %f5;
135-
; ENABLED-NEXT: st.param.f32 [func_retval0+0], %f9;
135+
; ENABLED-NEXT: st.param.f32 [func_retval0], %f9;
136136
; ENABLED-NEXT: ret;
137137
;
138138
; DISABLED-LABEL: ff(
@@ -151,7 +151,7 @@ define float @ff(ptr %p) {
151151
; DISABLED-NEXT: add.rn.f32 %f7, %f3, %f4;
152152
; DISABLED-NEXT: add.rn.f32 %f8, %f6, %f7;
153153
; DISABLED-NEXT: add.rn.f32 %f9, %f8, %f5;
154-
; DISABLED-NEXT: st.param.f32 [func_retval0+0], %f9;
154+
; DISABLED-NEXT: st.param.f32 [func_retval0], %f9;
155155
; DISABLED-NEXT: ret;
156156
%p.1 = getelementptr float, ptr %p, i32 1
157157
%p.2 = getelementptr float, ptr %p, i32 2

0 commit comments

Comments
 (0)