Skip to content

Commit 9fb3e80

Browse files
committed
Make fixes based on recent top-of-tree (ToT) changes, adjust tests, and expand LoadV8 unpacking-mov handling to cover v2i32 packed types
1 parent 9d16efb commit 9fb3e80

File tree

7 files changed

+81
-86
lines changed

7 files changed

+81
-86
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5660,9 +5660,9 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
56605660
Opcode = NVPTXISD::LoadV4;
56615661
break;
56625662
case NVPTXISD::LoadV4:
5663-
// V8 is only supported for f32. Don't forget, we're not changing the load
5664-
// size here. This is already a 256-bit load.
5665-
if (ElementVT != MVT::v2f32)
5663+
// V8 is only supported for f32/i32. Don't forget, we're not changing the
5664+
// load size here. This is already a 256-bit load.
5665+
if (ElementVT != MVT::v2f32 && ElementVT != MVT::v2i32)
56665666
return SDValue();
56675667
OldNumOutputs = 4;
56685668
Opcode = NVPTXISD::LoadV8;
@@ -5737,9 +5737,9 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
57375737
Opcode = NVPTXISD::StoreV4;
57385738
break;
57395739
case NVPTXISD::StoreV4:
5740-
// V8 is only supported for f32. Don't forget, we're not changing the store
5741-
// size here. This is already a 256-bit store.
5742-
if (ElementVT != MVT::v2f32)
5740+
// V8 is only supported for f32/i32. Don't forget, we're not changing the
5741+
// store size here. This is already a 256-bit store.
5742+
if (ElementVT != MVT::v2f32 && ElementVT != MVT::v2i32)
57435743
return SDValue();
57445744
Opcode = NVPTXISD::StoreV8;
57455745
break;

llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,7 +1124,7 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
11241124
CI->getType(), CI->getParamAlign(0).valueOrOne(),
11251125
cast<PointerType>(CI->getArgOperand(0)->getType())
11261126
->getAddressSpace(),
1127-
isConstantIntVector(CI->getArgOperand(2))
1127+
isConstantIntVector(CI->getArgOperand(1))
11281128
? TTI::MaskKind::ConstantMask
11291129
: TTI::MaskKind::VariableOrConstantMask))
11301130
return false;
@@ -1136,7 +1136,7 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
11361136
CI->getParamAlign(1).valueOrOne(),
11371137
cast<PointerType>(CI->getArgOperand(1)->getType())
11381138
->getAddressSpace(),
1139-
isConstantIntVector(CI->getArgOperand(3))
1139+
isConstantIntVector(CI->getArgOperand(2))
11401140
? TTI::MaskKind::ConstantMask
11411141
: TTI::MaskKind::VariableOrConstantMask))
11421142
return false;

llvm/test/CodeGen/NVPTX/ldg-invariant-256.ll

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -346,19 +346,15 @@ define i32 @ld_global_v8i32(ptr addrspace(1) %ptr) {
346346
; SM100-LABEL: ld_global_v8i32(
347347
; SM100: {
348348
; SM100-NEXT: .reg .b32 %r<16>;
349-
; SM100-NEXT: .reg .b64 %rd<6>;
349+
; SM100-NEXT: .reg .b64 %rd<2>;
350350
; SM100-EMPTY:
351351
; SM100-NEXT: // %bb.0:
352352
; SM100-NEXT: ld.param.b64 %rd1, [ld_global_v8i32_param_0];
353-
; SM100-NEXT: ld.global.nc.v4.b64 {%rd2, %rd3, %rd4, %rd5}, [%rd1];
354-
; SM100-NEXT: mov.b64 {%r1, %r2}, %rd5;
355-
; SM100-NEXT: mov.b64 {%r3, %r4}, %rd4;
356-
; SM100-NEXT: mov.b64 {%r5, %r6}, %rd3;
357-
; SM100-NEXT: mov.b64 {%r7, %r8}, %rd2;
358-
; SM100-NEXT: add.s32 %r9, %r7, %r8;
359-
; SM100-NEXT: add.s32 %r10, %r5, %r6;
360-
; SM100-NEXT: add.s32 %r11, %r3, %r4;
361-
; SM100-NEXT: add.s32 %r12, %r1, %r2;
353+
; SM100-NEXT: ld.global.nc.v8.b32 {%r1, %r2, %r3, %r4, %r5, %r6, %r7, %r8}, [%rd1];
354+
; SM100-NEXT: add.s32 %r9, %r1, %r2;
355+
; SM100-NEXT: add.s32 %r10, %r3, %r4;
356+
; SM100-NEXT: add.s32 %r11, %r5, %r6;
357+
; SM100-NEXT: add.s32 %r12, %r7, %r8;
362358
; SM100-NEXT: add.s32 %r13, %r9, %r10;
363359
; SM100-NEXT: add.s32 %r14, %r11, %r12;
364360
; SM100-NEXT: add.s32 %r15, %r13, %r14;

llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ body: |
2626
; CHECK: bb.0.entry:
2727
; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.3(0x50000000)
2828
; CHECK-NEXT: {{ $}}
29-
; CHECK-NEXT: [[LD_i32_:%[0-9]+]]:b32 = LD_i32 0, 0, 101, 3, 32, &test_hoist_param_1, 0 :: (dereferenceable invariant load (s32), addrspace 101)
30-
; CHECK-NEXT: [[LD_i64_:%[0-9]+]]:b64 = LD_i64 0, 0, 101, 3, 64, &test_hoist_param_0, 0 :: (dereferenceable invariant load (s64), addrspace 101)
29+
; CHECK-NEXT: [[LD_i32_:%[0-9]+]]:b32 = LD_i32 0, 0, 101, 3, 32, -1, &test_hoist_param_1, 0 :: (dereferenceable invariant load (s32), addrspace 101)
30+
; CHECK-NEXT: [[LD_i64_:%[0-9]+]]:b64 = LD_i64 0, 0, 101, 3, 64, -1, &test_hoist_param_0, 0 :: (dereferenceable invariant load (s64), addrspace 101)
3131
; CHECK-NEXT: [[ADD64ri:%[0-9]+]]:b64 = nuw ADD64ri killed [[LD_i64_]], 2
32-
; CHECK-NEXT: [[LD_i32_1:%[0-9]+]]:b32 = LD_i32 0, 0, 1, 3, 32, [[ADD64ri]], 0
32+
; CHECK-NEXT: [[LD_i32_1:%[0-9]+]]:b32 = LD_i32 0, 0, 1, 3, 32, -1, [[ADD64ri]], 0
3333
; CHECK-NEXT: [[SETP_i32ri:%[0-9]+]]:b1 = SETP_i32ri [[LD_i32_]], 0, 0
3434
; CHECK-NEXT: CBranch killed [[SETP_i32ri]], %bb.2
3535
; CHECK-NEXT: {{ $}}
@@ -54,10 +54,10 @@ body: |
5454
bb.0.entry:
5555
successors: %bb.2(0x30000000), %bb.1(0x50000000)
5656
57-
%5:b32 = LD_i32 0, 0, 101, 3, 32, &test_hoist_param_1, 0 :: (dereferenceable invariant load (s32), addrspace 101)
58-
%6:b64 = LD_i64 0, 0, 101, 3, 64, &test_hoist_param_0, 0 :: (dereferenceable invariant load (s64), addrspace 101)
57+
%5:b32 = LD_i32 0, 0, 101, 3, 32, -1, &test_hoist_param_1, 0 :: (dereferenceable invariant load (s32), addrspace 101)
58+
%6:b64 = LD_i64 0, 0, 101, 3, 64, -1, &test_hoist_param_0, 0 :: (dereferenceable invariant load (s64), addrspace 101)
5959
%0:b64 = nuw ADD64ri killed %6, 2
60-
%1:b32 = LD_i32 0, 0, 1, 3, 32, %0, 0
60+
%1:b32 = LD_i32 0, 0, 1, 3, 32, -1, %0, 0
6161
%7:b1 = SETP_i32ri %5, 0, 0
6262
CBranch killed %7, %bb.2
6363
GOTO %bb.1

llvm/test/CodeGen/NVPTX/masked-load-vectors.ll

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ define void @global_8xi32(ptr addrspace(1) %a, ptr addrspace(1) %b) {
4040
; SM100-NEXT: ld.param.b64 %rd2, [global_8xi32_param_1];
4141
; SM100-NEXT: st.global.v8.b32 [%rd2], {%r1, _, %r3, _, _, _, _, %r8};
4242
; SM100-NEXT: ret;
43-
%a.load = tail call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) %a, i32 32, <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> poison)
44-
tail call void @llvm.masked.store.v8i32.p1(<8 x i32> %a.load, ptr addrspace(1) %b, i32 32, <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>)
43+
%a.load = tail call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) align 32 %a, <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> poison)
44+
tail call void @llvm.masked.store.v8i32.p1(<8 x i32> %a.load, ptr addrspace(1) align 32 %b, <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>)
4545
ret void
4646
}
4747

@@ -93,8 +93,8 @@ define void @global_16xi16(ptr addrspace(1) %a, ptr addrspace(1) %b) {
9393
; SM100-NEXT: st.global.b16 [%rd2+28], %rs1;
9494
; SM100-NEXT: st.global.b16 [%rd2+30], %rs2;
9595
; SM100-NEXT: ret;
96-
%a.load = tail call <16 x i16> @llvm.masked.load.v16i16.p1(ptr addrspace(1) %a, i32 32, <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true>, <16 x i16> poison)
97-
tail call void @llvm.masked.store.v16i16.p1(<16 x i16> %a.load, ptr addrspace(1) %b, i32 32, <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true>)
96+
%a.load = tail call <16 x i16> @llvm.masked.load.v16i16.p1(ptr addrspace(1) align 32 %a, <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true>, <16 x i16> poison)
97+
tail call void @llvm.masked.store.v16i16.p1(<16 x i16> %a.load, ptr addrspace(1) align 32 %b, <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true>)
9898
ret void
9999
}
100100

@@ -114,8 +114,8 @@ define void @global_8xi32_no_align(ptr addrspace(1) %a, ptr addrspace(1) %b) {
114114
; CHECK-NEXT: st.global.b32 [%rd2+8], %r2;
115115
; CHECK-NEXT: st.global.b32 [%rd2+28], %r3;
116116
; CHECK-NEXT: ret;
117-
%a.load = tail call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) %a, i32 16, <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> poison)
118-
tail call void @llvm.masked.store.v8i32.p1(<8 x i32> %a.load, ptr addrspace(1) %b, i32 16, <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>)
117+
%a.load = tail call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) align 16 %a, <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> poison)
118+
tail call void @llvm.masked.store.v8i32.p1(<8 x i32> %a.load, ptr addrspace(1) align 16 %b, <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>)
119119
ret void
120120
}
121121

@@ -150,8 +150,8 @@ define void @global_8xi32_invariant(ptr addrspace(1) %a, ptr addrspace(1) %b) {
150150
; SM100-NEXT: ld.param.b64 %rd2, [global_8xi32_invariant_param_1];
151151
; SM100-NEXT: st.global.v8.b32 [%rd2], {%r1, _, %r3, _, _, _, _, %r8};
152152
; SM100-NEXT: ret;
153-
%a.load = tail call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) %a, i32 32, <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> poison), !invariant.load !0
154-
tail call void @llvm.masked.store.v8i32.p1(<8 x i32> %a.load, ptr addrspace(1) %b, i32 32, <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>)
153+
%a.load = tail call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) align 32 %a, <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> poison), !invariant.load !0
154+
tail call void @llvm.masked.store.v8i32.p1(<8 x i32> %a.load, ptr addrspace(1) align 32 %b, <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>)
155155
ret void
156156
}
157157

@@ -170,8 +170,8 @@ define void @global_2xi16(ptr addrspace(1) %a, ptr addrspace(1) %b) {
170170
; CHECK-NEXT: mov.b32 {%rs1, _}, %r1;
171171
; CHECK-NEXT: st.global.b16 [%rd2], %rs1;
172172
; CHECK-NEXT: ret;
173-
%a.load = tail call <2 x i16> @llvm.masked.load.v2i16.p1(ptr addrspace(1) %a, i32 4, <2 x i1> <i1 true, i1 false>, <2 x i16> poison)
174-
tail call void @llvm.masked.store.v2i16.p1(<2 x i16> %a.load, ptr addrspace(1) %b, i32 4, <2 x i1> <i1 true, i1 false>)
173+
%a.load = tail call <2 x i16> @llvm.masked.load.v2i16.p1(ptr addrspace(1) align 4 %a, <2 x i1> <i1 true, i1 false>, <2 x i16> poison)
174+
tail call void @llvm.masked.store.v2i16.p1(<2 x i16> %a.load, ptr addrspace(1) align 4 %b, <2 x i1> <i1 true, i1 false>)
175175
ret void
176176
}
177177

@@ -190,8 +190,8 @@ define void @global_2xi16_invariant(ptr addrspace(1) %a, ptr addrspace(1) %b) {
190190
; CHECK-NEXT: mov.b32 {%rs1, _}, %r1;
191191
; CHECK-NEXT: st.global.b16 [%rd2], %rs1;
192192
; CHECK-NEXT: ret;
193-
%a.load = tail call <2 x i16> @llvm.masked.load.v2i16.p1(ptr addrspace(1) %a, i32 4, <2 x i1> <i1 true, i1 false>, <2 x i16> poison), !invariant.load !0
194-
tail call void @llvm.masked.store.v2i16.p1(<2 x i16> %a.load, ptr addrspace(1) %b, i32 4, <2 x i1> <i1 true, i1 false>)
193+
%a.load = tail call <2 x i16> @llvm.masked.load.v2i16.p1(ptr addrspace(1) align 4 %a, <2 x i1> <i1 true, i1 false>, <2 x i16> poison), !invariant.load !0
194+
tail call void @llvm.masked.store.v2i16.p1(<2 x i16> %a.load, ptr addrspace(1) align 4 %b, <2 x i1> <i1 true, i1 false>)
195195
ret void
196196
}
197197

@@ -207,8 +207,8 @@ define void @global_2xi16_no_align(ptr addrspace(1) %a, ptr addrspace(1) %b) {
207207
; CHECK-NEXT: ld.param.b64 %rd2, [global_2xi16_no_align_param_1];
208208
; CHECK-NEXT: st.global.b16 [%rd2], %rs1;
209209
; CHECK-NEXT: ret;
210-
%a.load = tail call <2 x i16> @llvm.masked.load.v2i16.p1(ptr addrspace(1) %a, i32 2, <2 x i1> <i1 true, i1 false>, <2 x i16> poison)
211-
tail call void @llvm.masked.store.v2i16.p1(<2 x i16> %a.load, ptr addrspace(1) %b, i32 4, <2 x i1> <i1 true, i1 false>)
210+
%a.load = tail call <2 x i16> @llvm.masked.load.v2i16.p1(ptr addrspace(1) align 2 %a, <2 x i1> <i1 true, i1 false>, <2 x i16> poison)
211+
tail call void @llvm.masked.store.v2i16.p1(<2 x i16> %a.load, ptr addrspace(1) align 4 %b, <2 x i1> <i1 true, i1 false>)
212212
ret void
213213
}
214214

@@ -227,8 +227,8 @@ define void @global_4xi8(ptr addrspace(1) %a, ptr addrspace(1) %b) {
227227
; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x7772U;
228228
; CHECK-NEXT: st.global.b8 [%rd2+2], %r2;
229229
; CHECK-NEXT: ret;
230-
%a.load = tail call <4 x i8> @llvm.masked.load.v4i8.p1(ptr addrspace(1) %a, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i8> poison)
231-
tail call void @llvm.masked.store.v4i8.p1(<4 x i8> %a.load, ptr addrspace(1) %b, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 false>)
230+
%a.load = tail call <4 x i8> @llvm.masked.load.v4i8.p1(ptr addrspace(1) align 4 %a, <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i8> poison)
231+
tail call void @llvm.masked.store.v4i8.p1(<4 x i8> %a.load, ptr addrspace(1) align 4 %b, <4 x i1> <i1 true, i1 false, i1 true, i1 false>)
232232
ret void
233233
}
234234

@@ -247,8 +247,8 @@ define void @global_4xi8_invariant(ptr addrspace(1) %a, ptr addrspace(1) %b) {
247247
; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x7772U;
248248
; CHECK-NEXT: st.global.b8 [%rd2+2], %r2;
249249
; CHECK-NEXT: ret;
250-
%a.load = tail call <4 x i8> @llvm.masked.load.v4i8.p1(ptr addrspace(1) %a, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i8> poison), !invariant.load !0
251-
tail call void @llvm.masked.store.v4i8.p1(<4 x i8> %a.load, ptr addrspace(1) %b, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 false>)
250+
%a.load = tail call <4 x i8> @llvm.masked.load.v4i8.p1(ptr addrspace(1) align 4 %a, <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i8> poison), !invariant.load !0
251+
tail call void @llvm.masked.store.v4i8.p1(<4 x i8> %a.load, ptr addrspace(1) align 4 %b, <4 x i1> <i1 true, i1 false, i1 true, i1 false>)
252252
ret void
253253
}
254254

@@ -266,8 +266,8 @@ define void @global_4xi8_no_align(ptr addrspace(1) %a, ptr addrspace(1) %b) {
266266
; CHECK-NEXT: st.global.b8 [%rd2], %rs1;
267267
; CHECK-NEXT: st.global.b8 [%rd2+2], %rs2;
268268
; CHECK-NEXT: ret;
269-
%a.load = tail call <4 x i8> @llvm.masked.load.v4i8.p1(ptr addrspace(1) %a, i32 2, <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i8> poison)
270-
tail call void @llvm.masked.store.v4i8.p1(<4 x i8> %a.load, ptr addrspace(1) %b, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 false>)
269+
%a.load = tail call <4 x i8> @llvm.masked.load.v4i8.p1(ptr addrspace(1) align 2 %a, <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i8> poison)
270+
tail call void @llvm.masked.store.v4i8.p1(<4 x i8> %a.load, ptr addrspace(1) align 4 %b, <4 x i1> <i1 true, i1 false, i1 true, i1 false>)
271271
ret void
272272
}
273273

@@ -299,8 +299,8 @@ define void @global_2xf32(ptr addrspace(1) %a, ptr addrspace(1) %b) {
299299
; SM100-NEXT: mov.b64 {%r1, _}, %rd2;
300300
; SM100-NEXT: st.global.b32 [%rd3], %r1;
301301
; SM100-NEXT: ret;
302-
%a.load = tail call <2 x float> @llvm.masked.load.v2f32.p1(ptr addrspace(1) %a, i32 8, <2 x i1> <i1 true, i1 false>, <2 x float> poison)
303-
tail call void @llvm.masked.store.v2f32.p1(<2 x float> %a.load, ptr addrspace(1) %b, i32 8, <2 x i1> <i1 true, i1 false>)
302+
%a.load = tail call <2 x float> @llvm.masked.load.v2f32.p1(ptr addrspace(1) align 8 %a, <2 x i1> <i1 true, i1 false>, <2 x float> poison)
303+
tail call void @llvm.masked.store.v2f32.p1(<2 x float> %a.load, ptr addrspace(1) align 8 %b, <2 x i1> <i1 true, i1 false>)
304304
ret void
305305
}
306306

@@ -331,8 +331,8 @@ define void @global_2xf32_invariant(ptr addrspace(1) %a, ptr addrspace(1) %b) {
331331
; SM100-NEXT: mov.b64 {%r1, _}, %rd2;
332332
; SM100-NEXT: st.global.b32 [%rd3], %r1;
333333
; SM100-NEXT: ret;
334-
%a.load = tail call <2 x float> @llvm.masked.load.v2f32.p1(ptr addrspace(1) %a, i32 8, <2 x i1> <i1 true, i1 false>, <2 x float> poison), !invariant.load !0
335-
tail call void @llvm.masked.store.v2f32.p1(<2 x float> %a.load, ptr addrspace(1) %b, i32 8, <2 x i1> <i1 true, i1 false>)
334+
%a.load = tail call <2 x float> @llvm.masked.load.v2f32.p1(ptr addrspace(1) align 8 %a, <2 x i1> <i1 true, i1 false>, <2 x float> poison), !invariant.load !0
335+
tail call void @llvm.masked.store.v2f32.p1(<2 x float> %a.load, ptr addrspace(1) align 8 %b, <2 x i1> <i1 true, i1 false>)
336336
ret void
337337
}
338338

@@ -348,19 +348,19 @@ define void @global_2xf32_no_align(ptr addrspace(1) %a, ptr addrspace(1) %b) {
348348
; CHECK-NEXT: ld.param.b64 %rd2, [global_2xf32_no_align_param_1];
349349
; CHECK-NEXT: st.global.b32 [%rd2], %r1;
350350
; CHECK-NEXT: ret;
351-
%a.load = tail call <2 x float> @llvm.masked.load.v2f32.p1(ptr addrspace(1) %a, i32 4, <2 x i1> <i1 true, i1 false>, <2 x float> poison)
352-
tail call void @llvm.masked.store.v2f32.p1(<2 x float> %a.load, ptr addrspace(1) %b, i32 8, <2 x i1> <i1 true, i1 false>)
351+
%a.load = tail call <2 x float> @llvm.masked.load.v2f32.p1(ptr addrspace(1) align 4 %a, <2 x i1> <i1 true, i1 false>, <2 x float> poison)
352+
tail call void @llvm.masked.store.v2f32.p1(<2 x float> %a.load, ptr addrspace(1) align 8 %b, <2 x i1> <i1 true, i1 false>)
353353
ret void
354354
}
355355

356-
declare <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1), i32, <8 x i1>, <8 x i32>)
357-
declare void @llvm.masked.store.v8i32.p1(<8 x i32>, ptr addrspace(1), i32, <8 x i1>)
358-
declare <16 x i16> @llvm.masked.load.v16i16.p1(ptr addrspace(1), i32, <16 x i1>, <16 x i16>)
359-
declare void @llvm.masked.store.v16i16.p1(<16 x i16>, ptr addrspace(1), i32, <16 x i1>)
360-
declare <2 x i16> @llvm.masked.load.v2i16.p1(ptr addrspace(1), i32, <2 x i1>, <2 x i16>)
361-
declare void @llvm.masked.store.v2i16.p1(<2 x i16>, ptr addrspace(1), i32, <2 x i1>)
362-
declare <4 x i8> @llvm.masked.load.v4i8.p1(ptr addrspace(1), i32, <4 x i1>, <4 x i8>)
363-
declare void @llvm.masked.store.v4i8.p1(<4 x i8>, ptr addrspace(1), i32, <4 x i1>)
364-
declare <2 x float> @llvm.masked.load.v2f32.p1(ptr addrspace(1), i32, <2 x i1>, <2 x float>)
365-
declare void @llvm.masked.store.v2f32.p1(<2 x float>, ptr addrspace(1), i32, <2 x i1>)
356+
declare <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1), <8 x i1>, <8 x i32>)
357+
declare void @llvm.masked.store.v8i32.p1(<8 x i32>, ptr addrspace(1), <8 x i1>)
358+
declare <16 x i16> @llvm.masked.load.v16i16.p1(ptr addrspace(1), <16 x i1>, <16 x i16>)
359+
declare void @llvm.masked.store.v16i16.p1(<16 x i16>, ptr addrspace(1), <16 x i1>)
360+
declare <2 x i16> @llvm.masked.load.v2i16.p1(ptr addrspace(1), <2 x i1>, <2 x i16>)
361+
declare void @llvm.masked.store.v2i16.p1(<2 x i16>, ptr addrspace(1), <2 x i1>)
362+
declare <4 x i8> @llvm.masked.load.v4i8.p1(ptr addrspace(1), <4 x i1>, <4 x i8>)
363+
declare void @llvm.masked.store.v4i8.p1(<4 x i8>, ptr addrspace(1), <4 x i1>)
364+
declare <2 x float> @llvm.masked.load.v2f32.p1(ptr addrspace(1), <2 x i1>, <2 x float>)
365+
declare void @llvm.masked.store.v2f32.p1(<2 x float>, ptr addrspace(1), <2 x i1>)
366366
!0 = !{}

llvm/test/CodeGen/NVPTX/masked-store-variable-mask.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ define void @global_variable_mask(ptr addrspace(1) %a, ptr addrspace(1) %b, <4 x
4949
; CHECK-NEXT: $L__BB0_8: // %else6
5050
; CHECK-NEXT: ret;
5151
%a.load = load <4 x i64>, ptr addrspace(1) %a
52-
tail call void @llvm.masked.store.v4i64.p1(<4 x i64> %a.load, ptr addrspace(1) %b, i32 32, <4 x i1> %mask)
52+
tail call void @llvm.masked.store.v4i64.p1(<4 x i64> %a.load, ptr addrspace(1) align 32 %b, <4 x i1> %mask)
5353
ret void
5454
}
5555

56-
declare void @llvm.masked.store.v4i64.p1(<4 x i64>, ptr addrspace(1), i32, <4 x i1>)
56+
declare void @llvm.masked.store.v4i64.p1(<4 x i64>, ptr addrspace(1), <4 x i1>)

0 commit comments

Comments
 (0)