@@ -72,7 +72,7 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
72
72
; PTX-NEXT: ret;
73
73
; OPT-LABEL: define ptx_kernel void @grid_const_int(
74
74
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
75
- ; OPT-NEXT: [[INPUT11:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
75
+ ; OPT-NEXT: [[INPUT11:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
76
76
; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
77
77
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
78
78
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
@@ -101,7 +101,7 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
101
101
; PTX-NEXT: ret;
102
102
; OPT-LABEL: define ptx_kernel void @grid_const_struct(
103
103
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
104
- ; OPT-NEXT: [[INPUT1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
104
+ ; OPT-NEXT: [[INPUT1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
105
105
; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
106
106
; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
107
107
; OPT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
@@ -137,7 +137,7 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
137
137
; PTX-NEXT: ret;
138
138
; OPT-LABEL: define ptx_kernel void @grid_const_escape(
139
139
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
140
- ; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
140
+ ; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
141
141
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
142
142
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
143
143
; OPT-NEXT: ret void
@@ -180,9 +180,9 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
180
180
; PTX-NEXT: ret;
181
181
; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape(
182
182
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) #[[ATTR0]] {
183
- ; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[B]])
183
+ ; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[B]])
184
184
; OPT-NEXT: [[B_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
185
- ; OPT-NEXT: [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
185
+ ; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
186
186
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
187
187
; OPT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
188
188
; OPT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -208,7 +208,7 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
208
208
; PTX-NEXT: ret;
209
209
; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
210
210
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
211
- ; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
211
+ ; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
212
212
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
213
213
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR]], align 8
214
214
; OPT-NEXT: ret void
@@ -235,7 +235,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
235
235
; PTX-NOT: .local
236
236
; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
237
237
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
238
- ; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
238
+ ; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
239
239
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
240
240
; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
241
241
; OPT-NEXT: [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
@@ -357,7 +357,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
357
357
; PTX-NEXT: ret;
358
358
; OPT-LABEL: define ptx_kernel void @grid_const_phi(
359
359
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
360
- ; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
360
+ ; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
361
361
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
362
362
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
363
363
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
@@ -416,7 +416,7 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
416
416
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
417
417
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
418
418
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
419
- ; OPT-NEXT: [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
419
+ ; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
420
420
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
421
421
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
422
422
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
@@ -471,7 +471,7 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
471
471
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
472
472
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
473
473
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
474
- ; OPT-NEXT: [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
474
+ ; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
475
475
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
476
476
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
477
477
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
@@ -520,7 +520,7 @@ declare void @device_func(ptr byval(i32) align 4)
520
520
define ptx_kernel void @test_forward_byval_arg (ptr byval (i32 ) align 4 %input ) {
521
521
; OPT-LABEL: define ptx_kernel void @test_forward_byval_arg(
522
522
; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
523
- ; OPT-NEXT: [[INPUT_PARAM:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
523
+ ; OPT-NEXT: [[INPUT_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
524
524
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT_PARAM]] to ptr
525
525
; OPT-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT_PARAM_GEN]])
526
526
; OPT-NEXT: ret void
0 commit comments