7
7
%struct.float3 = type { float , float , float }
8
8
%struct.S1 = type { %struct.float3 , %struct.float3 , i32 , i32 }
9
9
10
- define void @testStore (ptr nocapture writeonly %1 ) {
10
+ define void @testStore (ptr %1 ) {
11
11
; CHECK-LABEL: define void @testStore(
12
- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
12
+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
13
13
; CHECK-NEXT: store <4 x float> zeroinitializer, ptr [[TMP0]], align 16
14
14
; CHECK-NEXT: [[GETELEM10:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i64 0, i32 1, i32 1
15
15
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[GETELEM10]], align 16
@@ -33,9 +33,9 @@ define void @testStore(ptr nocapture writeonly %1) {
33
33
ret void
34
34
}
35
35
36
- define void @testLoad (ptr nocapture writeonly %1 ) {
36
+ define void @testLoad (ptr %1 ) {
37
37
; CHECK-LABEL: define void @testLoad(
38
- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
38
+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
39
39
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0]], align 16
40
40
; CHECK-NEXT: [[L11:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
41
41
; CHECK-NEXT: [[L22:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -71,9 +71,9 @@ define void @testLoad(ptr nocapture writeonly %1) {
71
71
72
72
; Also, test without the struct geps, to see if it still works with i8 geps/ptradd
73
73
74
- define void @testStorei8 (ptr nocapture writeonly %1 ) {
74
+ define void @testStorei8 (ptr %1 ) {
75
75
; CHECK-LABEL: define void @testStorei8(
76
- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
76
+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
77
77
; CHECK-NEXT: store <4 x float> zeroinitializer, ptr [[TMP0]], align 16
78
78
; CHECK-NEXT: [[GETELEM10:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
79
79
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[GETELEM10]], align 16
@@ -97,9 +97,9 @@ define void @testStorei8(ptr nocapture writeonly %1) {
97
97
ret void
98
98
}
99
99
100
- define void @testLoadi8 (ptr nocapture writeonly %1 ) {
100
+ define void @testLoadi8 (ptr %1 ) {
101
101
; CHECK-LABEL: define void @testLoadi8(
102
- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
102
+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
103
103
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0]], align 16
104
104
; CHECK-NEXT: [[L11:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
105
105
; CHECK-NEXT: [[L22:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -141,9 +141,9 @@ define void @testLoadi8(ptr nocapture writeonly %1) {
141
141
; 4x32 will instead be a 2x32 and a 2x32
142
142
%struct.S2 = type { i32 , i32 , %struct.float3 , %struct.float3 , i32 , i32 }
143
143
144
- define void @testStore_2 (ptr nocapture writeonly %1 ) {
144
+ define void @testStore_2 (ptr %1 ) {
145
145
; CHECK-LABEL: define void @testStore_2(
146
- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
146
+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
147
147
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP0]], align 8
148
148
; CHECK-NEXT: [[GETELEM1:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[TMP0]], i64 0, i32 2
149
149
; CHECK-NEXT: store <4 x float> zeroinitializer, ptr [[GETELEM1]], align 16
@@ -173,9 +173,9 @@ define void @testStore_2(ptr nocapture writeonly %1) {
173
173
ret void
174
174
}
175
175
176
- define void @testLoad_2 (ptr nocapture writeonly %1 ) {
176
+ define void @testLoad_2 (ptr %1 ) {
177
177
; CHECK-LABEL: define void @testLoad_2(
178
- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
178
+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
179
179
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[TMP0]], align 8
180
180
; CHECK-NEXT: [[L1:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
181
181
; CHECK-NEXT: [[L22:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
@@ -219,9 +219,9 @@ define void @testLoad_2(ptr nocapture writeonly %1) {
219
219
220
220
; Also, test without the struct geps, to see if it still works with i8 geps/ptradd
221
221
222
- define void @testStorei8_2 (ptr nocapture writeonly %1 ) {
222
+ define void @testStorei8_2 (ptr %1 ) {
223
223
; CHECK-LABEL: define void @testStorei8_2(
224
- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
224
+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
225
225
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP0]], align 8
226
226
; CHECK-NEXT: [[GETELEM1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
227
227
; CHECK-NEXT: store <4 x float> zeroinitializer, ptr [[GETELEM1]], align 16
@@ -251,9 +251,9 @@ define void @testStorei8_2(ptr nocapture writeonly %1) {
251
251
ret void
252
252
}
253
253
254
- define void @testLoadi8_2 (ptr nocapture writeonly %1 ) {
254
+ define void @testLoadi8_2 (ptr %1 ) {
255
255
; CHECK-LABEL: define void @testLoadi8_2(
256
- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
256
+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
257
257
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[TMP0]], align 8
258
258
; CHECK-NEXT: [[L1:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
259
259
; CHECK-NEXT: [[L22:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
@@ -294,3 +294,157 @@ define void @testLoadi8_2(ptr nocapture writeonly %1) {
294
294
%l0 = load i32 , ptr %getElem13 , align 4
295
295
ret void
296
296
}
297
+
298
+ ; Test that the alignment propagation works both forwards and backwards.
299
+ ; With the "align 16" placed where it is (on the load at offset 24),
300
+ ; we should end up with a v2 followed by two v4s followed by a v2.
301
+ define void @test_forward_and_reverse (ptr %1 ) {
302
+ ; CHECK-LABEL: define void @test_forward_and_reverse(
303
+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
304
+ ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[TMP0]], align 8
305
+ ; CHECK-NEXT: [[L1:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
306
+ ; CHECK-NEXT: [[L22:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
307
+ ; CHECK-NEXT: [[GETELEM1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
308
+ ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[GETELEM1]], align 16
309
+ ; CHECK-NEXT: [[L33:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
310
+ ; CHECK-NEXT: [[L44:%.*]] = extractelement <4 x float> [[TMP3]], i32 1
311
+ ; CHECK-NEXT: [[L55:%.*]] = extractelement <4 x float> [[TMP3]], i32 2
312
+ ; CHECK-NEXT: [[L66:%.*]] = extractelement <4 x float> [[TMP3]], i32 3
313
+ ; CHECK-NEXT: [[GETELEM10:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 24
314
+ ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[GETELEM10]], align 16
315
+ ; CHECK-NEXT: [[L77:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
316
+ ; CHECK-NEXT: [[L88:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
317
+ ; CHECK-NEXT: [[L99:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
318
+ ; CHECK-NEXT: [[L010:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
319
+ ; CHECK-NEXT: [[GETELEM14:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 40
320
+ ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[GETELEM14]], align 16
321
+ ; CHECK-NEXT: [[L1111:%.*]] = extractelement <2 x i32> [[TMP7]], i32 0
322
+ ; CHECK-NEXT: [[L1212:%.*]] = extractelement <2 x i32> [[TMP7]], i32 1
323
+ ; CHECK-NEXT: ret void
324
+ ;
+ ; Input: twelve consecutive 4-byte scalar loads (i32, i32, eight floats, i32, i32)
+ ; covering byte offsets 0 through 44 from %1.
325
+ %l = load i32 , ptr %1 , align 4
326
+ %getElem = getelementptr inbounds i8 , ptr %1 , i64 4
327
+ %l2 = load i32 , ptr %getElem , align 4
328
+ %getElem1 = getelementptr inbounds i8 , ptr %1 , i64 8
329
+ %l3 = load float , ptr %getElem1 , align 4
330
+ %getElem2 = getelementptr inbounds i8 , ptr %1 , i64 12
331
+ %l4 = load float , ptr %getElem2 , align 4
332
+ %getElem8 = getelementptr inbounds i8 , ptr %1 , i64 16
333
+ %l5 = load float , ptr %getElem8 , align 4
334
+ %getElem9 = getelementptr inbounds i8 , ptr %1 , i64 20
335
+ %l6 = load float , ptr %getElem9 , align 4
+ ; The load at offset 24 is the only one marked align 16; every other scalar
+ ; load in this function is align 4. The CHECK lines above expect align 16 on
+ ; the vector loads at offsets 8, 24 and 40, and align 8 on the one at offset 0.
336
+ %getElem10 = getelementptr inbounds i8 , ptr %1 , i64 24
337
+ %l7 = load float , ptr %getElem10 , align 16
338
+ %getElem11 = getelementptr inbounds i8 , ptr %1 , i64 28
339
+ %l8 = load float , ptr %getElem11 , align 4
340
+ %getElem12 = getelementptr inbounds i8 , ptr %1 , i64 32
341
+ %l9 = load float , ptr %getElem12 , align 4
342
+ %getElem13 = getelementptr inbounds i8 , ptr %1 , i64 36
343
+ %l0 = load float , ptr %getElem13 , align 4
344
+ %getElem14 = getelementptr inbounds i8 , ptr %1 , i64 40
345
+ %l11 = load i32 , ptr %getElem14 , align 4
346
+ %getElem15 = getelementptr inbounds i8 , ptr %1 , i64 44
347
+ %l12 = load i32 , ptr %getElem15 , align 4
348
+ ret void
349
+ }
350
+
351
+ ; Test an edge case where the defined alignment is the maximum alignment.
352
+ define void @test_forward_and_reverse_max_align (ptr %1 ) {
353
+ ; CHECK-LABEL: define void @test_forward_and_reverse_max_align(
354
+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
355
+ ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[TMP0]], align 8
356
+ ; CHECK-NEXT: [[L1:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
357
+ ; CHECK-NEXT: [[L22:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
358
+ ; CHECK-NEXT: [[GETELEM1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
359
+ ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[GETELEM1]], align 16
360
+ ; CHECK-NEXT: [[L33:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
361
+ ; CHECK-NEXT: [[L44:%.*]] = extractelement <4 x float> [[TMP3]], i32 1
362
+ ; CHECK-NEXT: [[L55:%.*]] = extractelement <4 x float> [[TMP3]], i32 2
363
+ ; CHECK-NEXT: [[L66:%.*]] = extractelement <4 x float> [[TMP3]], i32 3
364
+ ; CHECK-NEXT: [[GETELEM10:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 24
365
+ ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[GETELEM10]], align 4294967296
366
+ ; CHECK-NEXT: [[L77:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
367
+ ; CHECK-NEXT: [[L88:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
368
+ ; CHECK-NEXT: [[L99:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
369
+ ; CHECK-NEXT: [[L010:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
370
+ ; CHECK-NEXT: [[GETELEM14:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 40
371
+ ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[GETELEM14]], align 16
372
+ ; CHECK-NEXT: [[L1111:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
373
+ ; CHECK-NEXT: [[L1212:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
374
+ ; CHECK-NEXT: ret void
375
+ ;
+ ; Input: twelve consecutive 4-byte scalar loads (i32, i32, eight floats, i32, i32)
+ ; covering byte offsets 0 through 44 from %1.
376
+ %l = load i32 , ptr %1 , align 4
377
+ %getElem = getelementptr inbounds i8 , ptr %1 , i64 4
378
+ %l2 = load i32 , ptr %getElem , align 4
379
+ %getElem1 = getelementptr inbounds i8 , ptr %1 , i64 8
380
+ %l3 = load float , ptr %getElem1 , align 4
381
+ %getElem2 = getelementptr inbounds i8 , ptr %1 , i64 12
382
+ %l4 = load float , ptr %getElem2 , align 4
383
+ %getElem8 = getelementptr inbounds i8 , ptr %1 , i64 16
384
+ %l5 = load float , ptr %getElem8 , align 4
385
+ %getElem9 = getelementptr inbounds i8 , ptr %1 , i64 20
386
+ %l6 = load float , ptr %getElem9 , align 4
+ ; The load at offset 24 is the only one not marked align 4: it carries
+ ; align 4294967296 (2^32). The CHECK lines above expect that value to be kept
+ ; on the vector load at offset 24, while the vector loads at offsets 8 and 40
+ ; are expected to get align 16 and the one at offset 0 align 8.
387
+ %getElem10 = getelementptr inbounds i8 , ptr %1 , i64 24
388
+ %l7 = load float , ptr %getElem10 , align 4294967296
389
+ %getElem11 = getelementptr inbounds i8 , ptr %1 , i64 28
390
+ %l8 = load float , ptr %getElem11 , align 4
391
+ %getElem12 = getelementptr inbounds i8 , ptr %1 , i64 32
392
+ %l9 = load float , ptr %getElem12 , align 4
393
+ %getElem13 = getelementptr inbounds i8 , ptr %1 , i64 36
394
+ %l0 = load float , ptr %getElem13 , align 4
395
+ %getElem14 = getelementptr inbounds i8 , ptr %1 , i64 40
396
+ %l11 = load i32 , ptr %getElem14 , align 4
397
+ %getElem15 = getelementptr inbounds i8 , ptr %1 , i64 44
398
+ %l12 = load i32 , ptr %getElem15 , align 4
399
+ ret void
400
+ }
401
+
402
+ ; Test with i8 elements: twelve consecutive i8 loads at byte offsets 0 through 11,
+ ; where only the load at offset 6 is marked align 4 and all others are align 1.
+ ; The CHECK lines expect a <2 x i8>, two <4 x i8> and a <2 x i8> vector load.
+ define void @test_i8_elements (ptr %1 ) {
403
+ ; CHECK-LABEL: define void @test_i8_elements(
404
+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
405
+ ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[TMP0]], align 2
406
+ ; CHECK-NEXT: [[L1:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
407
+ ; CHECK-NEXT: [[L22:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
408
+ ; CHECK-NEXT: [[GETELEM1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 2
409
+ ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[GETELEM1]], align 4
410
+ ; CHECK-NEXT: [[L33:%.*]] = extractelement <4 x i8> [[TMP3]], i32 0
411
+ ; CHECK-NEXT: [[L44:%.*]] = extractelement <4 x i8> [[TMP3]], i32 1
412
+ ; CHECK-NEXT: [[L55:%.*]] = extractelement <4 x i8> [[TMP3]], i32 2
413
+ ; CHECK-NEXT: [[L66:%.*]] = extractelement <4 x i8> [[TMP3]], i32 3
414
+ ; CHECK-NEXT: [[GETELEM10:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 6
415
+ ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[GETELEM10]], align 4
416
+ ; CHECK-NEXT: [[L77:%.*]] = extractelement <4 x i8> [[TMP4]], i32 0
417
+ ; CHECK-NEXT: [[L88:%.*]] = extractelement <4 x i8> [[TMP4]], i32 1
418
+ ; CHECK-NEXT: [[L99:%.*]] = extractelement <4 x i8> [[TMP4]], i32 2
419
+ ; CHECK-NEXT: [[L010:%.*]] = extractelement <4 x i8> [[TMP4]], i32 3
420
+ ; CHECK-NEXT: [[GETELEM14:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 10
421
+ ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i8>, ptr [[GETELEM14]], align 4
422
+ ; CHECK-NEXT: [[L1111:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
423
+ ; CHECK-NEXT: [[L1212:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
424
+ ; CHECK-NEXT: ret void
425
+ ;
426
+ %l = load i8 , ptr %1 , align 1
427
+ %getElem = getelementptr inbounds i8 , ptr %1 , i64 1
428
+ %l2 = load i8 , ptr %getElem , align 1
429
+ %getElem1 = getelementptr inbounds i8 , ptr %1 , i64 2
430
+ %l3 = load i8 , ptr %getElem1 , align 1
431
+ %getElem2 = getelementptr inbounds i8 , ptr %1 , i64 3
432
+ %l4 = load i8 , ptr %getElem2 , align 1
433
+ %getElem8 = getelementptr inbounds i8 , ptr %1 , i64 4
434
+ %l5 = load i8 , ptr %getElem8 , align 1
435
+ %getElem9 = getelementptr inbounds i8 , ptr %1 , i64 5
436
+ %l6 = load i8 , ptr %getElem9 , align 1
+ ; The load at offset 6 is the only one marked align 4. The CHECK lines above
+ ; expect align 4 on the vector loads at offsets 2, 6 and 10, and align 2 on
+ ; the one at offset 0.
437
+ %getElem10 = getelementptr inbounds i8 , ptr %1 , i64 6
438
+ %l7 = load i8 , ptr %getElem10 , align 4
439
+ %getElem11 = getelementptr inbounds i8 , ptr %1 , i64 7
440
+ %l8 = load i8 , ptr %getElem11 , align 1
441
+ %getElem12 = getelementptr inbounds i8 , ptr %1 , i64 8
442
+ %l9 = load i8 , ptr %getElem12 , align 1
443
+ %getElem13 = getelementptr inbounds i8 , ptr %1 , i64 9
444
+ %l0 = load i8 , ptr %getElem13 , align 1
445
+ %getElem14 = getelementptr inbounds i8 , ptr %1 , i64 10
446
+ %l11 = load i8 , ptr %getElem14 , align 1
447
+ %getElem15 = getelementptr inbounds i8 , ptr %1 , i64 11
448
+ %l12 = load i8 , ptr %getElem15 , align 1
449
+ ret void
450
+ }
0 commit comments