77%struct.float3 = type { float , float , float }
88%struct.S1 = type { %struct.float3 , %struct.float3 , i32 , i32 }
99
10- define void @testStore (ptr nocapture writeonly %1 ) {
10+ define void @testStore (ptr %1 ) {
1111; CHECK-LABEL: define void @testStore(
12- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
12+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
1313; CHECK-NEXT: store <4 x float> zeroinitializer, ptr [[TMP0]], align 16
1414; CHECK-NEXT: [[GETELEM10:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i64 0, i32 1, i32 1
1515; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[GETELEM10]], align 16
@@ -33,9 +33,9 @@ define void @testStore(ptr nocapture writeonly %1) {
3333 ret void
3434}
3535
36- define void @testLoad (ptr nocapture writeonly %1 ) {
36+ define void @testLoad (ptr %1 ) {
3737; CHECK-LABEL: define void @testLoad(
38- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
38+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
3939; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0]], align 16
4040; CHECK-NEXT: [[L11:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
4141; CHECK-NEXT: [[L22:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -71,9 +71,9 @@ define void @testLoad(ptr nocapture writeonly %1) {
7171
7272; Also, test without the struct geps, to see if it still works with i8 geps/ptradd
7373
74- define void @testStorei8 (ptr nocapture writeonly %1 ) {
74+ define void @testStorei8 (ptr %1 ) {
7575; CHECK-LABEL: define void @testStorei8(
76- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
76+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
7777; CHECK-NEXT: store <4 x float> zeroinitializer, ptr [[TMP0]], align 16
7878; CHECK-NEXT: [[GETELEM10:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
7979; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[GETELEM10]], align 16
@@ -97,9 +97,9 @@ define void @testStorei8(ptr nocapture writeonly %1) {
9797 ret void
9898}
9999
100- define void @testLoadi8 (ptr nocapture writeonly %1 ) {
100+ define void @testLoadi8 (ptr %1 ) {
101101; CHECK-LABEL: define void @testLoadi8(
102- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
102+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
103103; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0]], align 16
104104; CHECK-NEXT: [[L11:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
105105; CHECK-NEXT: [[L22:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -141,9 +141,9 @@ define void @testLoadi8(ptr nocapture writeonly %1) {
141141; 4x32 will instead be a 2x32 and a 2x32
142142%struct.S2 = type { i32 , i32 , %struct.float3 , %struct.float3 , i32 , i32 }
143143
144- define void @testStore_2 (ptr nocapture writeonly %1 ) {
144+ define void @testStore_2 (ptr %1 ) {
145145; CHECK-LABEL: define void @testStore_2(
146- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
146+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
147147; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP0]], align 8
148148; CHECK-NEXT: [[GETELEM1:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[TMP0]], i64 0, i32 2
149149; CHECK-NEXT: store <4 x float> zeroinitializer, ptr [[GETELEM1]], align 16
@@ -173,9 +173,9 @@ define void @testStore_2(ptr nocapture writeonly %1) {
173173 ret void
174174}
175175
176- define void @testLoad_2 (ptr nocapture writeonly %1 ) {
176+ define void @testLoad_2 (ptr %1 ) {
177177; CHECK-LABEL: define void @testLoad_2(
178- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
178+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
179179; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[TMP0]], align 8
180180; CHECK-NEXT: [[L1:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
181181; CHECK-NEXT: [[L22:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
@@ -219,9 +219,9 @@ define void @testLoad_2(ptr nocapture writeonly %1) {
219219
220220; Also, test without the struct geps, to see if it still works with i8 geps/ptradd
221221
222- define void @testStorei8_2 (ptr nocapture writeonly %1 ) {
222+ define void @testStorei8_2 (ptr %1 ) {
223223; CHECK-LABEL: define void @testStorei8_2(
224- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
224+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
225225; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP0]], align 8
226226; CHECK-NEXT: [[GETELEM1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
227227; CHECK-NEXT: store <4 x float> zeroinitializer, ptr [[GETELEM1]], align 16
@@ -251,9 +251,9 @@ define void @testStorei8_2(ptr nocapture writeonly %1) {
251251 ret void
252252}
253253
254- define void @testLoadi8_2 (ptr nocapture writeonly %1 ) {
254+ define void @testLoadi8_2 (ptr %1 ) {
255255; CHECK-LABEL: define void @testLoadi8_2(
256- ; CHECK-SAME: ptr writeonly captures(none) [[TMP0:%.*]]) {
256+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
257257; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[TMP0]], align 8
258258; CHECK-NEXT: [[L1:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
259259; CHECK-NEXT: [[L22:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
@@ -294,3 +294,157 @@ define void @testLoadi8_2(ptr nocapture writeonly %1) {
294294 %l0 = load i32 , ptr %getElem13 , align 4
295295 ret void
296296}
297+
298+ ; Test that the alignment propagation works both forwards and backwards.
299+ ; With the "align 16" placed on the load at byte offset 24,
300+ ; we should end up with a v2 followed by two v4s followed by a v2.
301+ define void @test_forward_and_reverse (ptr %1 ) { ; twelve scalar 4-byte loads at byte offsets 0..44 from %1; only the load at offset 24 carries an explicit align 16
302+ ; CHECK-LABEL: define void @test_forward_and_reverse(
303+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
304+ ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[TMP0]], align 8
305+ ; CHECK-NEXT: [[L1:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
306+ ; CHECK-NEXT: [[L22:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
307+ ; CHECK-NEXT: [[GETELEM1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
308+ ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[GETELEM1]], align 16
309+ ; CHECK-NEXT: [[L33:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
310+ ; CHECK-NEXT: [[L44:%.*]] = extractelement <4 x float> [[TMP3]], i32 1
311+ ; CHECK-NEXT: [[L55:%.*]] = extractelement <4 x float> [[TMP3]], i32 2
312+ ; CHECK-NEXT: [[L66:%.*]] = extractelement <4 x float> [[TMP3]], i32 3
313+ ; CHECK-NEXT: [[GETELEM10:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 24
314+ ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[GETELEM10]], align 16
315+ ; CHECK-NEXT: [[L77:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
316+ ; CHECK-NEXT: [[L88:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
317+ ; CHECK-NEXT: [[L99:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
318+ ; CHECK-NEXT: [[L010:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
319+ ; CHECK-NEXT: [[GETELEM14:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 40
320+ ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[GETELEM14]], align 16
321+ ; CHECK-NEXT: [[L1111:%.*]] = extractelement <2 x i32> [[TMP7]], i32 0
322+ ; CHECK-NEXT: [[L1212:%.*]] = extractelement <2 x i32> [[TMP7]], i32 1
323+ ; CHECK-NEXT: ret void
324+ ;
325+ %l = load i32 , ptr %1 , align 4 ; offsets 0 and 4: expected to merge into the leading <2 x i32> load with align 8 (0 = 24 - 24)
326+ %getElem = getelementptr inbounds i8 , ptr %1 , i64 4
327+ %l2 = load i32 , ptr %getElem , align 4
328+ %getElem1 = getelementptr inbounds i8 , ptr %1 , i64 8
329+ %l3 = load float , ptr %getElem1 , align 4 ; offsets 8..20: expected to merge into the first <4 x float> load with align 16 (8 = 24 - 16, backward propagation)
330+ %getElem2 = getelementptr inbounds i8 , ptr %1 , i64 12
331+ %l4 = load float , ptr %getElem2 , align 4
332+ %getElem8 = getelementptr inbounds i8 , ptr %1 , i64 16
333+ %l5 = load float , ptr %getElem8 , align 4
334+ %getElem9 = getelementptr inbounds i8 , ptr %1 , i64 20
335+ %l6 = load float , ptr %getElem9 , align 4
336+ %getElem10 = getelementptr inbounds i8 , ptr %1 , i64 24
337+ %l7 = load float , ptr %getElem10 , align 16 ; the single over-aligned access; offsets 24..36 are expected to merge into the second <4 x float> load
338+ %getElem11 = getelementptr inbounds i8 , ptr %1 , i64 28
339+ %l8 = load float , ptr %getElem11 , align 4
340+ %getElem12 = getelementptr inbounds i8 , ptr %1 , i64 32
341+ %l9 = load float , ptr %getElem12 , align 4
342+ %getElem13 = getelementptr inbounds i8 , ptr %1 , i64 36
343+ %l0 = load float , ptr %getElem13 , align 4
344+ %getElem14 = getelementptr inbounds i8 , ptr %1 , i64 40
345+ %l11 = load i32 , ptr %getElem14 , align 4 ; offsets 40 and 44: expected to merge into the trailing <2 x i32> load with align 16 (40 = 24 + 16, forward propagation)
346+ %getElem15 = getelementptr inbounds i8 , ptr %1 , i64 44
347+ %l12 = load i32 , ptr %getElem15 , align 4
348+ ret void
349+ }
350+
351+ ; Test an edge case where the explicitly specified alignment is the maximum alignment (4294967296 = 2^32).
352+ define void @test_forward_and_reverse_max_align (ptr %1 ) { ; same twelve-load layout at byte offsets 0..44, but the load at offset 24 uses align 4294967296 (2^32)
353+ ; CHECK-LABEL: define void @test_forward_and_reverse_max_align(
354+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
355+ ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[TMP0]], align 8
356+ ; CHECK-NEXT: [[L1:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
357+ ; CHECK-NEXT: [[L22:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
358+ ; CHECK-NEXT: [[GETELEM1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
359+ ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[GETELEM1]], align 16
360+ ; CHECK-NEXT: [[L33:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
361+ ; CHECK-NEXT: [[L44:%.*]] = extractelement <4 x float> [[TMP3]], i32 1
362+ ; CHECK-NEXT: [[L55:%.*]] = extractelement <4 x float> [[TMP3]], i32 2
363+ ; CHECK-NEXT: [[L66:%.*]] = extractelement <4 x float> [[TMP3]], i32 3
364+ ; CHECK-NEXT: [[GETELEM10:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 24
365+ ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[GETELEM10]], align 4294967296
366+ ; CHECK-NEXT: [[L77:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
367+ ; CHECK-NEXT: [[L88:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
368+ ; CHECK-NEXT: [[L99:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
369+ ; CHECK-NEXT: [[L010:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
370+ ; CHECK-NEXT: [[GETELEM14:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 40
371+ ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[GETELEM14]], align 16
372+ ; CHECK-NEXT: [[L1111:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
373+ ; CHECK-NEXT: [[L1212:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
374+ ; CHECK-NEXT: ret void
375+ ;
376+ %l = load i32 , ptr %1 , align 4 ; offsets 0 and 4: expected to merge into the leading <2 x i32> load with align 8
377+ %getElem = getelementptr inbounds i8 , ptr %1 , i64 4
378+ %l2 = load i32 , ptr %getElem , align 4
379+ %getElem1 = getelementptr inbounds i8 , ptr %1 , i64 8
380+ %l3 = load float , ptr %getElem1 , align 4 ; offsets 8..20: expected to merge into the first <4 x float> load with align 16 (16 bytes before the over-aligned access)
381+ %getElem2 = getelementptr inbounds i8 , ptr %1 , i64 12
382+ %l4 = load float , ptr %getElem2 , align 4
383+ %getElem8 = getelementptr inbounds i8 , ptr %1 , i64 16
384+ %l5 = load float , ptr %getElem8 , align 4
385+ %getElem9 = getelementptr inbounds i8 , ptr %1 , i64 20
386+ %l6 = load float , ptr %getElem9 , align 4
387+ %getElem10 = getelementptr inbounds i8 , ptr %1 , i64 24
388+ %l7 = load float , ptr %getElem10 , align 4294967296 ; the single over-aligned access; the expected vector load at offset 24 keeps this exact alignment value
389+ %getElem11 = getelementptr inbounds i8 , ptr %1 , i64 28
390+ %l8 = load float , ptr %getElem11 , align 4
391+ %getElem12 = getelementptr inbounds i8 , ptr %1 , i64 32
392+ %l9 = load float , ptr %getElem12 , align 4
393+ %getElem13 = getelementptr inbounds i8 , ptr %1 , i64 36
394+ %l0 = load float , ptr %getElem13 , align 4
395+ %getElem14 = getelementptr inbounds i8 , ptr %1 , i64 40
396+ %l11 = load i32 , ptr %getElem14 , align 4 ; offsets 40 and 44: expected to merge into the trailing <2 x i32> load with align 16 (16 bytes after the over-aligned access)
397+ %getElem15 = getelementptr inbounds i8 , ptr %1 , i64 44
398+ %l12 = load i32 , ptr %getElem15 , align 4
399+ ret void
400+ }
401+
402+ define void @test_i8_elements (ptr %1 ) { ; twelve scalar i8 loads at byte offsets 0..11 from %1; only the load at offset 6 carries an explicit align 4
403+ ; CHECK-LABEL: define void @test_i8_elements(
404+ ; CHECK-SAME: ptr [[TMP0:%.*]]) {
405+ ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[TMP0]], align 2
406+ ; CHECK-NEXT: [[L1:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
407+ ; CHECK-NEXT: [[L22:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
408+ ; CHECK-NEXT: [[GETELEM1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 2
409+ ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[GETELEM1]], align 4
410+ ; CHECK-NEXT: [[L33:%.*]] = extractelement <4 x i8> [[TMP3]], i32 0
411+ ; CHECK-NEXT: [[L44:%.*]] = extractelement <4 x i8> [[TMP3]], i32 1
412+ ; CHECK-NEXT: [[L55:%.*]] = extractelement <4 x i8> [[TMP3]], i32 2
413+ ; CHECK-NEXT: [[L66:%.*]] = extractelement <4 x i8> [[TMP3]], i32 3
414+ ; CHECK-NEXT: [[GETELEM10:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 6
415+ ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[GETELEM10]], align 4
416+ ; CHECK-NEXT: [[L77:%.*]] = extractelement <4 x i8> [[TMP4]], i32 0
417+ ; CHECK-NEXT: [[L88:%.*]] = extractelement <4 x i8> [[TMP4]], i32 1
418+ ; CHECK-NEXT: [[L99:%.*]] = extractelement <4 x i8> [[TMP4]], i32 2
419+ ; CHECK-NEXT: [[L010:%.*]] = extractelement <4 x i8> [[TMP4]], i32 3
420+ ; CHECK-NEXT: [[GETELEM14:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 10
421+ ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i8>, ptr [[GETELEM14]], align 4
422+ ; CHECK-NEXT: [[L1111:%.*]] = extractelement <2 x i8> [[TMP5]], i32 0
423+ ; CHECK-NEXT: [[L1212:%.*]] = extractelement <2 x i8> [[TMP5]], i32 1
424+ ; CHECK-NEXT: ret void
425+ ;
426+ %l = load i8 , ptr %1 , align 1 ; offsets 0 and 1: expected to merge into the leading <2 x i8> load with align 2 (0 = 6 - 6)
427+ %getElem = getelementptr inbounds i8 , ptr %1 , i64 1
428+ %l2 = load i8 , ptr %getElem , align 1
429+ %getElem1 = getelementptr inbounds i8 , ptr %1 , i64 2
430+ %l3 = load i8 , ptr %getElem1 , align 1 ; offsets 2..5: expected to merge into the first <4 x i8> load with align 4 (2 = 6 - 4, backward propagation)
431+ %getElem2 = getelementptr inbounds i8 , ptr %1 , i64 3
432+ %l4 = load i8 , ptr %getElem2 , align 1
433+ %getElem8 = getelementptr inbounds i8 , ptr %1 , i64 4
434+ %l5 = load i8 , ptr %getElem8 , align 1
435+ %getElem9 = getelementptr inbounds i8 , ptr %1 , i64 5
436+ %l6 = load i8 , ptr %getElem9 , align 1
437+ %getElem10 = getelementptr inbounds i8 , ptr %1 , i64 6
438+ %l7 = load i8 , ptr %getElem10 , align 4 ; the single over-aligned access; offsets 6..9 are expected to merge into the second <4 x i8> load
439+ %getElem11 = getelementptr inbounds i8 , ptr %1 , i64 7
440+ %l8 = load i8 , ptr %getElem11 , align 1
441+ %getElem12 = getelementptr inbounds i8 , ptr %1 , i64 8
442+ %l9 = load i8 , ptr %getElem12 , align 1
443+ %getElem13 = getelementptr inbounds i8 , ptr %1 , i64 9
444+ %l0 = load i8 , ptr %getElem13 , align 1
445+ %getElem14 = getelementptr inbounds i8 , ptr %1 , i64 10
446+ %l11 = load i8 , ptr %getElem14 , align 1 ; offsets 10 and 11: expected to merge into the trailing <2 x i8> load with align 4 (10 = 6 + 4, forward propagation)
447+ %getElem15 = getelementptr inbounds i8 , ptr %1 , i64 11
448+ %l12 = load i8 , ptr %getElem15 , align 1
449+ ret void
450+ }
0 commit comments