Skip to content

Commit f961be4

Browse files
committed
Rebase, update tests
1 parent ca97396 commit f961be4

22 files changed

+620
-591
lines changed

llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -525,29 +525,29 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
525525
; GFX908-NEXT: v_rcp_iflag_f32_e32 v0, v0
526526
; GFX908-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
527527
; GFX908-NEXT: v_cvt_u32_f32_e32 v0, v0
528-
; GFX908-NEXT: v_readfirstlane_b32 s2, v0
529-
; GFX908-NEXT: s_mul_i32 s1, s1, s2
530-
; GFX908-NEXT: s_mul_hi_u32 s1, s2, s1
531-
; GFX908-NEXT: s_add_i32 s2, s2, s1
532-
; GFX908-NEXT: s_mul_hi_u32 s1, s6, s2
533-
; GFX908-NEXT: s_mul_i32 s2, s1, s7
534-
; GFX908-NEXT: s_sub_i32 s2, s6, s2
535-
; GFX908-NEXT: s_add_i32 s3, s1, 1
536-
; GFX908-NEXT: s_sub_i32 s6, s2, s7
537-
; GFX908-NEXT: s_cmp_ge_u32 s2, s7
538-
; GFX908-NEXT: s_cselect_b32 s1, s3, s1
539-
; GFX908-NEXT: s_cselect_b32 s2, s6, s2
540-
; GFX908-NEXT: s_add_i32 s3, s1, 1
541-
; GFX908-NEXT: s_cmp_ge_u32 s2, s7
542-
; GFX908-NEXT: s_cselect_b32 s8, s3, s1
543-
; GFX908-NEXT: s_lshr_b32 s2, s0, 16
544-
; GFX908-NEXT: v_cvt_f32_f16_e32 v19, s2
545-
; GFX908-NEXT: s_lshl_b64 s[6:7], s[4:5], 5
546-
; GFX908-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
528+
; GFX908-NEXT: v_readfirstlane_b32 s10, v0
529+
; GFX908-NEXT: s_mul_i32 s8, s8, s10
530+
; GFX908-NEXT: s_mul_hi_u32 s8, s10, s8
531+
; GFX908-NEXT: s_add_i32 s10, s10, s8
532+
; GFX908-NEXT: s_mul_hi_u32 s8, s0, s10
533+
; GFX908-NEXT: s_mul_i32 s10, s8, s1
534+
; GFX908-NEXT: s_sub_i32 s0, s0, s10
535+
; GFX908-NEXT: s_add_i32 s11, s8, 1
536+
; GFX908-NEXT: s_sub_i32 s10, s0, s1
537+
; GFX908-NEXT: s_cmp_ge_u32 s0, s1
538+
; GFX908-NEXT: s_cselect_b32 s8, s11, s8
539+
; GFX908-NEXT: s_cselect_b32 s0, s10, s0
540+
; GFX908-NEXT: s_add_i32 s10, s8, 1
541+
; GFX908-NEXT: s_cmp_ge_u32 s0, s1
542+
; GFX908-NEXT: s_cselect_b32 s8, s10, s8
543+
; GFX908-NEXT: s_lshr_b32 s5, s5, 16
544+
; GFX908-NEXT: v_cvt_f32_f16_e32 v19, s5
545+
; GFX908-NEXT: s_lshl_b64 s[10:11], s[2:3], 5
546+
; GFX908-NEXT: s_lshl_b64 s[14:15], s[8:9], 5
547+
; GFX908-NEXT: s_lshl_b64 s[12:13], s[6:7], 5
547548
; GFX908-NEXT: v_mov_b32_e32 v0, 0
548549
; GFX908-NEXT: s_and_b64 s[0:1], exec, s[0:1]
549-
; GFX908-NEXT: s_or_b32 s14, s14, 28
550-
; GFX908-NEXT: s_lshl_b64 s[16:17], s[8:9], 5
550+
; GFX908-NEXT: s_or_b32 s12, s12, 28
551551
; GFX908-NEXT: v_mov_b32_e32 v1, 0
552552
; GFX908-NEXT: s_waitcnt vmcnt(0)
553553
; GFX908-NEXT: v_readfirstlane_b32 s5, v16
@@ -610,8 +610,8 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
610610
; GFX908-NEXT: .LBB3_5: ; %bb16
611611
; GFX908-NEXT: ; Parent Loop BB3_2 Depth=1
612612
; GFX908-NEXT: ; => This Inner Loop Header: Depth=2
613-
; GFX908-NEXT: s_add_u32 s22, s20, s9
614-
; GFX908-NEXT: s_addc_u32 s23, s21, s13
613+
; GFX908-NEXT: s_add_u32 s22, s20, s5
614+
; GFX908-NEXT: s_addc_u32 s23, s21, s9
615615
; GFX908-NEXT: global_load_dword v21, v17, s[22:23] offset:-12 glc
616616
; GFX908-NEXT: s_waitcnt vmcnt(0)
617617
; GFX908-NEXT: global_load_dword v20, v17, s[22:23] offset:-8 glc
@@ -715,8 +715,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
715715
; GFX90A-NEXT: s_lshl_b64 s[14:15], s[8:9], 5
716716
; GFX90A-NEXT: s_lshl_b64 s[12:13], s[6:7], 5
717717
; GFX90A-NEXT: s_and_b64 s[0:1], exec, s[0:1]
718-
; GFX90A-NEXT: s_or_b32 s14, s14, 28
719-
; GFX90A-NEXT: s_lshl_b64 s[16:17], s[8:9], 5
718+
; GFX90A-NEXT: s_or_b32 s12, s12, 28
720719
; GFX90A-NEXT: s_waitcnt vmcnt(0)
721720
; GFX90A-NEXT: v_readfirstlane_b32 s5, v18
722721
; GFX90A-NEXT: s_and_b32 s5, 0xffff, s5
@@ -774,8 +773,8 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
774773
; GFX90A-NEXT: .LBB3_5: ; %bb16
775774
; GFX90A-NEXT: ; Parent Loop BB3_2 Depth=1
776775
; GFX90A-NEXT: ; => This Inner Loop Header: Depth=2
777-
; GFX90A-NEXT: s_add_u32 s22, s20, s9
778-
; GFX90A-NEXT: s_addc_u32 s23, s21, s13
776+
; GFX90A-NEXT: s_add_u32 s22, s20, s5
777+
; GFX90A-NEXT: s_addc_u32 s23, s21, s9
779778
; GFX90A-NEXT: global_load_dword v21, v19, s[22:23] offset:-12 glc
780779
; GFX90A-NEXT: s_waitcnt vmcnt(0)
781780
; GFX90A-NEXT: global_load_dword v20, v19, s[22:23] offset:-8 glc

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll

Lines changed: 71 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -5512,15 +5512,16 @@ define amdgpu_kernel void @udiv_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3
55125512
; GFX6-LABEL: udiv_v2i32_pow2k_denom:
55135513
; GFX6: ; %bb.0:
55145514
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
5515+
; GFX6-NEXT: s_mov_b32 s7, 0xf000
5516+
; GFX6-NEXT: s_mov_b32 s6, -1
55155517
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5516-
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
5517-
; GFX6-NEXT: s_lshr_b32 s4, s4, 12
5518-
; GFX6-NEXT: s_lshr_b32 s5, s5, 12
5519-
; GFX6-NEXT: s_mov_b32 s3, 0xf000
5520-
; GFX6-NEXT: s_mov_b32 s2, -1
5521-
; GFX6-NEXT: v_mov_b32_e32 v0, s4
5522-
; GFX6-NEXT: v_mov_b32_e32 v1, s5
5523-
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
5518+
; GFX6-NEXT: s_mov_b32 s4, s0
5519+
; GFX6-NEXT: s_mov_b32 s5, s1
5520+
; GFX6-NEXT: s_lshr_b32 s0, s2, 12
5521+
; GFX6-NEXT: s_lshr_b32 s1, s3, 12
5522+
; GFX6-NEXT: v_mov_b32_e32 v0, s0
5523+
; GFX6-NEXT: v_mov_b32_e32 v1, s1
5524+
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
55245525
; GFX6-NEXT: s_endpgm
55255526
;
55265527
; GFX9-LABEL: udiv_v2i32_pow2k_denom:
@@ -5554,18 +5555,19 @@ define amdgpu_kernel void @udiv_v2i32_mixed_pow2k_denom(ptr addrspace(1) %out, <
55545555
; GFX6: ; %bb.0:
55555556
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
55565557
; GFX6-NEXT: v_mov_b32_e32 v0, 0x100101
5558+
; GFX6-NEXT: s_mov_b32 s7, 0xf000
5559+
; GFX6-NEXT: s_mov_b32 s6, -1
55575560
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5558-
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
5559-
; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0
5560-
; GFX6-NEXT: s_lshr_b32 s4, s4, 12
5561-
; GFX6-NEXT: s_mov_b32 s3, 0xf000
5562-
; GFX6-NEXT: s_mov_b32 s2, -1
5563-
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v0
5561+
; GFX6-NEXT: v_mul_hi_u32 v0, s3, v0
5562+
; GFX6-NEXT: s_mov_b32 s4, s0
5563+
; GFX6-NEXT: s_lshr_b32 s0, s2, 12
5564+
; GFX6-NEXT: s_mov_b32 s5, s1
5565+
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s3, v0
55645566
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
55655567
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0
55665568
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 11, v0
5567-
; GFX6-NEXT: v_mov_b32_e32 v0, s4
5568-
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
5569+
; GFX6-NEXT: v_mov_b32_e32 v0, s0
5570+
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
55695571
; GFX6-NEXT: s_endpgm
55705572
;
55715573
; GFX9-LABEL: udiv_v2i32_mixed_pow2k_denom:
@@ -5906,15 +5908,16 @@ define amdgpu_kernel void @urem_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3
59065908
; GFX6-LABEL: urem_v2i32_pow2k_denom:
59075909
; GFX6: ; %bb.0:
59085910
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
5911+
; GFX6-NEXT: s_mov_b32 s7, 0xf000
5912+
; GFX6-NEXT: s_mov_b32 s6, -1
59095913
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5910-
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
5911-
; GFX6-NEXT: s_and_b32 s4, s4, 0xfff
5912-
; GFX6-NEXT: s_and_b32 s5, s5, 0xfff
5913-
; GFX6-NEXT: s_mov_b32 s3, 0xf000
5914-
; GFX6-NEXT: s_mov_b32 s2, -1
5915-
; GFX6-NEXT: v_mov_b32_e32 v0, s4
5916-
; GFX6-NEXT: v_mov_b32_e32 v1, s5
5917-
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
5914+
; GFX6-NEXT: s_mov_b32 s4, s0
5915+
; GFX6-NEXT: s_mov_b32 s5, s1
5916+
; GFX6-NEXT: s_and_b32 s0, s2, 0xfff
5917+
; GFX6-NEXT: s_and_b32 s1, s3, 0xfff
5918+
; GFX6-NEXT: v_mov_b32_e32 v0, s0
5919+
; GFX6-NEXT: v_mov_b32_e32 v1, s1
5920+
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
59185921
; GFX6-NEXT: s_endpgm
59195922
;
59205923
; GFX9-LABEL: urem_v2i32_pow2k_denom:
@@ -6288,21 +6291,22 @@ define amdgpu_kernel void @sdiv_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3
62886291
; GFX6-LABEL: sdiv_v2i32_pow2k_denom:
62896292
; GFX6: ; %bb.0:
62906293
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
6294+
; GFX6-NEXT: s_mov_b32 s7, 0xf000
6295+
; GFX6-NEXT: s_mov_b32 s6, -1
62916296
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6292-
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
6293-
; GFX6-NEXT: s_ashr_i32 s6, s4, 31
6294-
; GFX6-NEXT: s_ashr_i32 s7, s5, 31
6295-
; GFX6-NEXT: s_lshr_b32 s6, s6, 20
6296-
; GFX6-NEXT: s_lshr_b32 s7, s7, 20
6297-
; GFX6-NEXT: s_add_i32 s4, s4, s6
6298-
; GFX6-NEXT: s_add_i32 s5, s5, s7
6299-
; GFX6-NEXT: s_ashr_i32 s4, s4, 12
6300-
; GFX6-NEXT: s_ashr_i32 s5, s5, 12
6301-
; GFX6-NEXT: s_mov_b32 s3, 0xf000
6302-
; GFX6-NEXT: s_mov_b32 s2, -1
6303-
; GFX6-NEXT: v_mov_b32_e32 v0, s4
6304-
; GFX6-NEXT: v_mov_b32_e32 v1, s5
6305-
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
6297+
; GFX6-NEXT: s_mov_b32 s4, s0
6298+
; GFX6-NEXT: s_mov_b32 s5, s1
6299+
; GFX6-NEXT: s_ashr_i32 s0, s2, 31
6300+
; GFX6-NEXT: s_ashr_i32 s1, s3, 31
6301+
; GFX6-NEXT: s_lshr_b32 s0, s0, 20
6302+
; GFX6-NEXT: s_lshr_b32 s1, s1, 20
6303+
; GFX6-NEXT: s_add_i32 s0, s2, s0
6304+
; GFX6-NEXT: s_add_i32 s1, s3, s1
6305+
; GFX6-NEXT: s_ashr_i32 s0, s0, 12
6306+
; GFX6-NEXT: s_ashr_i32 s1, s1, 12
6307+
; GFX6-NEXT: v_mov_b32_e32 v0, s0
6308+
; GFX6-NEXT: v_mov_b32_e32 v1, s1
6309+
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
63066310
; GFX6-NEXT: s_endpgm
63076311
;
63086312
; GFX9-LABEL: sdiv_v2i32_pow2k_denom:
@@ -6342,21 +6346,22 @@ define amdgpu_kernel void @ssdiv_v2i32_mixed_pow2k_denom(ptr addrspace(1) %out,
63426346
; GFX6: ; %bb.0:
63436347
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
63446348
; GFX6-NEXT: v_mov_b32_e32 v0, 0x80080081
6349+
; GFX6-NEXT: s_mov_b32 s7, 0xf000
6350+
; GFX6-NEXT: s_mov_b32 s6, -1
63456351
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6346-
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
6347-
; GFX6-NEXT: v_mul_hi_i32 v0, s5, v0
6348-
; GFX6-NEXT: s_ashr_i32 s6, s4, 31
6349-
; GFX6-NEXT: s_lshr_b32 s6, s6, 20
6350-
; GFX6-NEXT: s_add_i32 s4, s4, s6
6351-
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s5, v0
6352-
; GFX6-NEXT: s_ashr_i32 s4, s4, 12
6352+
; GFX6-NEXT: v_mul_hi_i32 v0, s3, v0
6353+
; GFX6-NEXT: s_mov_b32 s4, s0
6354+
; GFX6-NEXT: s_ashr_i32 s0, s2, 31
6355+
; GFX6-NEXT: s_lshr_b32 s0, s0, 20
6356+
; GFX6-NEXT: s_add_i32 s0, s2, s0
6357+
; GFX6-NEXT: v_add_i32_e32 v0, vcc, s3, v0
6358+
; GFX6-NEXT: s_ashr_i32 s0, s0, 12
63536359
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 31, v0
63546360
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 11, v0
6355-
; GFX6-NEXT: s_mov_b32 s3, 0xf000
6356-
; GFX6-NEXT: s_mov_b32 s2, -1
6361+
; GFX6-NEXT: s_mov_b32 s5, s1
63576362
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v0, v1
6358-
; GFX6-NEXT: v_mov_b32_e32 v0, s4
6359-
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
6363+
; GFX6-NEXT: v_mov_b32_e32 v0, s0
6364+
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
63606365
; GFX6-NEXT: s_endpgm
63616366
;
63626367
; GFX9-LABEL: ssdiv_v2i32_mixed_pow2k_denom:
@@ -6793,23 +6798,24 @@ define amdgpu_kernel void @srem_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3
67936798
; GFX6-LABEL: srem_v2i32_pow2k_denom:
67946799
; GFX6: ; %bb.0:
67956800
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
6801+
; GFX6-NEXT: s_mov_b32 s7, 0xf000
6802+
; GFX6-NEXT: s_mov_b32 s6, -1
67966803
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6797-
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
6798-
; GFX6-NEXT: s_ashr_i32 s6, s4, 31
6799-
; GFX6-NEXT: s_lshr_b32 s6, s6, 20
6800-
; GFX6-NEXT: s_ashr_i32 s7, s5, 31
6801-
; GFX6-NEXT: s_add_i32 s6, s4, s6
6802-
; GFX6-NEXT: s_lshr_b32 s7, s7, 20
6803-
; GFX6-NEXT: s_and_b32 s6, s6, 0xfffff000
6804-
; GFX6-NEXT: s_sub_i32 s4, s4, s6
6805-
; GFX6-NEXT: s_add_i32 s6, s5, s7
6806-
; GFX6-NEXT: s_and_b32 s6, s6, 0xfffff000
6807-
; GFX6-NEXT: s_sub_i32 s5, s5, s6
6808-
; GFX6-NEXT: s_mov_b32 s3, 0xf000
6809-
; GFX6-NEXT: s_mov_b32 s2, -1
6810-
; GFX6-NEXT: v_mov_b32_e32 v0, s4
6811-
; GFX6-NEXT: v_mov_b32_e32 v1, s5
6812-
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
6804+
; GFX6-NEXT: s_mov_b32 s4, s0
6805+
; GFX6-NEXT: s_mov_b32 s5, s1
6806+
; GFX6-NEXT: s_ashr_i32 s0, s2, 31
6807+
; GFX6-NEXT: s_ashr_i32 s1, s3, 31
6808+
; GFX6-NEXT: s_lshr_b32 s0, s0, 20
6809+
; GFX6-NEXT: s_lshr_b32 s1, s1, 20
6810+
; GFX6-NEXT: s_add_i32 s0, s2, s0
6811+
; GFX6-NEXT: s_add_i32 s1, s3, s1
6812+
; GFX6-NEXT: s_and_b32 s0, s0, 0xfffff000
6813+
; GFX6-NEXT: s_and_b32 s1, s1, 0xfffff000
6814+
; GFX6-NEXT: s_sub_i32 s0, s2, s0
6815+
; GFX6-NEXT: s_sub_i32 s1, s3, s1
6816+
; GFX6-NEXT: v_mov_b32_e32 v0, s0
6817+
; GFX6-NEXT: v_mov_b32_e32 v1, s1
6818+
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
68136819
; GFX6-NEXT: s_endpgm
68146820
;
68156821
; GFX9-LABEL: srem_v2i32_pow2k_denom:

llvm/test/CodeGen/AMDGPU/build_vector.ll

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -257,15 +257,16 @@ define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out,
257257
; GFX6-LABEL: build_v2i32_from_v4i16_shuffle:
258258
; GFX6: ; %bb.0: ; %entry
259259
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
260+
; GFX6-NEXT: s_mov_b32 s7, 0xf000
261+
; GFX6-NEXT: s_mov_b32 s6, -1
260262
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
261-
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
262-
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
263-
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
264-
; GFX6-NEXT: s_mov_b32 s3, 0xf000
265-
; GFX6-NEXT: s_mov_b32 s2, -1
266-
; GFX6-NEXT: v_mov_b32_e32 v0, s4
267-
; GFX6-NEXT: v_mov_b32_e32 v1, s5
268-
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
263+
; GFX6-NEXT: s_mov_b32 s4, s0
264+
; GFX6-NEXT: s_mov_b32 s5, s1
265+
; GFX6-NEXT: s_lshl_b32 s0, s3, 16
266+
; GFX6-NEXT: s_lshl_b32 s1, s2, 16
267+
; GFX6-NEXT: v_mov_b32_e32 v0, s1
268+
; GFX6-NEXT: v_mov_b32_e32 v1, s0
269+
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
269270
; GFX6-NEXT: s_endpgm
270271
;
271272
; GFX8-LABEL: build_v2i32_from_v4i16_shuffle:

llvm/test/CodeGen/AMDGPU/fabs.ll

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -99,15 +99,16 @@ define amdgpu_kernel void @fabs_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
9999
; SI-LABEL: fabs_v2f32:
100100
; SI: ; %bb.0:
101101
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
102+
; SI-NEXT: s_mov_b32 s7, 0xf000
103+
; SI-NEXT: s_mov_b32 s6, -1
102104
; SI-NEXT: s_waitcnt lgkmcnt(0)
103-
; SI-NEXT: s_mov_b64 s[4:5], s[2:3]
104-
; SI-NEXT: s_mov_b32 s3, 0xf000
105-
; SI-NEXT: s_mov_b32 s2, -1
106-
; SI-NEXT: s_bitset0_b32 s5, 31
107-
; SI-NEXT: s_bitset0_b32 s4, 31
108-
; SI-NEXT: v_mov_b32_e32 v0, s4
109-
; SI-NEXT: v_mov_b32_e32 v1, s5
110-
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
105+
; SI-NEXT: s_mov_b32 s4, s0
106+
; SI-NEXT: s_mov_b32 s5, s1
107+
; SI-NEXT: s_and_b32 s0, s3, 0x7fffffff
108+
; SI-NEXT: s_and_b32 s1, s2, 0x7fffffff
109+
; SI-NEXT: v_mov_b32_e32 v0, s1
110+
; SI-NEXT: v_mov_b32_e32 v1, s0
111+
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
111112
; SI-NEXT: s_endpgm
112113
;
113114
; VI-LABEL: fabs_v2f32:

llvm/test/CodeGen/AMDGPU/fdiv.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1242,8 +1242,8 @@ define amdgpu_kernel void @s_fdiv_ulp25_v2f32(ptr addrspace(1) %out, <2 x float>
12421242
; GFX11-NEXT: v_rcp_f32_e32 v1, s7
12431243
; GFX11-NEXT: v_mov_b32_e32 v2, 0
12441244
; GFX11-NEXT: s_waitcnt_depctr depctr_va_vdst(0)
1245-
; GFX11-NEXT: v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1
1246-
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
1245+
; GFX11-NEXT: v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s3, v1
1246+
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
12471247
; GFX11-NEXT: s_endpgm
12481248
;
12491249
; EG-LABEL: s_fdiv_ulp25_v2f32:
@@ -1320,8 +1320,8 @@ define amdgpu_kernel void @s_fdiv_v2f32_fast_math(ptr addrspace(1) %out, <2 x fl
13201320
; GFX11-NEXT: v_rcp_f32_e32 v2, s6
13211321
; GFX11-NEXT: v_mov_b32_e32 v3, 0
13221322
; GFX11-NEXT: s_waitcnt_depctr depctr_va_vdst(0)
1323-
; GFX11-NEXT: v_dual_mul_f32 v1, s1, v0 :: v_dual_mul_f32 v0, s0, v2
1324-
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[4:5]
1323+
; GFX11-NEXT: v_dual_mul_f32 v1, s3, v0 :: v_dual_mul_f32 v0, s2, v2
1324+
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1]
13251325
; GFX11-NEXT: s_endpgm
13261326
;
13271327
; EG-LABEL: s_fdiv_v2f32_fast_math:
@@ -1398,8 +1398,8 @@ define amdgpu_kernel void @s_fdiv_v2f32_arcp_math(ptr addrspace(1) %out, <2 x fl
13981398
; GFX11-NEXT: v_rcp_f32_e32 v2, s6
13991399
; GFX11-NEXT: v_mov_b32_e32 v3, 0
14001400
; GFX11-NEXT: s_waitcnt_depctr depctr_va_vdst(0)
1401-
; GFX11-NEXT: v_dual_mul_f32 v1, s1, v0 :: v_dual_mul_f32 v0, s0, v2
1402-
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[4:5]
1401+
; GFX11-NEXT: v_dual_mul_f32 v1, s3, v0 :: v_dual_mul_f32 v0, s2, v2
1402+
; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1]
14031403
; GFX11-NEXT: s_endpgm
14041404
;
14051405
; EG-LABEL: s_fdiv_v2f32_arcp_math:

llvm/test/CodeGen/AMDGPU/fnearbyint.ll

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,14 @@ define amdgpu_kernel void @fnearbyint_v2f32(ptr addrspace(1) %out, <2 x float> %
121121
; SICI-LABEL: fnearbyint_v2f32:
122122
; SICI: ; %bb.0: ; %entry
123123
; SICI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
124+
; SICI-NEXT: s_mov_b32 s7, 0xf000
125+
; SICI-NEXT: s_mov_b32 s6, -1
124126
; SICI-NEXT: s_waitcnt lgkmcnt(0)
125-
; SICI-NEXT: s_mov_b64 s[4:5], s[2:3]
126-
; SICI-NEXT: s_mov_b32 s3, 0xf000
127-
; SICI-NEXT: s_mov_b32 s2, -1
128-
; SICI-NEXT: v_rndne_f32_e32 v1, s5
129-
; SICI-NEXT: v_rndne_f32_e32 v0, s4
130-
; SICI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
127+
; SICI-NEXT: s_mov_b32 s4, s0
128+
; SICI-NEXT: s_mov_b32 s5, s1
129+
; SICI-NEXT: v_rndne_f32_e32 v1, s3
130+
; SICI-NEXT: v_rndne_f32_e32 v0, s2
131+
; SICI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
131132
; SICI-NEXT: s_endpgm
132133
;
133134
; VI-LABEL: fnearbyint_v2f32:

0 commit comments

Comments
 (0)