Skip to content

Commit ec99ec1

Browse files
committed
Redesign Straight-Line Strength Reduction (SLSR)
1 parent 948c749 commit ec99ec1

File tree

10 files changed: +1102 additions, -527 deletions (lines changed)

10 files changed

+1102
-527
lines changed

llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp

Lines changed: 866 additions & 276 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -541,10 +541,9 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
541541
; GFX908-NEXT: s_lshr_b32 s2, s0, 16
542542
; GFX908-NEXT: v_cvt_f32_f16_e32 v19, s2
543543
; GFX908-NEXT: s_lshl_b64 s[6:7], s[4:5], 5
544-
; GFX908-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
545544
; GFX908-NEXT: v_mov_b32_e32 v0, 0
545+
; GFX908-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
546546
; GFX908-NEXT: s_and_b64 s[0:1], exec, s[0:1]
547-
; GFX908-NEXT: s_or_b32 s14, s14, 28
548547
; GFX908-NEXT: s_lshl_b64 s[16:17], s[8:9], 5
549548
; GFX908-NEXT: v_mov_b32_e32 v1, 0
550549
; GFX908-NEXT: s_waitcnt vmcnt(0)
@@ -610,13 +609,13 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
610609
; GFX908-NEXT: ; => This Inner Loop Header: Depth=2
611610
; GFX908-NEXT: s_add_u32 s22, s20, s9
612611
; GFX908-NEXT: s_addc_u32 s23, s21, s13
613-
; GFX908-NEXT: global_load_dword v21, v17, s[22:23] offset:-12 glc
612+
; GFX908-NEXT: global_load_dword v21, v17, s[22:23] offset:16 glc
614613
; GFX908-NEXT: s_waitcnt vmcnt(0)
615-
; GFX908-NEXT: global_load_dword v20, v17, s[22:23] offset:-8 glc
614+
; GFX908-NEXT: global_load_dword v20, v17, s[22:23] offset:20 glc
616615
; GFX908-NEXT: s_waitcnt vmcnt(0)
617-
; GFX908-NEXT: global_load_dword v12, v17, s[22:23] offset:-4 glc
616+
; GFX908-NEXT: global_load_dword v12, v17, s[22:23] offset:24 glc
618617
; GFX908-NEXT: s_waitcnt vmcnt(0)
619-
; GFX908-NEXT: global_load_dword v12, v17, s[22:23] glc
618+
; GFX908-NEXT: global_load_dword v12, v17, s[22:23] offset:28 glc
620619
; GFX908-NEXT: s_waitcnt vmcnt(0)
621620
; GFX908-NEXT: ds_read_b64 v[12:13], v17
622621
; GFX908-NEXT: ds_read_b64 v[14:15], v0
@@ -710,7 +709,6 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
710709
; GFX90A-NEXT: s_lshl_b64 s[6:7], s[4:5], 5
711710
; GFX90A-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
712711
; GFX90A-NEXT: s_and_b64 s[0:1], exec, s[0:1]
713-
; GFX90A-NEXT: s_or_b32 s14, s14, 28
714712
; GFX90A-NEXT: s_lshl_b64 s[16:17], s[8:9], 5
715713
; GFX90A-NEXT: s_waitcnt vmcnt(0)
716714
; GFX90A-NEXT: v_readfirstlane_b32 s2, v18
@@ -771,13 +769,13 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
771769
; GFX90A-NEXT: ; => This Inner Loop Header: Depth=2
772770
; GFX90A-NEXT: s_add_u32 s22, s20, s9
773771
; GFX90A-NEXT: s_addc_u32 s23, s21, s13
774-
; GFX90A-NEXT: global_load_dword v21, v19, s[22:23] offset:-12 glc
772+
; GFX90A-NEXT: global_load_dword v21, v19, s[22:23] offset:16 glc
775773
; GFX90A-NEXT: s_waitcnt vmcnt(0)
776-
; GFX90A-NEXT: global_load_dword v20, v19, s[22:23] offset:-8 glc
774+
; GFX90A-NEXT: global_load_dword v20, v19, s[22:23] offset:20 glc
777775
; GFX90A-NEXT: s_waitcnt vmcnt(0)
778-
; GFX90A-NEXT: global_load_dword v14, v19, s[22:23] offset:-4 glc
776+
; GFX90A-NEXT: global_load_dword v14, v19, s[22:23] offset:24 glc
779777
; GFX90A-NEXT: s_waitcnt vmcnt(0)
780-
; GFX90A-NEXT: global_load_dword v14, v19, s[22:23] glc
778+
; GFX90A-NEXT: global_load_dword v14, v19, s[22:23] offset:28 glc
781779
; GFX90A-NEXT: s_waitcnt vmcnt(0)
782780
; GFX90A-NEXT: ds_read_b64 v[14:15], v19
783781
; GFX90A-NEXT: ds_read_b64 v[16:17], v0

0 commit comments

Comments
 (0)