Skip to content

Commit 82b6998

Browse files
committed
[AMDGPU] Change default loop alignment for GFX9 and higher targets
Align small loops aggressively to 32 bytes and larger loops to 16 bytes
1 parent 525ccd4 commit 82b6998

File tree

111 files changed

+424
-425
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

111 files changed

+424
-425
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,14 @@ using namespace llvm::SDPatternMatch;
5353

5454
STATISTIC(NumTailCalls, "Number of tail calls");
5555

56-
static cl::opt<bool> DisableLoopAlignment("amdgpu-disable-loop-alignment",
57-
cl::desc("Do not align loops"),
58-
cl::init(false));
56+
static cl::opt<bool>
57+
DisableAllLoopAlignment("amdgpu-disable-all-loop-alignment",
58+
cl::desc("Do not align loops"), cl::init(false));
5959

6060
static cl::opt<bool>
61-
DisableLoopAlignmentPrefetch("amdgpu-disable-loop-alignment-prefetch",
62-
cl::desc("Do not align and prefetch loops"),
63-
cl::init(false));
61+
DisableLoopAlignment("amdgpu-disable-loop-alignment",
62+
cl::desc("Do not align loops for prefetch"),
63+
cl::init(false));
6464

6565
static cl::opt<bool> UseDivergentRegisterIndexing(
6666
"amdgpu-use-divergent-register-indexing", cl::Hidden,
@@ -17440,7 +17440,7 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
1744017440
const Align CacheLineAlign = Align(64);
1744117441
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
1744217442
const MachineBasicBlock *Header = ML->getHeader();
17443-
if (DisableLoopAlignment || Header->getAlignment() > PrefAlign)
17443+
if (DisableAllLoopAlignment || Header->getAlignment() > PrefAlign)
1744417444
return Header->getAlignment(); // Already processed.
1744517445

1744617446
unsigned LoopSize = 0;
@@ -17459,8 +17459,7 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
1745917459

1746017460
// Pre-GFX10 targets did not benefit from loop alignment driven by prefetch
1746117461
// considerations
17462-
if (!ML || DisableLoopAlignmentPrefetch ||
17463-
!getSubtarget()->hasInstPrefetch() ||
17462+
if (!ML || DisableLoopAlignment || !getSubtarget()->hasInstPrefetch() ||
1746417463
getSubtarget()->hasInstFwdPrefetchBug()) {
1746517464
// Align loops <= 32 bytes aggressively
1746617465
if (LoopSize <= 32)

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
3-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
4-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
5-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
7-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
2+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
3+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
4+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
5+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
7+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
88
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s
99
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
1010

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
3-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
4-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
5-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
7-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
2+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
3+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
4+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
5+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
7+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
88
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s
99
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
1010

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s
2+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s
33

44
; Divergent phis that don't require lowering using lane mask merging
55

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
2+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
33

44
; This file contains various tests that have divergent i1s used outside of
55
; the loop. These are lane masks in sgpr and need to have correct value in

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s
2+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s
33

44
; Simplest case, if - then, that requires lane mask merging,
55
; %phi lane mask will hold %val_A at %A. Lanes that are active in %B

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s
2+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s
33

44
define void @temporal_divergent_i1_phi(float %val, ptr %addr) {
55
; GFX10-LABEL: temporal_divergent_i1_phi:

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s
2+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s
33

44
define void @temporal_divergent_i32(float %val, ptr %addr) {
55
; GFX10-LABEL: temporal_divergent_i32:

llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
2+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
33

44
; Make sure the branch targets are correct after lowering llvm.amdgcn.if
55

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s
3-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
4-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10-32 %s
5-
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefix=GFX10-64 %s
3+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
4+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10-32 %s
5+
; RUN: llc -amdgpu-disable-all-loop-alignment=true -global-isel -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefix=GFX10-64 %s
66

77
define amdgpu_ps void @static_exact(float %arg0, float %arg1) {
88
; SI-LABEL: static_exact:

0 commit comments

Comments
 (0)