Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1431,6 +1431,14 @@ def FeatureDisable : SubtargetFeature<"",
"Dummy feature to disable assembler instructions"
>;

// GFX-9 & higher targets have a 16-dword Instruction Buffer and per-SQ
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// GFX-9 & higher targets have a 16-dword Instruction Buffer and per-SQ
// GFX9 & higher targets have a 16-dword Instruction Buffer and per-SQ

// instruction store which can supply 4 dwords to each of the 2 waves per
// cycle. Change default alignment to 4 dwords or 16 bytes.
def FeaturePrefLoopAlign32B : SubtargetFeature<"loop-align",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Include the value in the feature name

"PrefLoopAlignmentLog2",
"5",
"Prefer 32-byte alignment for loops">;

//===----------------------------------------------------------------------===//

class GCNSubtargetFeatureGeneration <string Value,
Expand Down Expand Up @@ -1495,7 +1503,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad,
FeaturePrefLoopAlign32B
]
>;

Expand All @@ -1519,7 +1528,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureDefaultComponentZero, FeatureMaxHardClauseLength63,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeaturePrefLoopAlign32B
]
>;

Expand All @@ -1542,7 +1551,7 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS,
FeatureDefaultComponentZero, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
FeatureVmemWriteVgprInOrder
FeatureVmemWriteVgprInOrder, FeaturePrefLoopAlign32B
]
>;

Expand All @@ -1566,7 +1575,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
FeatureIEEEMinimumMaximumInsts, FeatureMinimum3Maximum3F32,
FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics
FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
FeaturePrefLoopAlign32B
]
>;

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setMaxAtomicSizeInBitsSupported(64);
setMaxDivRemBitWidthSupported(64);
setMaxLargeFPConvertBitWidthSupported(64);
setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopAlignment()));
}

bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class AMDGPUSubtarget {
unsigned LocalMemorySize = 0;
unsigned AddressableLocalMemorySize = 0;
char WavefrontSizeLog2 = 0;
unsigned PrefLoopAlignmentLog2 = 0;

public:
AMDGPUSubtarget(Triple TT);
Expand Down Expand Up @@ -377,6 +378,8 @@ class AMDGPUSubtarget {
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;

unsigned getPrefLoopAlignment() const { return PrefLoopAlignmentLog2; }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need a target feature for this? Can we just have the getter return the correct value based on gfx version directly below?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

prefer target feature over a check for every gfx version esp as new versions show up

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm more wondering if this is a universal property that should have always been done. The prior comment says there's no benefit pre-gfx10, so why is this now needed for gfx9?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't see sufficient documentation to confirm this would benefit older targets, but another amdgpu compiler is doing this for all targets. So will make it universal.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The prior comment says there's no benefit pre-gfx10, so why is this now needed for gfx9?

Yes, same doubt.


/// \returns Corresponding DWARF register number mapping flavour for the
/// \p WavefrontSize.
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
Expand Down
61 changes: 40 additions & 21 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,14 @@ using namespace llvm::SDPatternMatch;

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> DisableLoopAlignment("amdgpu-disable-loop-alignment",
cl::desc("Do not align loops"),
cl::init(false));

static cl::opt<bool>
DisableLoopAlignment("amdgpu-disable-loop-alignment",
cl::desc("Do not align and prefetch loops"),
cl::init(false));
DisableLoopAlignmentPrefetch("amdgpu-disable-loop-alignment-prefetch",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if that's a good idea to change existing options, as it might have already been used by someone else.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will change that, thanks

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nobody should be using backend defined cl::opts

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not used by "anyone" in the LLVM repo but who knows whether there would be downstream users using it directly. If there is no one even in the downstream, I'd just remove it.

cl::desc("Do not align and prefetch loops"),
cl::init(false));

static cl::opt<bool> UseDivergentRegisterIndexing(
"amdgpu-use-divergent-register-indexing", cl::Hidden,
Expand Down Expand Up @@ -17434,25 +17438,9 @@ Align SITargetLowering::computeKnownAlignForTargetInstr(
Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
const Align PrefAlign = TargetLowering::getPrefLoopAlignment(ML);
const Align CacheLineAlign = Align(64);

// Pre-GFX10 target did not benefit from loop alignment
if (!ML || DisableLoopAlignment || !getSubtarget()->hasInstPrefetch() ||
getSubtarget()->hasInstFwdPrefetchBug())
return PrefAlign;

// On GFX10 I$ is 4 x 64 bytes cache lines.
// By default prefetcher keeps one cache line behind and reads two ahead.
// We can modify it with S_INST_PREFETCH for larger loops to have two lines
// behind and one ahead.
// Therefor we can benefit from aligning loop headers if loop fits 192 bytes.
// If loop fits 64 bytes it always spans no more than two cache lines and
// does not need an alignment.
// Else if loop is less or equal 128 bytes we do not need to modify prefetch,
// Else if loop is less or equal 192 bytes we need two lines behind.

const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const MachineBasicBlock *Header = ML->getHeader();
if (Header->getAlignment() != PrefAlign)
if (DisableLoopAlignment || Header->getAlignment() > PrefAlign)
return Header->getAlignment(); // Already processed.

unsigned LoopSize = 0;
Expand All @@ -17465,10 +17453,41 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
for (const MachineInstr &MI : *MBB) {
LoopSize += TII->getInstSizeInBytes(MI);
if (LoopSize > 192)
return PrefAlign;
break;
}
}

// Pre-GFX10 targets did not benefit from loop alignment driven by prefetch
// considerations
if (!ML || DisableLoopAlignmentPrefetch ||
!getSubtarget()->hasInstPrefetch() ||
getSubtarget()->hasInstFwdPrefetchBug()) {
// Align loops < 32 bytes agrressively
if (LoopSize <= 32)
return PrefAlign;
// Align larger loops less aggressively
if (!ML->isInnermost())
return Header->getAlignment();
return (PrefAlign.value() > 1) ? Align(PrefAlign.value() >> 1) : PrefAlign;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you rewrite this to use Align more directly, this is unnecessarily confusing

}

// On GFX10 I$ is 4 x 64 bytes cache lines.
// By default prefetcher keeps one cache line behind and reads two ahead.
// We can modify it with S_INST_PREFETCH for larger loops to have two lines
// behind and one ahead.
// Therefor we can benefit from aligning loop headers if loop fits 192 bytes.
// If loop fits 64 bytes it always spans no more than two cache lines and
// does not need an alignment.
// Else if loop is less or equal 128 bytes we do not need to modify prefetch,
// Else if loop is less or equal 192 bytes we need two lines behind.

// Align larger loops less aggressively
if (LoopSize > 192) {
if (!ML->isInnermost())
return Header->getAlignment();
return (PrefAlign.value() > 1) ? Align(PrefAlign.value() >> 1) : PrefAlign;
}

if (LoopSize <= 64)
return PrefAlign;

Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s

Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s

; Divergent phis that don't require lowering using lane mask merging

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s

; This file contains various tests that have divergent i1s used outside of
; the loop. These are lane masks is sgpr and need to have correct value in
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s

; Simples case, if - then, that requires lane mask merging,
; %phi lane mask will hold %val_A at %A. Lanes that are active in %B
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s

define void @temporal_divergent_i1_phi(float %val, ptr %addr) {
; GFX10-LABEL: temporal_divergent_i1_phi:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX10 %s

define void @temporal_divergent_i32(float %val, ptr %addr) {
; GFX10-LABEL: temporal_divergent_i32:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s

; Make sure the branch targets are correct after lowering llvm.amdgcn.if

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10-32 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefix=GFX10-64 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10-32 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefix=GFX10-64 %s

define amdgpu_ps void @static_exact(float %arg0, float %arg1) {
; SI-LABEL: static_exact:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=OLD_RBS %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=NEW_RBS %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=OLD_RBS %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck -check-prefix=NEW_RBS %s

; if instruction is uniform and there is available instruction, select SALU instruction
define amdgpu_ps void @uniform_in_vgpr(float inreg %a, i32 inreg %b, ptr addrspace(1) %ptr) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck --check-prefix=GFX906 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -global-isel -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck --check-prefix=GFX906 %s

define amdgpu_kernel void @v3i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst) {
; GFX906-LABEL: v3i8_liveout:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
; RUN: llc -amdgpu-disable-loop-alignment=true -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
; RUN: llc -amdgpu-disable-loop-alignment=true -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s

; This testcase would fail on GFX908 due to not having a free VGPR available to
; copy between AGPRs.
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/atomic-optimizer-strict-wqm.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10
; RUN: llc -amdgpu-disable-loop-alignment=true -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10

declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32, ptr addrspace(8), i32, i32, i32 immarg)
Expand Down
Loading