diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 50da8fd7a47a1..3d0691d5d8179 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -137,6 +138,14 @@ static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
   };
 }
 
+// Change TypeIdx's type to a scalar the same size as the memory type.
+static LegalizeMutation getScalarTypeFromMemDesc(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
+    return std::make_pair(TypeIdx, LLT::scalar(MemSize));
+  };
+}
+
 // Increase the number of vector elements to reach the next legal RegClass.
 static LegalizeMutation moreElementsToNextExistingRegClass(unsigned TypeIdx) {
   return [=](const LegalityQuery &Query) {
@@ -384,6 +393,16 @@ static LegalityPredicate isWideScalarExtLoadTruncStore(unsigned TypeIdx) {
   };
 }
 
+// Matches a truncating store or extending load whose register size is larger
+// than 32 bits and whose memory size is a power of 2.
+static LegalityPredicate isTruncStoreToSizePowerOf2(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
+    return isWideScalarExtLoadTruncStore(TypeIdx)(Query) &&
+           isPowerOf2_64(MemSize);
+  };
+}
+
 // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
 // handle some operations by just promoting the register during
 // selection. There are also d16 loads on GFX9+ which preserve the high bits.
@@ -1635,11 +1654,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
         // May need relegalization for the scalars.
         return std::pair(0, EltTy);
       })
-      .minScalar(0, S32)
-      .narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, S32))
-      .widenScalarToNextPow2(0)
-      .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
-      .lower();
+      .minScalar(0, S32)
+      .narrowScalarIf(isTruncStoreToSizePowerOf2(0),
+                      getScalarTypeFromMemDesc(0))
+      .widenScalarToNextPow2(0)
+      .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
+      .lower();
 }
 
 // FIXME: Unaligned accesses not lowered.
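For intuition, here is a minimal standalone sketch of how the new predicate and mutation compose. It uses simplified stand-ins for LLT and LegalityQuery rather than the real LLVM types, and it approximates isWideScalarExtLoadTruncStore as "scalar register wider than 32 bits and wider than the memory access"; treat it as an illustration of the rule's intent, not as the actual API.

// Standalone sketch (simplified stand-ins, not the real LLVM types).
#include <cassert>
#include <cstdint>
#include <utility>

struct LLT {
  unsigned SizeInBits;
  static LLT scalar(unsigned Bits) { return {Bits}; }
  unsigned getSizeInBits() const { return SizeInBits; }
};

struct MemDesc { LLT MemoryTy; };

struct LegalityQuery {
  LLT Types[1];         // Type index 0: the stored value's register type.
  MemDesc MMODescrs[1]; // First memory-operand descriptor.
};

static bool isPowerOf2_64(uint64_t V) { return V && !(V & (V - 1)); }

// Approximates isWideScalarExtLoadTruncStore: a scalar register wider than
// 32 bits whose memory access is narrower than the register.
static bool isWideTruncStore(const LegalityQuery &Q) {
  unsigned RegSize = Q.Types[0].getSizeInBits();
  unsigned MemSize = Q.MMODescrs[0].MemoryTy.getSizeInBits();
  return RegSize > 32 && MemSize < RegSize;
}

// Mirrors the new predicate: additionally require a power-of-2 memory size.
static bool isTruncStoreToSizePowerOf2(const LegalityQuery &Q) {
  return isWideTruncStore(Q) &&
         isPowerOf2_64(Q.MMODescrs[0].MemoryTy.getSizeInBits());
}

// Mirrors getScalarTypeFromMemDesc: narrow the value type at index 0 to a
// scalar of exactly the memory size.
static std::pair<unsigned, LLT> getScalarTypeFromMemDesc(const LegalityQuery &Q) {
  return {0u, LLT::scalar(Q.MMODescrs[0].MemoryTy.getSizeInBits())};
}

int main() {
  // An s64 value truncated to a 32-bit store: the rule fires and the stored
  // value is narrowed to s32.
  LegalityQuery StoreS32{{LLT::scalar(64)}, {{LLT::scalar(32)}}};
  assert(isTruncStoreToSizePowerOf2(StoreS32));
  assert(getScalarTypeFromMemDesc(StoreS32).second.getSizeInBits() == 32);

  // An s64 value truncated to a 48-bit store: 48 is not a power of 2, so the
  // rule stays out of the way and later rules split the store instead.
  LegalityQuery StoreS48{{LLT::scalar(64)}, {{LLT::scalar(48)}}};
  assert(!isTruncStoreToSizePowerOf2(StoreS48));
  return 0;
}

The net effect, as the updated checks below suggest, is that a wide truncating store to a power-of-2 memory size is narrowed to exactly the memory size rather than unconditionally to s32, while non-power-of-2 sizes fall through to lower(), which splits them into power-of-2 pieces.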
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir index 2b84c6bcba7b5..acbcb098e8367 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -886,33 +886,34 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]] ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32) ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C6]](s64) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C5]] - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C6]](s64) ; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) @@ -922,11 +923,12 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CI-NEXT: 
[[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) ; CI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) ; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; CI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1) @@ -936,22 +938,23 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64) ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) @@ -960,11 +963,11 @@ body: | ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) - ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C4]](s16) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C5]](s64) - ; VI-NEXT: G_STORE [[TRUNC3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) + 
; VI-NEXT: G_STORE [[TRUNC4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) ; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; @@ -973,11 +976,12 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) ; GFX9-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; GFX9-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1) @@ -998,17 +1002,18 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) @@ -1018,11 +1023,12 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) ; CI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) ; 
CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; CI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) @@ -1032,17 +1038,18 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; VI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) @@ -1052,11 +1059,12 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) ; GFX9-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) ; GFX9-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir index a931c6366c403..7fd23197a5dd6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -285,13 +285,13 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C]](s16) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], 
[[C]](s16) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) + ; VI-NEXT: G_STORE [[TRUNC1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll new file mode 100644 index 0000000000000..0aa08cc2b1d6f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll @@ -0,0 +1,224 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -O0 -global-isel=true -stop-after=legalizer -o - %s | FileCheck -check-prefix=UNPACKED %s + +define void @store_i48(ptr addrspace(1) %ptr, i48 %arg) #0 { + ; UNPACKED-LABEL: name: store_i48 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV1]](s64) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY4]], [[C]](s32) + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNPACKED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[MV]], [[C1]](s64) + ; UNPACKED-NEXT: G_STORE [[COPY2]](s32), [[MV]](p1) :: (store (s32) into %ir.ptr, addrspace 1) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p1) :: (store (s16) into %ir.ptr + 4, align 4, addrspace 1) + ; UNPACKED-NEXT: SI_RETURN + store i48 %arg, ptr addrspace(1) %ptr, align 4 + ret void +} + +define void @store_i55(ptr addrspace(1) %ptr, i55 %arg) #0 { + ; UNPACKED-LABEL: name: store_i55 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36028797018963967 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C]] + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[AND]](s64) + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY4]], [[C1]](s32) + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNPACKED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[MV]], [[C2]](s64) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64) + ; 
UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[MV]](p1) :: (store (s32) into %ir.ptr, addrspace 1) + ; UNPACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; UNPACKED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC1]], [[C3]](s32) + ; UNPACKED-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNPACKED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C4]](s64) + ; UNPACKED-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into %ir.ptr + 4, align 4, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into %ir.ptr + 6, align 2, basealign 4, addrspace 1) + ; UNPACKED-NEXT: SI_RETURN + store i55 %arg, ptr addrspace(1) %ptr, align 4 + ret void +} + +define void @store_i56(ptr addrspace(1) %ptr, i56 %arg) #0 { + ; UNPACKED-LABEL: name: store_i56 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV1]](s64) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY4]], [[C]](s32) + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNPACKED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[MV]], [[C1]](s64) + ; UNPACKED-NEXT: G_STORE [[COPY2]](s32), [[MV]](p1) :: (store (s32) into %ir.ptr, addrspace 1) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C2]](s32) + ; UNPACKED-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNPACKED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64) + ; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p1) :: (store (s16) into %ir.ptr + 4, align 4, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into %ir.ptr + 6, align 2, basealign 4, addrspace 1) + ; UNPACKED-NEXT: SI_RETURN + store i56 %arg, ptr addrspace(1) %ptr, align 4 + ret void +} + +define void @store_i65(ptr addrspace(1) %ptr, i65 %arg) #0 { + ; UNPACKED-LABEL: name: store_i65 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; UNPACKED-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[DEF]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNPACKED-NEXT: 
[[AND:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV2]], [[C1]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNPACKED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[MV]], [[C2]](s64) + ; UNPACKED-NEXT: G_STORE [[AND]](s64), [[MV]](p1) :: (store (s64) into %ir.ptr, align 4, addrspace 1) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p1) :: (store (s8) into %ir.ptr + 8, align 4, addrspace 1) + ; UNPACKED-NEXT: SI_RETURN + store i65 %arg, ptr addrspace(1) %ptr, align 4 + ret void +} + +define void @store_i95(ptr addrspace(1) %ptr, i95 %arg) #0 { + ; UNPACKED-LABEL: name: store_i95 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; UNPACKED-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[DEF]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2147483647 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV2]], [[C1]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNPACKED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[MV]], [[C2]](s64) + ; UNPACKED-NEXT: G_STORE [[AND]](s64), [[MV]](p1) :: (store (s64) into %ir.ptr, align 4, addrspace 1) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p1) :: (store (s32) into %ir.ptr + 8, addrspace 1) + ; UNPACKED-NEXT: SI_RETURN + store i95 %arg, ptr addrspace(1) %ptr, align 4 + ret void +} + +define void @store_i96(ptr addrspace(1) %ptr, i96 %arg) #0 { + ; UNPACKED-LABEL: name: store_i96 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[MV1]](s96) + ; UNPACKED-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[MV]](p1) :: (store (<3 x s32>) into %ir.ptr, align 4, addrspace 1) + ; UNPACKED-NEXT: SI_RETURN + store i96 %arg, ptr addrspace(1) %ptr, align 4 + ret void +} + +define void @store_i97(ptr addrspace(1) %ptr, i97 %arg) #0 { + ; UNPACKED-LABEL: name: store_i97 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + 
; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 + ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; UNPACKED-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV2]], [[C1]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNPACKED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[MV]], [[C2]](s64) + ; UNPACKED-NEXT: G_STORE [[AND]](s64), [[MV]](p1) :: (store (s64) into %ir.ptr, align 4, addrspace 1) + ; UNPACKED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[C3]](s32) + ; UNPACKED-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNPACKED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C4]](s64) + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p1) :: (store (s32) into %ir.ptr + 8, addrspace 1) + ; UNPACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; UNPACKED-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into %ir.ptr + 12, align 4, addrspace 1) + ; UNPACKED-NEXT: SI_RETURN + store i97 %arg, ptr addrspace(1) %ptr, align 4 + ret void +} + +define void @store_i127(ptr addrspace(1) %ptr, i127 %arg) #0 { + ; UNPACKED-LABEL: name: store_i127 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; UNPACKED-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV2]], [[C1]] + ; UNPACKED-NEXT: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[MV3]](s128) + ; UNPACKED-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[MV]](p1) :: (store (<4 x s32>) into %ir.ptr, align 4, addrspace 1) + ; UNPACKED-NEXT: SI_RETURN + store i127 %arg, ptr addrspace(1) %ptr, align 4 + ret void +} + +attributes #0 = { nounwind }
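As a quick sanity check on the AND mask constants in the checks above (a standalone sketch, not part of the patch): an iN value carried in 64-bit registers is masked with -1 on full 64-bit pieces and with 2^(N mod 64) - 1 on the partial high piece.

#include <cassert>
#include <cstdint>

int main() {
  // Low-bits mask helper: 2^Bits - 1 (all ones for Bits >= 64).
  auto lowMask = [](unsigned Bits) -> uint64_t {
    return Bits >= 64 ? ~UINT64_C(0) : (UINT64_C(1) << Bits) - 1;
  };
  assert(lowMask(55) == UINT64_C(36028797018963967));         // store_i55
  assert(lowMask(65 - 64) == UINT64_C(1));                    // store_i65
  assert(lowMask(95 - 64) == UINT64_C(2147483647));           // store_i95
  assert(lowMask(97 - 64) == UINT64_C(8589934591));           // store_i97
  assert(lowMask(127 - 64) == UINT64_C(9223372036854775807)); // store_i127
  return 0;
}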