@@ -253,7 +253,8 @@ static bool isRegisterType(const GCNSubtarget &ST, LLT Ty) {
253253
254254// Any combination of 32 or 64-bit elements up to the maximum register size,
255255// and multiples of v2s16.
256- static LegalityPredicate isRegisterType (const GCNSubtarget &ST, unsigned TypeIdx) {
256+ static LegalityPredicate isRegisterType (const GCNSubtarget &ST,
257+ unsigned TypeIdx) {
257258 return [=, &ST](const LegalityQuery &Query) {
258259 return isRegisterType (ST, Query.Types [TypeIdx]);
259260 };
@@ -262,7 +263,8 @@ static LegalityPredicate isRegisterType(const GCNSubtarget &ST, unsigned TypeIdx
262263// RegisterType that doesn't have a corresponding RegClass.
263264// TODO: Once `isRegisterType` is replaced with `isRegisterClassType` this
264265// should be removed.
265- static LegalityPredicate isIllegalRegisterType (const GCNSubtarget &ST, unsigned TypeIdx) {
266+ static LegalityPredicate isIllegalRegisterType (const GCNSubtarget &ST,
267+ unsigned TypeIdx) {
266268 return [=, &ST](const LegalityQuery &Query) {
267269 LLT Ty = Query.Types [TypeIdx];
268270 return isRegisterType (ST, Ty) &&
@@ -356,10 +358,11 @@ static bool isRegisterClassType(const GCNSubtarget &ST, LLT Ty) {
356358 return is_contained (AllS32Vectors, Ty) || is_contained (AllS64Vectors, Ty) ||
357359 is_contained (AllScalarTypes, Ty) ||
358360 (ST.useRealTrue16Insts () && Ty == S16) ||
359- is_contained (AllS16Vectors, Ty);
361+ is_contained (AllS16Vectors, Ty);
360362}
361363
362- static LegalityPredicate isRegisterClassType (const GCNSubtarget &ST, unsigned TypeIdx) {
364+ static LegalityPredicate isRegisterClassType (const GCNSubtarget &ST,
365+ unsigned TypeIdx) {
363366 return [&ST, TypeIdx](const LegalityQuery &Query) {
364367 return isRegisterClassType (ST, Query.Types [TypeIdx]);
365368 };
@@ -1782,7 +1785,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
17821785 unsigned IdxTypeIdx = 2 ;
17831786
17841787 getActionDefinitionsBuilder (Op)
1785- .customIf ([=](const LegalityQuery &Query) {
1788+ .customIf ([=](const LegalityQuery &Query) {
17861789 const LLT EltTy = Query.Types [EltTypeIdx];
17871790 const LLT VecTy = Query.Types [VecTypeIdx];
17881791 const LLT IdxTy = Query.Types [IdxTypeIdx];
@@ -1803,36 +1806,37 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
18031806 IdxTy.getSizeInBits () == 32 &&
18041807 isLegalVecType;
18051808 })
1806- .bitcastIf (all (sizeIsMultipleOf32 (VecTypeIdx), scalarOrEltNarrowerThan (VecTypeIdx, 32 )),
1807- bitcastToVectorElement32 (VecTypeIdx))
1808- // .bitcastIf(vectorSmallerThan(1, 32), bitcastToScalar(1))
1809- .bitcastIf (
1810- all (sizeIsMultipleOf32 (VecTypeIdx), scalarOrEltWiderThan (VecTypeIdx, 64 )),
1811- [=](const LegalityQuery &Query) {
1812- // For > 64-bit element types, try to turn this into a 64-bit
1813- // element vector since we may be able to do better indexing
1814- // if this is scalar. If not, fall back to 32.
1815- const LLT EltTy = Query.Types [EltTypeIdx];
1816- const LLT VecTy = Query.Types [VecTypeIdx];
1817- const unsigned DstEltSize = EltTy.getSizeInBits ();
1818- const unsigned VecSize = VecTy.getSizeInBits ();
1819-
1820- const unsigned TargetEltSize = DstEltSize % 64 == 0 ? 64 : 32 ;
1821- return std::pair (
1822- VecTypeIdx,
1823- LLT::fixed_vector (VecSize / TargetEltSize, TargetEltSize));
1824- })
1825- .clampScalar (EltTypeIdx, S32, S64)
1826- .clampScalar (VecTypeIdx, S32, S64)
1827- .clampScalar (IdxTypeIdx, S32, S32)
1828- .clampMaxNumElements (VecTypeIdx, S32, 32 )
1829- // TODO: Clamp elements for 64-bit vectors?
1830- .moreElementsIf (
1831- isIllegalRegisterType (ST, VecTypeIdx),
1832- moreElementsToNextExistingRegClass (VecTypeIdx))
1833- // It should only be necessary with variable indexes.
1834- // As a last resort, lower to the stack
1835- .lower ();
1809+ .bitcastIf (all (sizeIsMultipleOf32 (VecTypeIdx),
1810+ scalarOrEltNarrowerThan (VecTypeIdx, 32 )),
1811+ bitcastToVectorElement32 (VecTypeIdx))
1812+ // .bitcastIf(vectorSmallerThan(1, 32), bitcastToScalar(1))
1813+ .bitcastIf (all (sizeIsMultipleOf32 (VecTypeIdx),
1814+ scalarOrEltWiderThan (VecTypeIdx, 64 )),
1815+ [=](const LegalityQuery &Query) {
1816+ // For > 64-bit element types, try to turn this into a
1817+ // 64-bit element vector since we may be able to do better
1818+ // indexing if this is scalar. If not, fall back to 32.
1819+ const LLT EltTy = Query.Types [EltTypeIdx];
1820+ const LLT VecTy = Query.Types [VecTypeIdx];
1821+ const unsigned DstEltSize = EltTy.getSizeInBits ();
1822+ const unsigned VecSize = VecTy.getSizeInBits ();
1823+
1824+ const unsigned TargetEltSize =
1825+ DstEltSize % 64 == 0 ? 64 : 32 ;
1826+ return std::pair (VecTypeIdx,
1827+ LLT::fixed_vector (VecSize / TargetEltSize,
1828+ TargetEltSize));
1829+ })
1830+ .clampScalar (EltTypeIdx, S32, S64)
1831+ .clampScalar (VecTypeIdx, S32, S64)
1832+ .clampScalar (IdxTypeIdx, S32, S32)
1833+ .clampMaxNumElements (VecTypeIdx, S32, 32 )
1834+ // TODO: Clamp elements for 64-bit vectors?
1835+ .moreElementsIf (isIllegalRegisterType (ST, VecTypeIdx),
1836+ moreElementsToNextExistingRegClass (VecTypeIdx))
1837+ // It should only be necessary with variable indexes.
1838+ // As a last resort, lower to the stack
1839+ .lower ();
18361840 }
18371841
18381842 getActionDefinitionsBuilder (G_EXTRACT_VECTOR_ELT)
@@ -1879,15 +1883,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
18791883
18801884 }
18811885
1882- auto &BuildVector = getActionDefinitionsBuilder (G_BUILD_VECTOR)
1883- . legalForCartesianProduct (AllS32Vectors, {S32} )
1884- .legalForCartesianProduct (AllS64Vectors , {S64 })
1885- . clampNumElements ( 0 , V16S32, V32S32 )
1886- .clampNumElements (0 , V2S64, V16S64 )
1887- . fewerElementsIf ( isWideVec16 ( 0 ), changeTo ( 0 , V2S16) )
1888- . moreElementsIf (
1889- isIllegalRegisterType (ST, 0 ),
1890- moreElementsToNextExistingRegClass (0 ));
1886+ auto &BuildVector =
1887+ getActionDefinitionsBuilder (G_BUILD_VECTOR )
1888+ .legalForCartesianProduct (AllS32Vectors , {S32 })
1889+ . legalForCartesianProduct (AllS64Vectors, {S64} )
1890+ .clampNumElements (0 , V16S32, V32S32 )
1891+ . clampNumElements ( 0 , V2S64, V16S64 )
1892+ . fewerElementsIf ( isWideVec16 ( 0 ), changeTo ( 0 , V2S16))
1893+ . moreElementsIf ( isIllegalRegisterType (ST, 0 ),
1894+ moreElementsToNextExistingRegClass (0 ));
18911895
18921896 if (ST.hasScalarPackInsts ()) {
18931897 BuildVector
@@ -1911,10 +1915,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
19111915
19121916 // FIXME: Clamp maximum size
19131917 getActionDefinitionsBuilder (G_CONCAT_VECTORS)
1914- .legalIf (all (isRegisterType (ST, 0 ), isRegisterType (ST, 1 )))
1915- .clampMaxNumElements (0 , S32, 32 )
1916- .clampMaxNumElements (1 , S16, 2 ) // TODO: Make 4?
1917- .clampMaxNumElements (0 , S16, 64 );
1918+ .legalIf (all (isRegisterType (ST, 0 ), isRegisterType (ST, 1 )))
1919+ .clampMaxNumElements (0 , S32, 32 )
1920+ .clampMaxNumElements (1 , S16, 2 ) // TODO: Make 4?
1921+ .clampMaxNumElements (0 , S16, 64 );
19181922
19191923 getActionDefinitionsBuilder (G_SHUFFLE_VECTOR).lower ();
19201924
@@ -1935,34 +1939,40 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
19351939 return false ;
19361940 };
19371941
1938- auto &Builder = getActionDefinitionsBuilder (Op)
1939- .legalIf (all (isRegisterType (ST, 0 ), isRegisterType (ST, 1 )))
1940- .lowerFor ({{S16, V2S16}})
1941- .lowerIf ([=](const LegalityQuery &Query) {
1942- const LLT BigTy = Query.Types [BigTyIdx];
1943- return BigTy.getSizeInBits () == 32 ;
1944- })
1945- // Try to widen to s16 first for small types.
1946- // TODO: Only do this on targets with legal s16 shifts
1947- .minScalarOrEltIf (scalarNarrowerThan (LitTyIdx, 16 ), LitTyIdx, S16)
1948- .widenScalarToNextPow2 (LitTyIdx, /* Min*/ 16 )
1949- .moreElementsIf (isSmallOddVector (BigTyIdx), oneMoreElement (BigTyIdx))
1950- .fewerElementsIf (all (typeIs (0 , S16), vectorWiderThan (1 , 32 ),
1951- elementTypeIs (1 , S16)),
1952- changeTo (1 , V2S16))
1953- // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
1954- // worth considering the multiples of 64 since 2*192 and 2*384 are not
1955- // valid.
1956- .clampScalar (LitTyIdx, S32, S512)
1957- .widenScalarToNextPow2 (LitTyIdx, /* Min*/ 32 )
1958- // Break up vectors with weird elements into scalars
1959- .fewerElementsIf (
1960- [=](const LegalityQuery &Query) { return notValidElt (Query, LitTyIdx); },
1961- scalarize (0 ))
1962- .fewerElementsIf (
1963- [=](const LegalityQuery &Query) { return notValidElt (Query, BigTyIdx); },
1964- scalarize (1 ))
1965- .clampScalar (BigTyIdx, S32, MaxScalar);
1942+ auto &Builder =
1943+ getActionDefinitionsBuilder (Op)
1944+ .legalIf (all (isRegisterType (ST, 0 ), isRegisterType (ST, 1 )))
1945+ .lowerFor ({{S16, V2S16}})
1946+ .lowerIf ([=](const LegalityQuery &Query) {
1947+ const LLT BigTy = Query.Types [BigTyIdx];
1948+ return BigTy.getSizeInBits () == 32 ;
1949+ })
1950+ // Try to widen to s16 first for small types.
1951+ // TODO: Only do this on targets with legal s16 shifts
1952+ .minScalarOrEltIf (scalarNarrowerThan (LitTyIdx, 16 ), LitTyIdx, S16)
1953+ .widenScalarToNextPow2 (LitTyIdx, /* Min*/ 16 )
1954+ .moreElementsIf (isSmallOddVector (BigTyIdx),
1955+ oneMoreElement (BigTyIdx))
1956+ .fewerElementsIf (all (typeIs (0 , S16), vectorWiderThan (1 , 32 ),
1957+ elementTypeIs (1 , S16)),
1958+ changeTo (1 , V2S16))
1959+ // Clamp the little scalar to s8-s256 and make it a power of 2. It's
1960+ // not worth considering the multiples of 64 since 2*192 and 2*384
1961+ // are not valid.
1962+ .clampScalar (LitTyIdx, S32, S512)
1963+ .widenScalarToNextPow2 (LitTyIdx, /* Min*/ 32 )
1964+ // Break up vectors with weird elements into scalars
1965+ .fewerElementsIf (
1966+ [=](const LegalityQuery &Query) {
1967+ return notValidElt (Query, LitTyIdx);
1968+ },
1969+ scalarize (0 ))
1970+ .fewerElementsIf (
1971+ [=](const LegalityQuery &Query) {
1972+ return notValidElt (Query, BigTyIdx);
1973+ },
1974+ scalarize (1 ))
1975+ .clampScalar (BigTyIdx, S32, MaxScalar);
19661976
19671977 if (Op == G_MERGE_VALUES) {
19681978 Builder.widenScalarIf (
0 commit comments