@@ -223,8 +223,9 @@ static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
223223 };
224224}
225225
226- static bool isRegisterSize (unsigned Size) {
227- return Size % 32 == 0 && Size <= MaxRegisterSize;
226+ static bool isRegisterSize (const GCNSubtarget &ST, unsigned Size) {
227+ return ((ST.useRealTrue16Insts () && Size == 16 ) || Size % 32 == 0 ) &&
228+ Size <= MaxRegisterSize;
228229}
229230
230231static bool isRegisterVectorElementType (LLT EltTy) {
@@ -240,8 +241,8 @@ static bool isRegisterVectorType(LLT Ty) {
240241}
241242
242243// TODO: replace all uses of isRegisterType with isRegisterClassType
243- static bool isRegisterType (LLT Ty) {
244- if (!isRegisterSize (Ty.getSizeInBits ()))
244+ static bool isRegisterType (const GCNSubtarget &ST, LLT Ty) {
245+ if (!isRegisterSize (ST, Ty.getSizeInBits ()))
245246 return false ;
246247
247248 if (Ty.isVector ())
@@ -252,19 +253,19 @@ static bool isRegisterType(LLT Ty) {
252253
253254// Any combination of 32 or 64-bit elements up to the maximum register size, and
254255// multiples of v2s16.
255- static LegalityPredicate isRegisterType (unsigned TypeIdx) {
256- return [=](const LegalityQuery &Query) {
257- return isRegisterType (Query.Types [TypeIdx]);
256+ static LegalityPredicate isRegisterType (const GCNSubtarget &ST, unsigned TypeIdx) {
257+ return [=, &ST ](const LegalityQuery &Query) {
258+ return isRegisterType (ST, Query.Types [TypeIdx]);
258259 };
259260}
260261
261262// RegisterType that doesn't have a corresponding RegClass.
262263// TODO: Once `isRegisterType` is replaced with `isRegisterClassType` this
263264// should be removed.
264- static LegalityPredicate isIllegalRegisterType (unsigned TypeIdx) {
265- return [=](const LegalityQuery &Query) {
265+ static LegalityPredicate isIllegalRegisterType (const GCNSubtarget &ST, unsigned TypeIdx) {
266+ return [=, &ST ](const LegalityQuery &Query) {
266267 LLT Ty = Query.Types [TypeIdx];
267- return isRegisterType (Ty) &&
268+ return isRegisterType (ST, Ty) &&
268269 !SIRegisterInfo::getSGPRClassForBitWidth (Ty.getSizeInBits ());
269270 };
270271}
@@ -348,17 +349,19 @@ static std::initializer_list<LLT> AllS64Vectors = {V2S64, V3S64, V4S64, V5S64,
348349 V6S64, V7S64, V8S64, V16S64};
349350
350351// Checks whether a type is in the list of legal register types.
351- static bool isRegisterClassType (LLT Ty) {
352+ static bool isRegisterClassType (const GCNSubtarget &ST, LLT Ty) {
352353 if (Ty.isPointerOrPointerVector ())
353354 Ty = Ty.changeElementType (LLT::scalar (Ty.getScalarSizeInBits ()));
354355
355356 return is_contained (AllS32Vectors, Ty) || is_contained (AllS64Vectors, Ty) ||
356- is_contained (AllScalarTypes, Ty) || is_contained (AllS16Vectors, Ty);
357+ is_contained (AllScalarTypes, Ty) ||
358+ (ST.useRealTrue16Insts () && Ty == S16) ||
359+ is_contained (AllS16Vectors, Ty);
357360}
358361
359- static LegalityPredicate isRegisterClassType (unsigned TypeIdx) {
360- return [TypeIdx](const LegalityQuery &Query) {
361- return isRegisterClassType (Query.Types [TypeIdx]);
362+ static LegalityPredicate isRegisterClassType (const GCNSubtarget &ST, unsigned TypeIdx) {
363+ return [&ST, TypeIdx](const LegalityQuery &Query) {
364+ return isRegisterClassType (ST, Query.Types [TypeIdx]);
362365 };
363366}
364367
@@ -510,7 +513,7 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
510513
511514static bool isLoadStoreLegal (const GCNSubtarget &ST, const LegalityQuery &Query) {
512515 const LLT Ty = Query.Types [0 ];
513- return isRegisterType (Ty) && isLoadStoreSizeLegal (ST, Query) &&
516+ return isRegisterType (ST, Ty) && isLoadStoreSizeLegal (ST, Query) &&
514517 !hasBufferRsrcWorkaround (Ty) && !loadStoreBitcastWorkaround (Ty);
515518}
516519
@@ -523,12 +526,12 @@ static bool shouldBitcastLoadStoreType(const GCNSubtarget &ST, const LLT Ty,
523526 if (Size != MemSizeInBits)
524527 return Size <= 32 && Ty.isVector ();
525528
526- if (loadStoreBitcastWorkaround (Ty) && isRegisterType (Ty))
529+ if (loadStoreBitcastWorkaround (Ty) && isRegisterType (ST, Ty))
527530 return true ;
528531
529532 // Don't try to handle bitcasting vector ext loads for now.
530533 return Ty.isVector () && (!MemTy.isVector () || MemTy == Ty) &&
531- (Size <= 32 || isRegisterSize (Size)) &&
534+ (Size <= 32 || isRegisterSize (ST, Size)) &&
532535 !isRegisterVectorElementType (Ty.getElementType ());
533536}
534537
@@ -875,7 +878,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
875878
876879 getActionDefinitionsBuilder (G_BITCAST)
877880 // Don't worry about the size constraint.
878- .legalIf (all (isRegisterClassType (0 ), isRegisterClassType (1 )))
881+ .legalIf (all (isRegisterClassType (ST, 0 ), isRegisterClassType (ST, 1 )))
879882 .lower ();
880883
881884 getActionDefinitionsBuilder (G_CONSTANT)
@@ -890,7 +893,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
890893 .clampScalar (0 , S16, S64);
891894
892895 getActionDefinitionsBuilder ({G_IMPLICIT_DEF, G_FREEZE})
893- .legalIf (isRegisterClassType (0 ))
896+ .legalIf (isRegisterClassType (ST, 0 ))
894897 // s1 and s16 are special cases because they have legal operations on
895898 // them, but don't really occupy registers in the normal way.
896899 .legalFor ({S1, S16})
@@ -1825,7 +1828,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
18251828 .clampMaxNumElements (VecTypeIdx, S32, 32 )
18261829 // TODO: Clamp elements for 64-bit vectors?
18271830 .moreElementsIf (
1828- isIllegalRegisterType (VecTypeIdx),
1831+ isIllegalRegisterType (ST, VecTypeIdx),
18291832 moreElementsToNextExistingRegClass (VecTypeIdx))
18301833 // It should only be necessary with variable indexes.
18311834 // As a last resort, lower to the stack
@@ -1883,7 +1886,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
18831886 .clampNumElements (0 , V2S64, V16S64)
18841887 .fewerElementsIf (isWideVec16 (0 ), changeTo (0 , V2S16))
18851888 .moreElementsIf (
1886- isIllegalRegisterType (0 ),
1889+ isIllegalRegisterType (ST, 0 ),
18871890 moreElementsToNextExistingRegClass (0 ));
18881891
18891892 if (ST.hasScalarPackInsts ()) {
@@ -1904,11 +1907,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
19041907 .lower ();
19051908 }
19061909
1907- BuildVector.legalIf (isRegisterType (0 ));
1910+ BuildVector.legalIf (isRegisterType (ST, 0 ));
19081911
19091912 // FIXME: Clamp maximum size
19101913 getActionDefinitionsBuilder (G_CONCAT_VECTORS)
1911- .legalIf (all (isRegisterType (0 ), isRegisterType (1 )))
1914+ .legalIf (all (isRegisterType (ST, 0 ), isRegisterType (ST, 1 )))
19121915 .clampMaxNumElements (0 , S32, 32 )
19131916 .clampMaxNumElements (1 , S16, 2 ) // TODO: Make 4?
19141917 .clampMaxNumElements (0 , S16, 64 );
@@ -1933,7 +1936,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
19331936 };
19341937
19351938 auto &Builder = getActionDefinitionsBuilder (Op)
1936- .legalIf (all (isRegisterType (0 ), isRegisterType (1 )))
1939+ .legalIf (all (isRegisterType (ST, 0 ), isRegisterType (ST, 1 )))
19371940 .lowerFor ({{S16, V2S16}})
19381941 .lowerIf ([=](const LegalityQuery &Query) {
19391942 const LLT BigTy = Query.Types [BigTyIdx];
@@ -3149,7 +3152,7 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
31493152 } else {
31503153 // Extract the subvector.
31513154
3152- if (isRegisterType (ValTy)) {
3155+ if (isRegisterType (ST, ValTy)) {
31533156 // If this is a case where G_EXTRACT is legal, use it.
31543157 // (e.g. <3 x s32> -> <4 x s32>)
31553158 WideLoad = B.buildLoadFromOffset (WideTy, PtrReg, *MMO, 0 ).getReg (0 );
0 commit comments