@@ -120,14 +120,26 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
120120 .clampMaxNumElements (0 , s64, 2 )
121121 .clampMaxNumElements (0 , p0, 2 );
122122
123- getActionDefinitionsBuilder (G_BSWAP)
124- .legalFor ({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
125- .widenScalarOrEltToNextPow2 (0 , 16 )
123+ getActionDefinitionsBuilder (G_INSERT)
124+ .legalIf (all (typeInSet (0 , {s32, s64, p0}), typeInSet (1 , {s8, s16, s32}),
125+ smallerThan (1 , 0 )))
126+ .widenScalarToNextPow2 (0 )
126127 .clampScalar (0 , s32, s64)
127- .clampNumElements (0 , v4s16, v8s16)
128- .clampNumElements (0 , v2s32, v4s32)
129- .clampNumElements (0 , v2s64, v2s64)
130- .moreElementsToNextPow2 (0 );
128+ .widenScalarToNextPow2 (1 )
129+ .minScalar (1 , s8)
130+ .maxScalarIf (typeInSet (0 , {s32}), 1 , s16)
131+ .maxScalarIf (typeInSet (0 , {s64, p0}), 1 , s32);
132+
133+ getActionDefinitionsBuilder (G_EXTRACT)
134+ .legalIf (all (typeInSet (0 , {s16, s32, s64, p0}),
135+ typeInSet (1 , {s32, s64, s128, p0}), smallerThan (0 , 1 )))
136+ .widenScalarToNextPow2 (1 )
137+ .clampScalar (1 , s32, s128)
138+ .widenScalarToNextPow2 (0 )
139+ .minScalar (0 , s16)
140+ .maxScalarIf (typeInSet (1 , {s32}), 0 , s16)
141+ .maxScalarIf (typeInSet (1 , {s64, p0}), 0 , s32)
142+ .maxScalarIf (typeInSet (1 , {s128}), 0 , s64);
131143
132144 getActionDefinitionsBuilder ({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
133145 .legalFor ({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
@@ -253,13 +265,112 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
253265 .clampNumElements (0 , v2s32, v4s32)
254266 .lower ();
255267
268+ // FIXME: Legal vector types are only legal with NEON.
269+ getActionDefinitionsBuilder (G_ABS)
270+ .legalFor (HasCSSC, {s32, s64})
271+ .legalFor (PackedVectorAllTypeList)
272+ .customIf ([=](const LegalityQuery &Q) {
273+ // TODO: Fix suboptimal codegen for 128+ bit types.
274+ LLT SrcTy = Q.Types [0 ];
275+ return SrcTy.isScalar () && SrcTy.getSizeInBits () < 128 ;
276+ })
277+ .widenScalarIf (
278+ [=](const LegalityQuery &Query) { return Query.Types [0 ] == v4s8; },
279+ [=](const LegalityQuery &Query) { return std::make_pair (0 , v4s16); })
280+ .widenScalarIf (
281+ [=](const LegalityQuery &Query) { return Query.Types [0 ] == v2s16; },
282+ [=](const LegalityQuery &Query) { return std::make_pair (0 , v2s32); })
283+ .clampNumElements (0 , v8s8, v16s8)
284+ .clampNumElements (0 , v4s16, v8s16)
285+ .clampNumElements (0 , v2s32, v4s32)
286+ .clampNumElements (0 , v2s64, v2s64)
287+ .moreElementsToNextPow2 (0 )
288+ .lower ();
289+
256290 getActionDefinitionsBuilder (
257291 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
258292 .legalFor ({{s32, s32}, {s64, s32}})
259293 .clampScalar (0 , s32, s64)
260294 .clampScalar (1 , s32, s64)
261295 .widenScalarToNextPow2 (0 );
262296
297+ getActionDefinitionsBuilder ({G_FSHL, G_FSHR})
298+ .customFor ({{s32, s32}, {s32, s64}, {s64, s64}})
299+ .lower ();
300+
301+ getActionDefinitionsBuilder (G_ROTR)
302+ .legalFor ({{s32, s64}, {s64, s64}})
303+ .customIf ([=](const LegalityQuery &Q) {
304+ return Q.Types [0 ].isScalar () && Q.Types [1 ].getScalarSizeInBits () < 64 ;
305+ })
306+ .lower ();
307+ getActionDefinitionsBuilder (G_ROTL).lower ();
308+
309+ getActionDefinitionsBuilder ({G_SBFX, G_UBFX})
310+ .customFor ({{s32, s32}, {s64, s64}});
311+
312+ auto always = [=](const LegalityQuery &Q) { return true ; };
313+ getActionDefinitionsBuilder (G_CTPOP)
314+ .legalFor (HasCSSC, {{s32, s32}, {s64, s64}})
315+ .legalFor ({{v8s8, v8s8}, {v16s8, v16s8}})
316+ .customFor (!HasCSSC, {{s32, s32}, {s64, s64}})
317+ .customFor ({{s128, s128},
318+ {v4s16, v4s16},
319+ {v8s16, v8s16},
320+ {v2s32, v2s32},
321+ {v4s32, v4s32},
322+ {v2s64, v2s64}})
323+ .clampScalar (0 , s32, s128)
324+ .widenScalarToNextPow2 (0 )
325+ .minScalarEltSameAsIf (always, 1 , 0 )
326+ .maxScalarEltSameAsIf (always, 1 , 0 );
327+
328+ getActionDefinitionsBuilder (G_CTLZ)
329+ .legalForCartesianProduct (
330+ {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
331+ .scalarize (1 )
332+ .widenScalarToNextPow2 (1 , /* Min=*/ 32 )
333+ .clampScalar (1 , s32, s64)
334+ .scalarSameSizeAs (0 , 1 );
335+ getActionDefinitionsBuilder (G_CTLZ_ZERO_UNDEF).lower ();
336+
337+ getActionDefinitionsBuilder (G_CTTZ)
338+ .lowerIf (isVector (0 ))
339+ .widenScalarToNextPow2 (1 , /* Min=*/ 32 )
340+ .clampScalar (1 , s32, s64)
341+ .scalarSameSizeAs (0 , 1 )
342+ .legalFor (HasCSSC, {s32, s64})
343+ .customFor (!HasCSSC, {s32, s64});
344+
345+ getActionDefinitionsBuilder (G_CTTZ_ZERO_UNDEF).lower ();
346+
347+ // TODO: Custom lowering for v2s32, v4s32, v2s64.
348+ getActionDefinitionsBuilder (G_BITREVERSE)
349+ .legalFor ({s32, s64, v8s8, v16s8})
350+ .widenScalarToNextPow2 (0 , /* Min = */ 32 )
351+ .clampScalar (0 , s32, s64)
352+ .lower ();
353+
354+ getActionDefinitionsBuilder (G_BSWAP)
355+ .legalFor ({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
356+ .widenScalarOrEltToNextPow2 (0 , 16 )
357+ .clampScalar (0 , s32, s64)
358+ .clampNumElements (0 , v4s16, v8s16)
359+ .clampNumElements (0 , v2s32, v4s32)
360+ .clampNumElements (0 , v2s64, v2s64)
361+ .moreElementsToNextPow2 (0 );
362+
363+ getActionDefinitionsBuilder ({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
364+ .legalFor ({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
365+ .legalFor (HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
366+ .clampNumElements (0 , v8s8, v16s8)
367+ .clampNumElements (0 , v4s16, v8s16)
368+ .clampNumElements (0 , v2s32, v4s32)
369+ .clampMaxNumElements (0 , s64, 2 )
370+ .scalarizeIf (scalarOrEltWiderThan (0 , 64 ), 0 )
371+ .moreElementsToNextPow2 (0 )
372+ .lower ();
373+
263374 getActionDefinitionsBuilder (
264375 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
265376 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
@@ -309,27 +420,22 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
309420 .minScalar (0 , s32)
310421 .libcallFor ({{s32, s32}, {s64, s32}, {s128, s32}});
311422
312- getActionDefinitionsBuilder (G_INSERT)
313- .legalIf (all (typeInSet (0 , {s32, s64, p0}),
314- typeInSet (1 , {s8, s16, s32}), smallerThan (1 , 0 )))
315- .widenScalarToNextPow2 (0 )
316- .clampScalar (0 , s32, s64)
317- .widenScalarToNextPow2 (1 )
318- .minScalar (1 , s8)
319- .maxScalarIf (typeInSet (0 , {s32}), 1 , s16)
320- .maxScalarIf (typeInSet (0 , {s64, p0}), 1 , s32);
423+ // TODO: Libcall support for s128.
424+ // TODO: s16 should be legal with full FP16 support.
425+ getActionDefinitionsBuilder ({G_LROUND, G_LLROUND})
426+ .legalFor ({{s64, s32}, {s64, s64}});
321427
322- getActionDefinitionsBuilder (G_EXTRACT)
323- .legalIf (all (typeInSet (0 , {s16, s32, s64, p0}),
324- typeInSet (1 , {s32, s64, s128, p0}), smallerThan (0 , 1 )))
325- .widenScalarToNextPow2 (1 )
326- .clampScalar (1 , s32, s128)
327- .widenScalarToNextPow2 (0 )
328- .minScalar (0 , s16)
329- .maxScalarIf (typeInSet (1 , {s32}), 0 , s16)
330- .maxScalarIf (typeInSet (1 , {s64, p0}), 0 , s32)
331- .maxScalarIf (typeInSet (1 , {s128}), 0 , s64);
428+ // TODO: Custom legalization for mismatched types.
429+ getActionDefinitionsBuilder (G_FCOPYSIGN)
430+ .moreElementsIf (
431+ [](const LegalityQuery &Query) { return Query.Types [0 ].isScalar (); },
432+ [=](const LegalityQuery &Query) {
433+ const LLT Ty = Query.Types [0 ];
434+ return std::pair (0 , LLT::fixed_vector (Ty == s16 ? 4 : 2 , Ty));
435+ })
436+ .lower ();
332437
438+ getActionDefinitionsBuilder (G_FMAD).lower ();
333439
334440 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
335441 auto &Actions = getActionDefinitionsBuilder (Op);
@@ -1035,32 +1141,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10351141
10361142 getActionDefinitionsBuilder (G_BUILD_VECTOR_TRUNC).lower ();
10371143
1038- getActionDefinitionsBuilder (G_CTLZ)
1039- .legalForCartesianProduct (
1040- {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
1041- .scalarize (1 )
1042- .widenScalarToNextPow2 (1 , /* Min=*/ 32 )
1043- .clampScalar (1 , s32, s64)
1044- .scalarSameSizeAs (0 , 1 );
1045- getActionDefinitionsBuilder (G_CTLZ_ZERO_UNDEF).lower ();
1046-
1047- // TODO: Custom lowering for v2s32, v4s32, v2s64.
1048- getActionDefinitionsBuilder (G_BITREVERSE)
1049- .legalFor ({s32, s64, v8s8, v16s8})
1050- .widenScalarToNextPow2 (0 , /* Min = */ 32 )
1051- .clampScalar (0 , s32, s64)
1052- .lower ();
1053-
1054- getActionDefinitionsBuilder (G_CTTZ_ZERO_UNDEF).lower ();
1055-
1056- getActionDefinitionsBuilder (G_CTTZ)
1057- .lowerIf (isVector (0 ))
1058- .widenScalarToNextPow2 (1 , /* Min=*/ 32 )
1059- .clampScalar (1 , s32, s64)
1060- .scalarSameSizeAs (0 , 1 )
1061- .legalFor (HasCSSC, {s32, s64})
1062- .customFor (!HasCSSC, {s32, s64});
1063-
10641144 getActionDefinitionsBuilder (G_SHUFFLE_VECTOR)
10651145 .legalIf ([=](const LegalityQuery &Query) {
10661146 const LLT &DstTy = Query.Types [0 ];
@@ -1122,6 +1202,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
11221202 SrcTy.getNumElements ())));
11231203 });
11241204
1205+ getActionDefinitionsBuilder (G_EXTRACT_SUBVECTOR)
1206+ .legalFor ({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1207+ .widenScalarOrEltToNextPow2 (0 )
1208+ .immIdx (0 ); // Inform verifier imm idx 0 is handled.
1209+
1210+ // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1211+ getActionDefinitionsBuilder (G_SPLAT_VECTOR)
1212+ .legalFor (HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1213+
11251214 getActionDefinitionsBuilder (G_JUMP_TABLE).legalFor ({p0});
11261215
11271216 getActionDefinitionsBuilder (G_BRJT).legalFor ({{p0, s64}});
@@ -1153,28 +1242,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
11531242 .libcall ();
11541243 }
11551244
1156- // FIXME: Legal vector types are only legal with NEON.
1157- getActionDefinitionsBuilder (G_ABS)
1158- .legalFor (HasCSSC, {s32, s64})
1159- .legalFor (PackedVectorAllTypeList)
1160- .customIf ([=](const LegalityQuery &Q) {
1161- // TODO: Fix suboptimal codegen for 128+ bit types.
1162- LLT SrcTy = Q.Types [0 ];
1163- return SrcTy.isScalar () && SrcTy.getSizeInBits () < 128 ;
1164- })
1165- .widenScalarIf (
1166- [=](const LegalityQuery &Query) { return Query.Types [0 ] == v4s8; },
1167- [=](const LegalityQuery &Query) { return std::make_pair (0 , v4s16); })
1168- .widenScalarIf (
1169- [=](const LegalityQuery &Query) { return Query.Types [0 ] == v2s16; },
1170- [=](const LegalityQuery &Query) { return std::make_pair (0 , v2s32); })
1171- .clampNumElements (0 , v8s8, v16s8)
1172- .clampNumElements (0 , v4s16, v8s16)
1173- .clampNumElements (0 , v2s32, v4s32)
1174- .clampNumElements (0 , v2s64, v2s64)
1175- .moreElementsToNextPow2 (0 )
1176- .lower ();
1177-
11781245 // For fadd reductions we have pairwise operations available. We treat the
11791246 // usual legal types as legal and handle the lowering to pairwise instructions
11801247 // later.
@@ -1284,65 +1351,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
12841351 // TODO: Update this to correct handling when adding AArch64/SVE support.
12851352 getActionDefinitionsBuilder (G_VECTOR_COMPRESS).lower ();
12861353
1287- getActionDefinitionsBuilder ({G_FSHL, G_FSHR})
1288- .customFor ({{s32, s32}, {s32, s64}, {s64, s64}})
1289- .lower ();
1290-
1291- getActionDefinitionsBuilder (G_ROTR)
1292- .legalFor ({{s32, s64}, {s64, s64}})
1293- .customIf ([=](const LegalityQuery &Q) {
1294- return Q.Types [0 ].isScalar () && Q.Types [1 ].getScalarSizeInBits () < 64 ;
1295- })
1296- .lower ();
1297- getActionDefinitionsBuilder (G_ROTL).lower ();
1298-
1299- getActionDefinitionsBuilder ({G_SBFX, G_UBFX})
1300- .customFor ({{s32, s32}, {s64, s64}});
1301-
1302- auto always = [=](const LegalityQuery &Q) { return true ; };
1303- getActionDefinitionsBuilder (G_CTPOP)
1304- .legalFor (HasCSSC, {{s32, s32}, {s64, s64}})
1305- .legalFor ({{v8s8, v8s8}, {v16s8, v16s8}})
1306- .customFor (!HasCSSC, {{s32, s32}, {s64, s64}})
1307- .customFor ({{s128, s128},
1308- {v2s32, v2s32},
1309- {v4s32, v4s32},
1310- {v4s16, v4s16},
1311- {v8s16, v8s16},
1312- {v2s64, v2s64}})
1313- .clampScalar (0 , s32, s128)
1314- .widenScalarToNextPow2 (0 )
1315- .minScalarEltSameAsIf (always, 1 , 0 )
1316- .maxScalarEltSameAsIf (always, 1 , 0 );
1317-
1318- getActionDefinitionsBuilder ({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1319- .legalFor ({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
1320- .legalFor (HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
1321- .clampNumElements (0 , v8s8, v16s8)
1322- .clampNumElements (0 , v4s16, v8s16)
1323- .clampNumElements (0 , v2s32, v4s32)
1324- .clampMaxNumElements (0 , s64, 2 )
1325- .scalarizeIf (scalarOrEltWiderThan (0 , 64 ), 0 )
1326- .moreElementsToNextPow2 (0 )
1327- .lower ();
1328-
1329- // TODO: Libcall support for s128.
1330- // TODO: s16 should be legal with full FP16 support.
1331- getActionDefinitionsBuilder ({G_LROUND, G_LLROUND})
1332- .legalFor ({{s64, s32}, {s64, s64}});
1333-
1334- // TODO: Custom legalization for mismatched types.
1335- getActionDefinitionsBuilder (G_FCOPYSIGN)
1336- .moreElementsIf (
1337- [](const LegalityQuery &Query) { return Query.Types [0 ].isScalar (); },
1338- [=](const LegalityQuery &Query) {
1339- const LLT Ty = Query.Types [0 ];
1340- return std::pair (0 , LLT::fixed_vector (Ty == s16 ? 4 : 2 , Ty));
1341- })
1342- .lower ();
1343-
1344- getActionDefinitionsBuilder (G_FMAD).lower ();
1345-
13461354 // Access to floating-point environment.
13471355 getActionDefinitionsBuilder ({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
13481356 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
@@ -1354,15 +1362,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
13541362
13551363 getActionDefinitionsBuilder ({G_SCMP, G_UCMP}).lower ();
13561364
1357- getActionDefinitionsBuilder (G_EXTRACT_SUBVECTOR)
1358- .legalFor ({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1359- .widenScalarOrEltToNextPow2 (0 )
1360- .immIdx (0 ); // Inform verifier imm idx 0 is handled.
1361-
1362- // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1363- getActionDefinitionsBuilder (G_SPLAT_VECTOR)
1364- .legalFor (HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1365-
13661365 getLegacyLegalizerInfo ().computeTables ();
13671366 verify (*ST.getInstrInfo ());
13681367}
0 commit comments