@@ -2785,10 +2785,18 @@ static SDValue expandFSH64(SDValue A, SDValue B, SDValue ShiftAmount, SDLoc DL,
27852785 SDValue BHi = UnpackB.getValue (1 );
27862786
27872787 // The bitfield consists of { AHi : ALo : BHi : BLo }
2788- // FSHL, Amt < 32 - The window will contain { AHi : ALo : BHi }
2789- // FSHL, Amt >= 32 - The window will contain { ALo : BHi : BLo }
2790- // FSHR, Amt < 32 - The window will contain { ALo : BHi : BLo }
2791- // FSHR, Amt >= 32 - The window will contain { AHi : ALo : BHi }
2788+ //
2789+ // * FSHL, Amt < 32 - The window will contain { AHi : ALo : BHi }
2790+ // * FSHL, Amt >= 32 - The window will contain { ALo : BHi : BLo }
2791+ // * FSHR, Amt < 32 - The window will contain { ALo : BHi : BLo }
2792+ // * FSHR, Amt >= 32 - The window will contain { AHi : ALo : BHi }
2793+ //
2794+ // Note that Amt = 0 and Amt = 32 are special cases where 32-bit funnel shifts
2795+ // are not needed at all. Amt = 0 is a no-op producing either A or B depending
2796+ // on the direction. Amt = 32 can be implemented by a packing and unpacking
2797+ // move to select and arrange the 32-bit values. For simplicity, these cases
2798+ // are not handled here explicitly and instead we rely on DAGCombiner to
2799+ // remove the no-op funnel shifts we insert.
27922800 auto [High, Mid, Low] = ((Opcode == ISD::FSHL) == (Amt < 32 ))
27932801 ? std::make_tuple (AHi, ALo, BHi)
27942802 : std::make_tuple (ALo, BHi, BLo);
0 commit comments