@@ -1377,21 +1377,24 @@ bool AIELegalizerHelper::legalizeG_FADD_G_FSUB(LegalizerHelper &Helper,
13771377 const LLT SrcLHSTy = MRI.getType (SrcLHS);
13781378 const LLT SrcRHSTy = MRI.getType (SrcRHS);
13791379
1380- // Handle bf16 vectors code assumes the input is <32 x bf16>, the LegalizerInfo makes
1381- // sure that the input is either padded or unmerged to <32 x bf16>.
1380+ // Handle bf16 vectors. This code assumes the input is <32 x bf16>; the
1381+ // LegalizerInfo makes sure that the input is either padded or unmerged to
1382+ // <32 x bf16>.
13821383 if (isBF16Vector (SrcLHSTy) && isBF16Vector (SrcRHSTy)) {
13831384 // Assert that both input vectors have exactly 32 elements.
1384- assert (SrcLHSTy.getNumElements () == 32 && SrcRHSTy.getNumElements () == 32 &&
1385- " Expected vector of size 32 for inputs of G_FADD/G_FSUB" );
1385+ assert (SrcLHSTy.getNumElements () == 32 && SrcRHSTy.getNumElements () == 32 &&
1386+ " Expected vector of size 32 for inputs of G_FADD/G_FSUB" );
13861387
1387- // Step 2: Convert bf16 vectors to f32 vectors using FPExt
1388- const LLT F32VecTy = LLT::fixed_vector (SrcLHSTy.getNumElements (), LLT::scalar (32 ));
1388+ // Step 1: Convert bf16 vectors to f32 vectors using FPExt
1389+ const LLT F32VecTy =
1390+ LLT::fixed_vector (SrcLHSTy.getNumElements (), LLT::scalar (32 ));
13891391 Register SrcLHSF32 = MRI.createGenericVirtualRegister (F32VecTy);
13901392 Register SrcRHSF32 = MRI.createGenericVirtualRegister (F32VecTy);
13911393 MIRBuilder.buildFPExt (SrcLHSF32, SrcLHS);
13921394 MIRBuilder.buildFPExt (SrcRHSF32, SrcRHS);
1393-
1394- // Step 3: Input is going to be <32 x bf16> pad it to <64 x f32> for AIE2P as AccV64S32 is legal on AIE2P.
1395+
1396+ // Step 2: Input is going to be <32 x bf16> pad it to <64 x f32> for AIE2P
1397+ // as AccV64S32 is legal on AIE2P.
13951398 if (ST.isAIE2P ()) {
13961399 const Register UndefVec = MIRBuilder.buildUndef (F32VecTy).getReg (0 );
13971400 const Register ConcatLHS = MRI.createGenericVirtualRegister (V64FP32);
@@ -1401,35 +1404,41 @@ bool AIELegalizerHelper::legalizeG_FADD_G_FSUB(LegalizerHelper &Helper,
14011404 SrcLHSF32 = ConcatLHS;
14021405 SrcRHSF32 = ConcatRHS;
14031406 }
1404-
1405- // Step 4: Perform the floating point operation
1406- Register Res = MIRBuilder.buildInstr (MI.getOpcode (), {MRI.getType (SrcLHSF32)}, {SrcLHSF32, SrcRHSF32}).getReg (0 );
1407-
1408- // Step 5: Handle accumulator conversion based on target
1407+
1408+ // Step 3: Perform the floating point operation
1409+ Register Res = MIRBuilder
1410+ .buildInstr (MI.getOpcode (), {MRI.getType (SrcLHSF32)},
1411+ {SrcLHSF32, SrcRHSF32})
1412+ .getReg (0 );
1413+
1414+ // Step 4: Handle accumulator conversion based on target
14091415 if (ST.isAIE2 ()) {
14101416 Res = MIRBuilder.buildBitcast (V8ACC64, Res).getReg (0 );
14111417 } else if (ST.isAIE2P ()) {
14121418 // Unmerge to get 2 vectors of <32xf32> as FADD/FSUB was done on <64xf32>
14131419 SmallVector<Register, 2 > UnmergedRegs;
14141420 const auto Unmerge = MIRBuilder.buildUnmerge (F32VecTy, Res);
14151421 getUnmergeResults (UnmergedRegs, *Unmerge);
1416- Res = UnmergedRegs[0 ]; // Take the first <32xf32> vector, other half is just zeros.
1422+ Res = UnmergedRegs[0 ]; // Take the first <32xf32> vector, other half is
1423+ // just zeros.
14171424 }
1418-
1419- // Step 6 : Convert back to bf16 using the truncation intrinsic
1425+
1426+ // Step 5: Convert back to bf16 using the truncation intrinsic
14201427 const int VecSize = MRI.getType (Res).getSizeInBits ();
14211428 const LLT DstLLT = ST.isAIE2P () ? V32BF16 : V16BF16;
1422- Res = MIRBuilder.buildIntrinsic (getFpTrunc32ToBF16IntrID (ST, VecSize), {DstLLT}, true , false )
1429+ Res = MIRBuilder
1430+ .buildIntrinsic (getFpTrunc32ToBF16IntrID (ST, VecSize), {DstLLT},
1431+ true , false )
14231432 .addUse (Res)
14241433 .getReg (0 );
1425-
1434+
14261435 // Handle AIE2 padding
14271436 if (ST.isAIE2 ()) {
14281437 Res = emitPadUndefVector (MRI, MIRBuilder, V32BF16, Res);
14291438 }
1430-
1439+
14311440 MIRBuilder.buildCopy (DstReg, Res);
1432-
1441+
14331442 MI.eraseFromParent ();
14341443 return true ;
14351444 }
0 commit comments