@@ -108,8 +108,8 @@ class SPIRVInstructionSelector : public InstructionSelector {
108108 unsigned Opcode) const ;
109109
110110 bool selectFirstBitSet64 (Register ResVReg, const SPIRVType *ResType,
111- MachineInstr &I, unsigned BitSetOpcode ,
112- bool SwapPrimarySide) const ;
111+ MachineInstr &I, Register SrcReg ,
112+ unsigned BitSetOpcode, bool SwapPrimarySide) const ;
113113
114114 bool selectGlobalValue (Register ResVReg, MachineInstr &I,
115115 const MachineInstr *Init = nullptr ) const ;
@@ -3171,23 +3171,116 @@ bool SPIRVInstructionSelector::selectFirstBitSet32(Register ResVReg,
31713171 .constrainAllUses (TII, TRI, RBI);
31723172}
31733173
3174- bool SPIRVInstructionSelector::selectFirstBitSet64 (Register ResVReg,
3175- const SPIRVType *ResType,
3176- MachineInstr &I,
3177- unsigned BitSetOpcode,
3178- bool SwapPrimarySide) const {
3179- Register OpReg = I.getOperand (2 ).getReg ();
3180-
3181- // 1. Split int64 into 2 pieces using a bitcast
3174+ bool SPIRVInstructionSelector::selectFirstBitSet64 (
3175+ Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
3176+ Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const {
31823177 unsigned ComponentCount = GR.getScalarOrVectorComponentCount (ResType);
31833178 SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType (ResType);
3179+ bool ZeroAsNull = STI.isOpenCLEnv ();
3180+ Register ConstIntZero =
3181+ GR.getOrCreateConstInt (0 , I, BaseType, TII, ZeroAsNull);
3182+ Register ConstIntOne =
3183+ GR.getOrCreateConstInt (1 , I, BaseType, TII, ZeroAsNull);
3184+
3185+ // SPIRV doesn't support vectors with more than 4 components. Since the
3186+ // algorithm below converts i64 -> i32x2 and i64x4 -> i32x8 it can only
3187+ // operate on vectors with 2 or fewer components. When larger vectors are
3188+ // seen, split them, recurse, then recombine them.
3189+ if (ComponentCount > 2 ) {
3190+ unsigned LeftComponentCount = ComponentCount / 2 ;
3191+ unsigned RightComponentCount = ComponentCount - LeftComponentCount;
3192+ bool LeftIsVector = LeftComponentCount > 1 ;
3193+
3194+ // Split the SrcReg in half into 2 smaller vec registers
3195+ // (ie i64x4 -> i64x2, i64x2)
3196+ MachineIRBuilder MIRBuilder (I);
3197+ SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType (64 , MIRBuilder);
3198+ SPIRVType *LeftVecOpType;
3199+ SPIRVType *LeftVecResType;
3200+ if (LeftIsVector) {
3201+ LeftVecOpType =
3202+ GR.getOrCreateSPIRVVectorType (OpType, LeftComponentCount, MIRBuilder);
3203+ LeftVecResType = GR.getOrCreateSPIRVVectorType (
3204+ BaseType, LeftComponentCount, MIRBuilder);
3205+ } else {
3206+ LeftVecOpType = OpType;
3207+ LeftVecResType = BaseType;
3208+ }
3209+
3210+ SPIRVType *RightVecOpType =
3211+ GR.getOrCreateSPIRVVectorType (OpType, RightComponentCount, MIRBuilder);
3212+ SPIRVType *RightVecResType = GR.getOrCreateSPIRVVectorType (
3213+ BaseType, RightComponentCount, MIRBuilder);
3214+
3215+ Register LeftSideIn =
3216+ MRI->createVirtualRegister (GR.getRegClass (LeftVecOpType));
3217+ Register RightSideIn =
3218+ MRI->createVirtualRegister (GR.getRegClass (RightVecOpType));
3219+
3220+ bool Result;
3221+
3222+ if (LeftIsVector) {
3223+ auto MIB =
3224+ BuildMI (*I.getParent (), I, I.getDebugLoc (),
3225+ TII.get (SPIRV::OpVectorShuffle))
3226+ .addDef (LeftSideIn)
3227+ .addUse (GR.getSPIRVTypeID (LeftVecOpType))
3228+ .addUse (SrcReg)
3229+ // Per the spec, repeat the vector if only one vec is needed
3230+ .addUse (SrcReg);
3231+
3232+ for (unsigned J = 0 ; J < LeftComponentCount; J++) {
3233+ MIB.addImm (J);
3234+ }
3235+
3236+ Result = MIB.constrainAllUses (TII, TRI, RBI);
3237+ } else {
3238+ Result =
3239+ selectOpWithSrcs (LeftSideIn, LeftVecOpType, I, {SrcReg, ConstIntZero},
3240+ SPIRV::OpVectorExtractDynamic);
3241+ }
3242+
3243+ auto MIB = BuildMI (*I.getParent (), I, I.getDebugLoc (),
3244+ TII.get (SPIRV::OpVectorShuffle))
3245+ .addDef (RightSideIn)
3246+ .addUse (GR.getSPIRVTypeID (RightVecOpType))
3247+ .addUse (SrcReg)
3248+ // Per the spec, repeat the vector if only one vec is needed
3249+ .addUse (SrcReg);
3250+
3251+ for (unsigned J = LeftComponentCount; J < ComponentCount; J++) {
3252+ MIB.addImm (J);
3253+ }
3254+
3255+ Result = Result && MIB.constrainAllUses (TII, TRI, RBI);
3256+
3257+ // Recursively call selectFirstBitSet64 on the 2 registers
3258+ Register LeftSideOut =
3259+ MRI->createVirtualRegister (GR.getRegClass (LeftVecResType));
3260+ Register RightSideOut =
3261+ MRI->createVirtualRegister (GR.getRegClass (RightVecResType));
3262+ Result = Result &&
3263+ selectFirstBitSet64 (LeftSideOut, LeftVecResType, I, LeftSideIn,
3264+ BitSetOpcode, SwapPrimarySide);
3265+ Result = Result &&
3266+ selectFirstBitSet64 (RightSideOut, RightVecResType, I, RightSideIn,
3267+ BitSetOpcode, SwapPrimarySide);
3268+
3269+ // Join the two resulting registers back into the return type
3270+ // (ie i32x2, i32x2 -> i32x4)
3271+ return Result &&
3272+ selectOpWithSrcs (ResVReg, ResType, I, {LeftSideOut, RightSideOut},
3273+ SPIRV::OpCompositeConstruct);
3274+ }
3275+
3276+ // 1. Split int64 into 2 pieces using a bitcast
31843277 MachineIRBuilder MIRBuilder (I);
31853278 SPIRVType *PostCastType =
31863279 GR.getOrCreateSPIRVVectorType (BaseType, 2 * ComponentCount, MIRBuilder);
31873280 Register BitcastReg =
31883281 MRI->createVirtualRegister (GR.getRegClass (PostCastType));
31893282 bool Result =
3190- selectOpWithSrcs (BitcastReg, PostCastType, I, {OpReg }, SPIRV::OpBitcast);
3283+ selectOpWithSrcs (BitcastReg, PostCastType, I, {SrcReg }, SPIRV::OpBitcast);
31913284
31923285 // 2. Find the first set bit from the primary side for all the pieces in #1
31933286 Register FBSReg = MRI->createVirtualRegister (GR.getRegClass (PostCastType));
@@ -3198,20 +3291,15 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(Register ResVReg,
31983291 Register HighReg = MRI->createVirtualRegister (GR.getRegClass (ResType));
31993292 Register LowReg = MRI->createVirtualRegister (GR.getRegClass (ResType));
32003293
3201- bool ZeroAsNull = STI.isOpenCLEnv ();
32023294 bool IsScalarRes = ResType->getOpcode () != SPIRV::OpTypeVector;
32033295 if (IsScalarRes) {
32043296 // if scalar do a vector extract
3205- Result = Result &&
3206- selectOpWithSrcs (HighReg, ResType, I,
3207- {FBSReg, GR.getOrCreateConstInt (0 , I, ResType,
3208- TII, ZeroAsNull)},
3209- SPIRV::OpVectorExtractDynamic);
3210- Result = Result &&
3211- selectOpWithSrcs (LowReg, ResType, I,
3212- {FBSReg, GR.getOrCreateConstInt (1 , I, ResType,
3213- TII, ZeroAsNull)},
3214- SPIRV::OpVectorExtractDynamic);
3297+ Result =
3298+ Result && selectOpWithSrcs (HighReg, ResType, I, {FBSReg, ConstIntZero},
3299+ SPIRV::OpVectorExtractDynamic);
3300+ Result =
3301+ Result && selectOpWithSrcs (LowReg, ResType, I, {FBSReg, ConstIntOne},
3302+ SPIRV::OpVectorExtractDynamic);
32153303 } else {
32163304 // if vector do a shufflevector
32173305 auto MIB = BuildMI (*I.getParent (), I, I.getDebugLoc (),
@@ -3324,7 +3412,7 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
33243412 case 32 :
33253413 return selectFirstBitSet32 (ResVReg, ResType, I, OpReg, BitSetOpcode);
33263414 case 64 :
3327- return selectFirstBitSet64 (ResVReg, ResType, I, BitSetOpcode,
3415+ return selectFirstBitSet64 (ResVReg, ResType, I, OpReg, BitSetOpcode,
33283416 /* SwapPrimarySide=*/ false );
33293417 default :
33303418 report_fatal_error (
@@ -3350,7 +3438,7 @@ bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg,
33503438 case 32 :
33513439 return selectFirstBitSet32 (ResVReg, ResType, I, OpReg, BitSetOpcode);
33523440 case 64 :
3353- return selectFirstBitSet64 (ResVReg, ResType, I, BitSetOpcode,
3441+ return selectFirstBitSet64 (ResVReg, ResType, I, OpReg, BitSetOpcode,
33543442 /* SwapPrimarySide=*/ true );
33553443 default :
33563444 report_fatal_error (" spv_firstbitlow only supports 16,32,64 bits." );
0 commit comments