@@ -3178,100 +3178,74 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow(
3178
3178
Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
3179
3179
Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const {
3180
3180
3181
- unsigned ComponentCount = GR.getScalarOrVectorComponentCount (ResType);
3182
3181
// SPIR-V only allows vecs of size 2, 3, or 4. Calling with a larger vec requires
3183
- // creating a return type with an invalid vec size. If that is resolved
3184
- // then this function is valid up to vec8 as the intermediate splitting
3185
- // would create 2 vec4.
3182
+ // creating a param reg and return reg with an invalid vec size. If that is
3183
+ // resolved then this function is valid for vectors of any component size.
3184
+ unsigned ComponentCount = GR. getScalarOrVectorComponentCount (ResType);
3186
3185
assert (ComponentCount < 5 && " Vec 5+ will generate invalid SPIR-V ops" );
3187
3186
3188
-
3189
- SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType (ResType);
3190
3187
bool ZeroAsNull = STI.isOpenCLEnv ();
3191
- Register ConstIntZero =
3192
- GR.getOrCreateConstInt (0 , I, BaseType, TII, ZeroAsNull);
3193
- unsigned LeftComponentCount = ComponentCount / 2 ;
3194
- unsigned RightComponentCount = ComponentCount - LeftComponentCount;
3195
- bool LeftIsVector = LeftComponentCount > 1 ;
3196
-
3197
- // Split the SrcReg in half into 2 smaller vec registers
3198
- // (ie i64x4 -> i64x2, i64x2)
3199
3188
MachineIRBuilder MIRBuilder (I);
3200
- SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType (64 , MIRBuilder);
3201
- SPIRVType *LeftOpType = OpType;
3202
- SPIRVType *LeftResType = BaseType;
3203
- if (LeftIsVector) {
3204
- LeftOpType =
3205
- GR.getOrCreateSPIRVVectorType (OpType, LeftComponentCount, MIRBuilder);
3206
- LeftResType =
3207
- GR.getOrCreateSPIRVVectorType (BaseType, LeftComponentCount, MIRBuilder);
3208
- }
3209
-
3210
- SPIRVType *RightOpType =
3211
- GR.getOrCreateSPIRVVectorType (OpType, RightComponentCount, MIRBuilder);
3212
- SPIRVType *RightResType =
3213
- GR.getOrCreateSPIRVVectorType (BaseType, RightComponentCount, MIRBuilder);
3214
-
3215
- Register LeftSideIn = MRI->createVirtualRegister (GR.getRegClass (LeftOpType));
3216
- Register RightSideIn =
3217
- MRI->createVirtualRegister (GR.getRegClass (RightOpType));
3218
-
3219
- // Extract the left half from the SrcReg into LeftSideIn
3220
- // accounting for the special case when it only has one element
3221
- if (LeftIsVector) {
3189
+ SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType (ResType);
3190
+ SPIRVType *I64Type = GR.getOrCreateSPIRVIntegerType (64 , MIRBuilder);
3191
+ SPIRVType *I64x2Type = GR.getOrCreateSPIRVVectorType (I64Type, 2 , MIRBuilder);
3192
+ SPIRVType *Vec2ResType =
3193
+ GR.getOrCreateSPIRVVectorType (BaseType, 2 , MIRBuilder);
3194
+
3195
+ std::vector<Register> PartialRegs;
3196
+
3197
+ // Visits components in pairs (0, 2, 4, ...); stops before the final component when ComponentCount is odd
3198
+ unsigned CurrentComponent = 0 ;
3199
+ for (; CurrentComponent + 1 < ComponentCount; CurrentComponent += 2 ) {
3200
+ Register SubVecReg = MRI->createVirtualRegister (GR.getRegClass (I64x2Type));
3201
+
3222
3202
auto MIB = BuildMI (*I.getParent (), I, I.getDebugLoc (),
3223
3203
TII.get (SPIRV::OpVectorShuffle))
3224
- .addDef (LeftSideIn )
3225
- .addUse (GR.getSPIRVTypeID (LeftOpType ))
3204
+ .addDef (SubVecReg )
3205
+ .addUse (GR.getSPIRVTypeID (I64x2Type ))
3226
3206
.addUse (SrcReg)
3227
3207
// Per the spec, repeat the vector if only one vec is needed
3228
3208
.addUse (SrcReg);
3229
3209
3230
- for ( unsigned J = 0 ; J < LeftComponentCount; ++J)
3231
- MIB.addImm (J );
3210
+ MIB. addImm (CurrentComponent);
3211
+ MIB.addImm (CurrentComponent + 1 );
3232
3212
3233
3213
if (!MIB.constrainAllUses (TII, TRI, RBI))
3234
3214
return false ;
3235
3215
3236
- } else {
3237
- if (!selectOpWithSrcs (LeftSideIn, LeftOpType, I, {SrcReg, ConstIntZero},
3238
- SPIRV::OpVectorExtractDynamic))
3216
+ Register SubVecBitSetReg =
3217
+ MRI->createVirtualRegister (GR.getRegClass (Vec2ResType));
3218
+
3219
+ if (!selectFirstBitSet64 (SubVecBitSetReg, Vec2ResType, I, SubVecReg,
3220
+ BitSetOpcode, SwapPrimarySide))
3239
3221
return false ;
3222
+
3223
+ PartialRegs.push_back (SubVecBitSetReg);
3240
3224
}
3241
3225
3242
- // Extract the right half from the SrcReg into RightSideIn.
3243
- // Right will always be a vector since the only time one element is left is
3244
- // when Component == 3, and in that case Left is one element.
3245
- auto MIB = BuildMI (*I.getParent (), I, I.getDebugLoc (),
3246
- TII.get (SPIRV::OpVectorShuffle))
3247
- .addDef (RightSideIn)
3248
- .addUse (GR.getSPIRVTypeID (RightOpType))
3249
- .addUse (SrcReg)
3250
- // Per the spec, repeat the vector if only one vec is needed
3251
- .addUse (SrcReg);
3226
+ // On odd component counts we need to handle one more component
3227
+ if (CurrentComponent != ComponentCount) {
3228
+ Register FinalElemReg = MRI->createVirtualRegister (GR.getRegClass (I64Type));
3229
+ Register ConstIntLastIdx = GR.getOrCreateConstInt (
3230
+ ComponentCount - 1 , I, BaseType, TII, ZeroAsNull);
3252
3231
3253
- for (unsigned J = LeftComponentCount; J < ComponentCount; ++J)
3254
- MIB.addImm (J);
3232
+ if (!selectOpWithSrcs (FinalElemReg, I64Type, I, {SrcReg, ConstIntLastIdx},
3233
+ SPIRV::OpVectorExtractDynamic))
3234
+ return false ;
3255
3235
3256
- if (!MIB. constrainAllUses (TII, TRI, RBI))
3257
- return false ;
3236
+ Register FinalElemBitSetReg =
3237
+ MRI-> createVirtualRegister (GR. getRegClass (BaseType)) ;
3258
3238
3259
- // Recursively call selectFirstBitSet64 on the 2 halves
3260
- Register LeftSideOut =
3261
- MRI->createVirtualRegister (GR.getRegClass (LeftResType));
3262
- Register RightSideOut =
3263
- MRI->createVirtualRegister (GR.getRegClass (RightResType));
3239
+ if (!selectFirstBitSet64 (FinalElemBitSetReg, BaseType, I, FinalElemReg,
3240
+ BitSetOpcode, SwapPrimarySide))
3241
+ return false ;
3264
3242
3265
- if (!selectFirstBitSet64 (LeftSideOut, LeftResType, I, LeftSideIn,
3266
- BitSetOpcode, SwapPrimarySide))
3267
- return false ;
3268
- if (!selectFirstBitSet64 (RightSideOut, RightResType, I, RightSideIn,
3269
- BitSetOpcode, SwapPrimarySide))
3270
- return false ;
3243
+ PartialRegs.push_back (FinalElemBitSetReg);
3244
+ }
3271
3245
3272
- // Join the two resulting registers back into the return type
3273
- // (ie i32x2, i32x2 -> i32x4 )
3274
- return selectOpWithSrcs (ResVReg, ResType, I, {LeftSideOut, RightSideOut} ,
3246
+ // Join all the resulting registers back into the return type in order
3247
+ // (i.e. i32x2, i32x2, i32x1 -> i32x5)
3248
+ return selectOpWithSrcs (ResVReg, ResType, I, PartialRegs ,
3275
3249
SPIRV::OpCompositeConstruct);
3276
3250
}
3277
3251
0 commit comments