@@ -9990,31 +9990,6 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
99909990 SmallVectorImpl<Value*> &Ops,
99919991 unsigned IntID) {
99929992 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9993-
9994- unsigned N;
9995- switch (IntID) {
9996- case Intrinsic::aarch64_sve_ld2_sret:
9997- case Intrinsic::aarch64_sve_ld1_pn_x2:
9998- case Intrinsic::aarch64_sve_ldnt1_pn_x2:
9999- case Intrinsic::aarch64_sve_ld2q_sret:
10000- N = 2;
10001- break;
10002- case Intrinsic::aarch64_sve_ld3_sret:
10003- case Intrinsic::aarch64_sve_ld3q_sret:
10004- N = 3;
10005- break;
10006- case Intrinsic::aarch64_sve_ld4_sret:
10007- case Intrinsic::aarch64_sve_ld1_pn_x4:
10008- case Intrinsic::aarch64_sve_ldnt1_pn_x4:
10009- case Intrinsic::aarch64_sve_ld4q_sret:
10010- N = 4;
10011- break;
10012- default:
10013- llvm_unreachable("unknown intrinsic!");
10014- }
10015- auto RetTy = llvm::VectorType::get(VTy->getElementType(),
10016- VTy->getElementCount() * N);
10017-
100189993 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
100199994 Value *BasePtr = Ops[1];
100209995
@@ -10023,15 +9998,7 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
100239998 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
100249999
1002510000 Function *F = CGM.getIntrinsic(IntID, {VTy});
10026- Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
10027- unsigned MinElts = VTy->getMinNumElements();
10028- Value *Ret = llvm::PoisonValue::get(RetTy);
10029- for (unsigned I = 0; I < N; I++) {
10030- Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10031- Value *SRet = Builder.CreateExtractValue(Call, I);
10032- Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
10033- }
10034- return Ret;
10001+ return Builder.CreateCall(F, {Predicate, BasePtr});
1003510002}
1003610003
1003710004Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
@@ -10304,6 +10271,19 @@ Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
1030410271 // view (when storing/reloading), whereas the svreinterpret builtin
1030510272 // implements bitwise equivalent cast from register point of view.
1030610273 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10274+
10275+ if (auto *StructTy = dyn_cast<StructType>(Ty)) {
10276+ Value *Tuple = llvm::PoisonValue::get(Ty);
10277+
10278+ for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
10279+ Value *In = Builder.CreateExtractValue(Val, I);
10280+ Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
10281+ Tuple = Builder.CreateInsertValue(Tuple, Out, I);
10282+ }
10283+
10284+ return Tuple;
10285+ }
10286+
1030710287 return Builder.CreateBitCast(Val, Ty);
1030810288}
1030910289
@@ -10346,44 +10326,26 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
1034610326}
1034710327
1034810328Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10349- llvm::Type *Ty,
1035010329 ArrayRef<Value *> Ops) {
1035110330 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
1035210331 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
10353-
10354- unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
10355- auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
10356- TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
10357-
10358- if (!SingleVecTy)
10359- return nullptr;
10360-
10361- Value *Idx = ConstantInt::get(CGM.Int64Ty,
10362- I * SingleVecTy->getMinNumElements());
10332+ unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
1036310333
1036410334 if (TypeFlags.isTupleSet())
10365- return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
10366- return Builder.CreateExtractVector(Ty, Ops[0], Idx);
10335+ return Builder.CreateInsertValue( Ops[0], Ops[2], Idx);
10336+ return Builder.CreateExtractValue( Ops[0], Idx);
1036710337}
1036810338
1036910339Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10370- llvm::Type *Ty,
10371- ArrayRef<Value *> Ops) {
10340+ llvm::Type *Ty,
10341+ ArrayRef<Value *> Ops) {
1037210342 assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");
1037310343
10374- auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
10375-
10376- if (!SrcTy)
10377- return nullptr;
10344+ Value *Tuple = llvm::PoisonValue::get(Ty);
10345+ for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
10346+ Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
1037810347
10379- unsigned MinElts = SrcTy->getMinNumElements();
10380- Value *Call = llvm::PoisonValue::get(Ty);
10381- for (unsigned I = 0; I < Ops.size(); I++) {
10382- Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10383- Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
10384- }
10385-
10386- return Call;
10348+ return Tuple;
1038710349}
1038810350
1038910351Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
@@ -10453,27 +10415,14 @@ void CodeGenFunction::GetAArch64SVEProcessedOperands(
1045310415 continue;
1045410416 }
1045510417
10456- if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
10457- Ops.push_back(Arg);
10458- continue;
10459- }
10418+ if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
10419+ for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
10420+ Ops.push_back(Builder.CreateExtractValue(Arg, I));
1046010421
10461- auto *VTy = cast<ScalableVectorType>(Arg->getType());
10462- unsigned MinElts = VTy->getMinNumElements();
10463- bool IsPred = VTy->getElementType()->isIntegerTy(1);
10464- unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
10465-
10466- if (N == 1) {
10467- Ops.push_back(Arg);
1046810422 continue;
1046910423 }
1047010424
10471- for (unsigned I = 0; I < N; ++I) {
10472- Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
10473- auto *NewVTy =
10474- ScalableVectorType::get(VTy->getElementType(), MinElts / N);
10475- Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
10476- }
10425+ Ops.push_back(Arg);
1047710426 }
1047810427}
1047910428
@@ -10511,7 +10460,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
1051110460 else if (TypeFlags.isStructStore())
1051210461 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
1051310462 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10514- return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
10463+ return EmitSVETupleSetOrGet(TypeFlags, Ops);
1051510464 else if (TypeFlags.isTupleCreate())
1051610465 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
1051710466 else if (TypeFlags.isUndef())
0 commit comments