10
10
//
11
11
// ===----------------------------------------------------------------------===//
12
12
13
+ #include < numeric>
14
+
13
15
#include " CIRGenOpenACCRecipe.h"
14
16
15
17
namespace clang ::CIRGen {
@@ -35,6 +37,110 @@ mlir::Block *OpenACCRecipeBuilderBase::createRecipeBlock(mlir::Region ®ion,
35
37
return builder.createBlock (®ion, region.end (), types, locs);
36
38
}
37
39
40
+ mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca (
41
+ mlir::Block *block, SourceRange exprRange, mlir::Location loc,
42
+ std::string_view allocaName, size_t numBounds,
43
+ llvm::ArrayRef<QualType> boundTypes) {
44
+ mlir::OpBuilder::InsertionGuard guardCase (builder);
45
+
46
+ // Get the range of bounds arguments, which are all but the 1st arg.
47
+ llvm::ArrayRef<mlir::BlockArgument> boundsRange =
48
+ block->getArguments ().drop_front (1 );
49
+
50
+ // boundTypes contains the before and after of each bounds, so it ends up
51
+ // having 1 extra. Assert this is the case to ensure we don't call this in the
52
+ // wrong 'block'.
53
+ assert (boundsRange.size () + 1 == boundTypes.size ());
54
+
55
+ mlir::Type itrTy = cgf.cgm .convertType (cgf.getContext ().UnsignedLongLongTy );
56
+ auto idxType = mlir::IndexType::get (&cgf.getMLIRContext ());
57
+
58
+ auto getUpperBound = [&](mlir::Value bound) {
59
+ auto upperBoundVal =
60
+ mlir::acc::GetUpperboundOp::create (builder, loc, idxType, bound);
61
+ return mlir::UnrealizedConversionCastOp::create (builder, loc, itrTy,
62
+ upperBoundVal.getResult ())
63
+ .getResult (0 );
64
+ };
65
+
66
+ auto isArrayTy = [&](QualType ty) {
67
+ if (ty->isArrayType () && !ty->isConstantArrayType ())
68
+ cgf.cgm .errorNYI (exprRange, " OpenACC recipe init for VLAs" );
69
+ return ty->isConstantArrayType ();
70
+ };
71
+
72
+ mlir::Type topLevelTy = cgf.convertType (boundTypes.back ());
73
+ cir::PointerType topLevelTyPtr = builder.getPointerTo (topLevelTy);
74
+ // Do an alloca for the 'top' level type without bounds.
75
+ mlir::Value initialAlloca = builder.createAlloca (
76
+ loc, topLevelTyPtr, topLevelTy, allocaName,
77
+ cgf.getContext ().getTypeAlignInChars (boundTypes.back ()));
78
+
79
+ bool lastBoundWasArray = isArrayTy (boundTypes.back ());
80
+
81
+ // Since we're iterating the types in reverse, this sets up for each index
82
+ // corresponding to the boundsRange to be the 'after application of the
83
+ // bounds.
84
+ llvm::ArrayRef<QualType> boundResults = boundTypes.drop_back (1 );
85
+
86
+ // Collect the 'do we have any allocas needed after this type' list.
87
+ llvm::SmallVector<bool > allocasLeftArr;
88
+ llvm::ArrayRef<QualType> resultTypes = boundTypes.drop_front ();
89
+ std::transform_inclusive_scan (
90
+ resultTypes.begin (), resultTypes.end (),
91
+ std::back_inserter (allocasLeftArr), std::plus<bool >{},
92
+ [](QualType ty) { return !ty->isConstantArrayType (); });
93
+
94
+ // Keep track of the number of 'elements' that we're allocating. Individual
95
+ // allocas should multiply this by the size of its current allocation.
96
+ mlir::Value cumulativeElts;
97
+ for (auto [bound, resultType, allocasLeft] : llvm::reverse (
98
+ llvm::zip_equal (boundsRange, boundResults, allocasLeftArr))) {
99
+
100
+ // if there is no further 'alloca' operation we need to do, we can skip
101
+ // creating the UB/multiplications/etc.
102
+ if (!allocasLeft)
103
+ break ;
104
+
105
+ // First: figure out the number of elements in the current 'bound' list.
106
+ mlir::Value eltsPerSubArray = getUpperBound (bound);
107
+ mlir::Value eltsToAlloca;
108
+
109
+ // IF we are in a sub-bounds, the total number of elements to alloca is
110
+ // the product of that one and the current 'bounds' size. That is,
111
+ // arr[5][5], we would need 25 elements, not just 5. Else it is just the
112
+ // current number of elements.
113
+ if (cumulativeElts)
114
+ eltsToAlloca = builder.createMul (loc, eltsPerSubArray, cumulativeElts);
115
+ else
116
+ eltsToAlloca = eltsPerSubArray;
117
+
118
+ if (!lastBoundWasArray) {
119
+ // If we have to do an allocation, figure out the size of the
120
+ // allocation. alloca takes the number of bytes, not elements.
121
+ TypeInfoChars eltInfo = cgf.getContext ().getTypeInfoInChars (resultType);
122
+ cir::ConstantOp eltSize = builder.getConstInt (
123
+ loc, itrTy, eltInfo.Width .alignTo (eltInfo.Align ).getQuantity ());
124
+ mlir::Value curSize = builder.createMul (loc, eltsToAlloca, eltSize);
125
+
126
+ mlir::Type eltTy = cgf.convertType (resultType);
127
+ cir::PointerType ptrTy = builder.getPointerTo (eltTy);
128
+ builder.createAlloca (loc, ptrTy, eltTy, " openacc.init.bounds" ,
129
+ cgf.getContext ().getTypeAlignInChars (resultType),
130
+ curSize);
131
+
132
+ // TODO: OpenACC : At this point we should be copying the addresses of
133
+ // each element of this to the last allocation. At the moment, that is
134
+ // not yet implemented.
135
+ cgf.cgm .errorNYI (exprRange, " OpenACC recipe alloca copying" );
136
+ }
137
+
138
+ cumulativeElts = eltsToAlloca;
139
+ lastBoundWasArray = isArrayTy (resultType);
140
+ }
141
+ return initialAlloca;
142
+ }
143
+
38
144
mlir::Value
39
145
OpenACCRecipeBuilderBase::createBoundsLoop (mlir::Value subscriptedValue,
40
146
mlir::Value bound,
@@ -258,7 +364,11 @@ void OpenACCRecipeBuilderBase::createPrivateInitRecipe(
258
364
cgf.emitAutoVarAlloca (*allocaDecl, builder.saveInsertionPoint ());
259
365
cgf.emitAutoVarInit (tempDeclEmission);
260
366
} else {
261
- cgf.cgm .errorNYI (exprRange, " private-init with bounds" );
367
+ makeBoundsAlloca (block, exprRange, loc, " openacc.private.init" , numBounds,
368
+ boundTypes);
369
+
370
+ if (initExpr)
371
+ cgf.cgm .errorNYI (exprRange, " private-init with bounds initialization" );
262
372
}
263
373
264
374
mlir::acc::YieldOp::create (builder, locEnd);
0 commit comments