diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp index 2fa55844aec7c..8061237ce29d7 100644 --- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp +++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp @@ -49,13 +49,6 @@ class MapsForPrivatizedSymbolsPass : public flangomp::impl::MapsForPrivatizedSymbolsPassBase< MapsForPrivatizedSymbolsPass> { - bool privatizerNeedsMap(omp::PrivateClauseOp &privatizer) { - Region &allocRegion = privatizer.getAllocRegion(); - Value blockArg0 = allocRegion.getArgument(0); - if (blockArg0.use_empty()) - return false; - return true; - } omp::MapInfoOp createMapInfo(Location loc, Value var, fir::FirOpBuilder &builder) { uint64_t mapTypeTo = static_cast< @@ -132,9 +125,9 @@ class MapsForPrivatizedSymbolsPass omp::PrivateClauseOp privatizer = SymbolTable::lookupNearestSymbolFrom( targetOp, privatizerName); - if (!privatizerNeedsMap(privatizer)) { + if (!privatizer.needsMap()) continue; - } + builder.setInsertionPoint(targetOp); Location loc = targetOp.getLoc(); omp::MapInfoOp mapInfoOp = createMapInfo(loc, privVar, builder); diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 626539cb7bde4..873a40d5ee75b 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -135,6 +135,17 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]> auto ®ion = getDeallocRegion(); return region.empty() ? nullptr : region.getArgument(0); } + /// privatizer is a PrivateClauseOp that privatizes an MLIR value. + /// privatizerNeedsMap returns true if the value being privatized in an + /// omp.target p should additionally be mapped to the target region + /// using a MapInfoOp. This is most common when an allocatable is privatized. 
+ /// In such cases, the descriptor is used in privatization and needs to be + /// mapped onto the device. + bool needsMap() { + Value blockArg0 = getAllocRegion().getArgument(0); + return !blockArg0.use_empty(); + } + }]; let hasRegionVerifier = 1; diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 7c45e89cd8ac4..efe6577f01022 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3316,7 +3316,57 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, return builder.saveIP(); } +/// privatizer is a PrivateClauseOp that privatizes an MLIR value. +/// privatizerNeedsMap returns true if the value being privatized in an +/// omp.target should additionally be mapped to the target region +/// using a MapInfoOp. This is most common when an allocatable is privatized. +/// In such cases, the descriptor is used in privatization and needs to be +/// mapped onto the device. +// static bool privatizerNeedsMap(omp::PrivateClauseOp &privatizer) { +// Region &allocRegion = privatizer.getAllocRegion(); +// Value blockArg0 = allocRegion.getArgument(0); +// return !blockArg0.use_empty(); +// } + +/// Return the llvm::Value * corresponding to the privateVar that +/// is being privatized. It isn't always as simple as looking up +/// moduleTranslation with privateVar. For instance, in case of +/// an allocatable, the descriptor for the allocatable is privatized. +/// This descriptor is mapped using a MapInfoOp. So, this function +/// will return a pointer to the llvm::Value corresponding to the +/// block argument for the mapped descriptor. 
+static llvm::Value * +findHostAssociatedValue(Value privateVar, omp::TargetOp targetOp, + llvm::DenseMap &mappedPrivateVars, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + if (mappedPrivateVars.contains(privateVar)) { + int blockArgIndex = mappedPrivateVars[privateVar]; + Value blockArg = targetOp.getRegion().getArgument(blockArgIndex); + mlir::Type privVarType = privateVar.getType(); + mlir::Type blockArgType = blockArg.getType(); + assert(isa(blockArgType) && + "A block argument corresponding to a mapped var should have " + "!llvm.ptr type"); + + if (privVarType == blockArg.getType()) { + llvm::Value *v = moduleTranslation.lookupValue(blockArg); + return v; + } + if (!isa(privVarType)) { + // This typically happens when the privatized type is lowered from + // boxchar and gets lowered to !llvm.struct<(ptr, i64)>. That is the + // struct/pair is passed by value. But, mapped values are passed only as + // pointers, so before we privatize, we must load the pointer. + llvm::Value *load = + builder.CreateLoad(moduleTranslation.convertType(privVarType), + moduleTranslation.lookupValue(blockArg)); + return load; + } + } + return moduleTranslation.lookupValue(privateVar); +} static LogicalResult convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { @@ -3329,6 +3379,19 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, auto parentFn = opInst.getParentOfType(); auto targetOp = cast(opInst); auto &targetRegion = targetOp.getRegion(); + // Holds the private vars that have been mapped along with + // the block argument that corresponds to the MapInfoOp + // corresponding to the private var in question. + // So, for instance + // + // %10 = omp.map.info var_ptr(%6#0 : !fir.ref>>, ..) 
+ // omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1) + // + // Then, %10 has been created so that the descriptor can be used by the + // privatizer + // @box.privatizer on the device side. Here we'd record {%6#0, 0} in the + // mappedPrivateVars map. + llvm::DenseMap mappedPrivateVars; DataLayout dl = DataLayout(opInst.getParentOfType()); SmallVector mapVars = targetOp.getMapVars(); ArrayRef mapBlockArgs = @@ -3340,6 +3403,48 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, bool isOffloadEntry = isTargetDevice || !ompBuilder->Config.TargetTriples.empty(); + // For some private variables, the MapsForPrivatizedVariablesPass + // creates MapInfoOp instances. Go through the private variables and + // the mapped variables so that during codegeneration we are able + // to quickly look up the corresponding map variable, if any for each + // private variable. + if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) { + auto argIface = llvm::cast(*targetOp); + unsigned lastMapBlockArgsIdx = + argIface.getMapBlockArgsStart() + argIface.numMapBlockArgs() - 1; + OperandRange privateVars = targetOp.getPrivateVars(); + std::optional privateSyms = targetOp.getPrivateSyms(); + auto reverseIt = mapVars.rbegin(); + for (auto [privVar, privSym] : + llvm::reverse(llvm::zip_equal(privateVars, *privateSyms))) { + SymbolRefAttr privatizerName = llvm::cast(privSym); + omp::PrivateClauseOp privatizer = + findPrivatizer(targetOp, privatizerName); + if (!privatizer.needsMap()) + continue; + + // The MapInfoOp defining the map var isn't really needed later. + // So, we don't store it in any datastructure. Instead, we just + // do some sanity checks on it right now. + omp::MapInfoOp mapInfoOp = + llvm::cast((*reverseIt).getDefiningOp()); + Type varType = mapInfoOp.getVarType(); + + // Check #1: Check that the type of the private variable matches + // the type of the variable being mapped. 
+ if (!isa(privVar.getType())) + assert( + varType == privVar.getType() && + "Type of private var doesn't match the type of the mapped value"); + + // Ok, only 1 sanity check for now. + // Record the index of the block argument corresponding to this + // mapvar. + mappedPrivateVars.insert({privVar, lastMapBlockArgsIdx}); + lastMapBlockArgsIdx--; + reverseIt++; + } + } LogicalResult bodyGenStatus = success(); using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; auto bodyCB = [&](InsertPointTy allocaIP, @@ -3367,9 +3472,10 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, moduleTranslation.lookupValue(mapInfoOp.getVarPtr()); moduleTranslation.mapValue(arg, mapOpValue); } - // Do privatization after moduleTranslation has already recorded // mapped values. + SmallVector llvmPrivateVars; + SmallVector privateCleanupRegions; if (!targetOp.getPrivateVars().empty()) { builder.restoreIP(allocaIP); @@ -3384,16 +3490,18 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, SymbolRefAttr privSym = cast(privatizerNameAttr); omp::PrivateClauseOp privatizer = findPrivatizer(&opInst, privSym); if (privatizer.getDataSharingType() == - omp::DataSharingClauseType::FirstPrivate || - !privatizer.getDeallocRegion().empty()) { + omp::DataSharingClauseType::FirstPrivate) { opInst.emitError("Translation of omp.target from MLIR to LLVMIR " - "failed because translation of firstprivate and " - " private allocatables is not supported yet"); + "failed because translation of firstprivate is not " + "supported yet"); bodyGenStatus = failure(); } else { - moduleTranslation.mapValue(privatizer.getAllocMoldArg(), - moduleTranslation.lookupValue(privVar)); Region &allocRegion = privatizer.getAllocRegion(); + BlockArgument allocRegionArg = allocRegion.getArgument(0); + moduleTranslation.mapValue( + allocRegionArg, + findHostAssociatedValue(privVar, targetOp, mappedPrivateVars, + builder, moduleTranslation)); SmallVector yieldedValues; if 
(failed(inlineConvertOmpRegions( allocRegion, "omp.targetop.privatizer", builder, @@ -3404,7 +3512,12 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, bodyGenStatus = failure(); } else { assert(yieldedValues.size() == 1); - moduleTranslation.mapValue(privBlockArg, yieldedValues.front()); + llvm::Value *llvmReplacementValue = yieldedValues.front(); + moduleTranslation.mapValue(privBlockArg, llvmReplacementValue); + if (!privatizer.getDeallocRegion().empty()) { + llvmPrivateVars.push_back(llvmReplacementValue); + privateCleanupRegions.push_back(&privatizer.getDeallocRegion()); + } } moduleTranslation.forgetMapping(allocRegion); builder.restoreIP(builder.saveIP()); @@ -3414,6 +3527,19 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, llvm::BasicBlock *exitBlock = convertOmpOpRegions( targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus); builder.SetInsertPoint(exitBlock); + if (!llvmPrivateVars.empty()) { + assert(llvmPrivateVars.size() == privateCleanupRegions.size() && + "Number of private variables needing cleanup not equal to number" + "of privatizers with dealloc regions"); + if (failed(inlineOmpRegionCleanup( + privateCleanupRegions, llvmPrivateVars, moduleTranslation, + builder, "omp.targetop.private.cleanup", + /*shouldLoadCleanupRegionArg=*/false))) { + opInst.emitError("failed to inline `dealloc` region of `omp.private` " + "op in the target region"); + bodyGenStatus = failure(); + } + } return builder.saveIP(); }; diff --git a/mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir new file mode 100644 index 0000000000000..b604336b3e1ea --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir @@ -0,0 +1,114 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +llvm.func @free(!llvm.ptr) +llvm.func @malloc(i64) -> !llvm.ptr +omp.private {type = private} @box.heap_privatizer0 : !llvm.ptr alloc { 
+^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(1 : i32) : i32 + %10 = llvm.getelementptr %arg0[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %1 = llvm.load %10 : !llvm.ptr -> i64 + %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> : (i32) -> !llvm.ptr + %17 = llvm.call @malloc(%1) {fir.must_be_heap = true, in_type = i32} : (i64) -> !llvm.ptr + %22 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %37 = llvm.insertvalue %17, %22[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + llvm.store %37, %7 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr + omp.yield(%7 : !llvm.ptr) +} dealloc { +^bb0(%arg0: !llvm.ptr): + %6 = llvm.mlir.constant(0 : i64) : i64 + %8 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %9 = llvm.load %8 : !llvm.ptr -> !llvm.ptr + llvm.call @free(%9) : (!llvm.ptr) -> () + omp.yield +} +omp.private {type = private} @box.heap_privatizer1 : !llvm.ptr alloc { +^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(1 : i32) : i32 + %10 = llvm.getelementptr %arg0[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %1 = llvm.load %10 : !llvm.ptr -> i64 + %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> : (i32) -> !llvm.ptr + %17 = llvm.call @malloc(%1) {fir.must_be_heap = true, in_type = i32} : (i64) -> !llvm.ptr + %22 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %37 = llvm.insertvalue %17, %22[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + llvm.store %37, %7 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr + omp.yield(%7 : !llvm.ptr) +} dealloc { +^bb0(%arg0: !llvm.ptr): + %6 = llvm.mlir.constant(0 : i64) : i64 + %8 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %9 = llvm.load %8 : !llvm.ptr -> !llvm.ptr + llvm.call @free(%9) : (!llvm.ptr) -> () + omp.yield +} 
+llvm.func @target_allocatable_(%arg0: !llvm.ptr {fir.bindc_name = "lb"}, %arg1: !llvm.ptr {fir.bindc_name = "ub"}, %arg2: !llvm.ptr {fir.bindc_name = "l"}) attributes {fir.internal_name = "_QPtarget_allocatable"} { + %6 = llvm.mlir.constant(1 : i64) : i64 + %7 = llvm.alloca %6 x i32 {bindc_name = "mapped_var"} : (i64) -> !llvm.ptr + %13 = llvm.alloca %6 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "alloc_var0"} : (i64) -> !llvm.ptr + %14 = llvm.alloca %6 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "alloc_var1"} : (i64) -> !llvm.ptr + %53 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "mapped_var"} + %54 = omp.map.info var_ptr(%13 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr + %55 = omp.map.info var_ptr(%14 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr + omp.target map_entries(%53 -> %arg3, %54 -> %arg4, %55 ->%arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@box.heap_privatizer0 %13 -> %arg6, @box.heap_privatizer1 %14 -> %arg7 : !llvm.ptr, !llvm.ptr) { + %64 = llvm.mlir.constant(1 : i32) : i32 + %65 = llvm.alloca %64 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %67 = llvm.alloca %64 x i32 : (i32) -> !llvm.ptr + %66 = llvm.mlir.constant(19 : i32) : i32 + %69 = llvm.mlir.constant(10 : i32) : i32 + llvm.store %64, %arg3 : i32, !llvm.ptr + llvm.store %69, %67 : i32, !llvm.ptr + %75 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %90 = llvm.insertvalue %67, %75[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + llvm.store %90, %65 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr + %91 = llvm.mlir.zero : !llvm.ptr + %92 = llvm.call @_FortranAAssign(%arg6, %65, %91, %66) : (!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct<()> + %93 = llvm.call 
@_FortranAAssign(%arg7, %65, %91, %66) : (!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct<()> + omp.terminator + } + llvm.return +} + + +llvm.func @_FortranAAssign(!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct<()> attributes {fir.runtime, sym_visibility = "private"} + +// The first set of checks ensure that we are calling the offloaded function +// with the right arguments, especially the second argument which needs to +// be a memory reference to the descriptor for the privatized allocatable +// CHECK: define void @target_allocatable_ +// CHECK-NOT: define internal void +// CHECK: %[[DESC_ALLOC0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1 +// CHECK: %[[DESC_ALLOC1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1 +// CHECK: call void @__omp_offloading_[[OFFLOADED_FUNCTION:.*]](ptr {{[^,]+}}, +// CHECK-SAME: ptr %[[DESC_ALLOC0]], ptr %[[DESC_ALLOC1]]) + +// CHECK: define internal void @__omp_offloading_[[OFFLOADED_FUNCTION]] +// CHECK-SAME: (ptr {{[^,]+}}, ptr %[[DESCRIPTOR_ARG0:[^,]+]], +// CHECK-SAME: ptr %[[DESCRIPTOR_ARG1:.*]]) { +// CHECK: %[[I0:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, +// CHECK-SAME: ptr %[[DESCRIPTOR_ARG0]], i32 0, i32 1 +// CHECK: %[[MALLOC_ARG0:.*]] = load i64, ptr %[[I0]] +// CHECK: %[[PRIV_DESC0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 } +// CHECK: %[[HEAP_PTR0:.*]] = call ptr @malloc(i64 %[[MALLOC_ARG0]]) +// CHECK: %[[TMP0:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8 } +// CHECK-SAME: undef, ptr %[[HEAP_PTR0]], 0 +// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[TMP0]], ptr %[[PRIV_DESC0]] + +// CHECK: %[[I1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, +// CHECK-SAME: ptr %[[DESCRIPTOR_ARG1]], i32 0, i32 1 +// CHECK: %[[MALLOC_ARG1:.*]] = load i64, ptr %[[I1]] +// CHECK: %[[PRIV_DESC1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 } +// CHECK: %[[HEAP_PTR1:.*]] = call ptr @malloc(i64 %[[MALLOC_ARG1]]) +// CHECK: %[[TMP1:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, 
i8 } +// CHECK-SAME: undef, ptr %[[HEAP_PTR1]], 0 +// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[TMP1]], ptr %[[PRIV_DESC1]] + +// CHECK: call {} @_FortranAAssign(ptr %[[PRIV_DESC0]] +// CHECK: call {} @_FortranAAssign(ptr %[[PRIV_DESC1]] + +// CHECK: %[[PTR0:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, +// CHECK-SAME: ptr %[[PRIV_DESC0]], i32 0, i32 0 +// CHECK: %[[HEAP_MEMREF0:.*]] = load ptr, ptr %[[PTR0]] +// CHECK: call void @free(ptr %[[HEAP_MEMREF0]]) +// CHECK: %[[PTR1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, +// CHECK-SAME: ptr %[[PRIV_DESC1]], i32 0, i32 0 +// CHECK: %[[HEAP_MEMREF1:.*]] = load ptr, ptr %[[PTR1]] +// CHECK: call void @free(ptr %[[HEAP_MEMREF1]]) diff --git a/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir b/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir new file mode 100644 index 0000000000000..6d65b25a7fbc0 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir @@ -0,0 +1,207 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +llvm.func @free(!llvm.ptr) +llvm.func @malloc(i64) -> !llvm.ptr +omp.private {type = private} @box.heap_privatizer : !llvm.ptr alloc { +^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(1 : i32) : i32 + %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "alloc_var", pinned} : (i32) -> !llvm.ptr + %8 = llvm.mlir.constant(0 : i64) : i64 + %10 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %11 = llvm.load %10 : !llvm.ptr -> !llvm.ptr + %12 = llvm.ptrtoint %11 : !llvm.ptr to i64 + %13 = llvm.icmp "ne" %12, %8 : i64 + llvm.cond_br %13, ^bb1, ^bb2 +^bb1: // pred: ^bb0 + %14 = llvm.mlir.zero : !llvm.ptr + %16 = llvm.ptrtoint %14 : !llvm.ptr to i64 + %17 = llvm.call @malloc(%16) {fir.must_be_heap = true, in_type = i32} : (i64) -> !llvm.ptr + %22 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %37 = 
llvm.insertvalue %17, %22[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + llvm.store %37, %7 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr + llvm.br ^bb3 +^bb2: // pred: ^bb0 + %39 = llvm.mlir.zero : !llvm.ptr + %44 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %59 = llvm.insertvalue %39, %44[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + llvm.store %59, %7 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr + llvm.br ^bb3 +^bb3: // 2 preds: ^bb1, ^bb2 + omp.yield(%7 : !llvm.ptr) +} dealloc { +^bb0(%arg0: !llvm.ptr): + %6 = llvm.mlir.constant(0 : i64) : i64 + %8 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %9 = llvm.load %8 : !llvm.ptr -> !llvm.ptr + %10 = llvm.ptrtoint %9 : !llvm.ptr to i64 + %11 = llvm.icmp "ne" %10, %6 : i64 + llvm.cond_br %11, ^bb1, ^bb2 +^bb1: // pred: ^bb0 + llvm.call @free(%9) : (!llvm.ptr) -> () + %15 = llvm.mlir.zero : !llvm.ptr + %16 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %17 = llvm.insertvalue %15, %16[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + llvm.store %17, %arg0 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr + llvm.br ^bb2 +^bb2: // 2 preds: ^bb0, ^bb1 + omp.yield +} +llvm.func @target_allocatable_(%arg0: !llvm.ptr {fir.bindc_name = "lb"}, %arg1: !llvm.ptr {fir.bindc_name = "ub"}, %arg2: !llvm.ptr {fir.bindc_name = "l"}) attributes {fir.internal_name = "_QPtarget_allocatable"} { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.mlir.constant(1 : i32) : i32 + %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %4 = llvm.mlir.constant(1 : i64) : i64 + %5 = llvm.alloca %4 x f32 {bindc_name = "real_var"} : (i64) -> !llvm.ptr + %6 = llvm.mlir.constant(1 : i64) : i64 + %7 = llvm.alloca %6 x i32 
{bindc_name = "mapped_var"} : (i64) -> !llvm.ptr + %8 = llvm.mlir.constant(1 : i64) : i64 + %9 = llvm.alloca %8 x !llvm.struct<(f32, f32)> {bindc_name = "comp_var"} : (i64) -> !llvm.ptr + %10 = llvm.mlir.constant(1 : i32) : i32 + %11 = llvm.alloca %10 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %12 = llvm.mlir.constant(1 : i64) : i64 + %13 = llvm.alloca %12 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "alloc_var"} : (i64) -> !llvm.ptr + %14 = llvm.mlir.constant(0 : index) : i64 + %15 = llvm.mlir.constant(1 : index) : i64 + %16 = llvm.mlir.constant(0 : i64) : i64 + %17 = llvm.mlir.zero : !llvm.ptr + %18 = llvm.mlir.constant(9 : i32) : i32 + %19 = llvm.mlir.zero : !llvm.ptr + %20 = llvm.getelementptr %19[1] : (!llvm.ptr) -> !llvm.ptr, i32 + %21 = llvm.ptrtoint %20 : !llvm.ptr to i64 + %22 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %23 = llvm.insertvalue %21, %22[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %24 = llvm.mlir.constant(20240719 : i32) : i32 + %25 = llvm.insertvalue %24, %23[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %26 = llvm.mlir.constant(0 : i32) : i32 + %27 = llvm.trunc %26 : i32 to i8 + %28 = llvm.insertvalue %27, %25[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %29 = llvm.trunc %18 : i32 to i8 + %30 = llvm.insertvalue %29, %28[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %31 = llvm.mlir.constant(2 : i32) : i32 + %32 = llvm.trunc %31 : i32 to i8 + %33 = llvm.insertvalue %32, %30[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %34 = llvm.mlir.constant(0 : i32) : i32 + %35 = llvm.trunc %34 : i32 to i8 + %36 = llvm.insertvalue %35, %33[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %37 = llvm.insertvalue %17, %36[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + llvm.store %37, %11 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr + %38 = llvm.load %11 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, 
i8)> + llvm.store %38, %13 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr + %39 = llvm.load %arg2 : !llvm.ptr -> i64 + %40 = llvm.icmp "sgt" %39, %16 : i64 + %41 = llvm.select %40, %39, %16 : i1, i64 + %42 = llvm.mlir.constant(1 : i64) : i64 + %43 = llvm.alloca %41 x i8 {bindc_name = "char_var"} : (i64) -> !llvm.ptr + %44 = llvm.load %arg0 : !llvm.ptr -> i64 + %45 = llvm.load %arg1 : !llvm.ptr -> i64 + %46 = llvm.sub %45, %44 : i64 + %47 = llvm.add %46, %15 : i64 + %48 = llvm.icmp "sgt" %47, %14 : i64 + %49 = llvm.select %48, %47, %14 : i1, i64 + %50 = llvm.mlir.constant(1 : i64) : i64 + %51 = llvm.mul %49, %50 : i64 + %52 = llvm.alloca %51 x f32 {bindc_name = "real_arr"} : (i64) -> !llvm.ptr + %53 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "mapped_var"} + %54 = omp.map.info var_ptr(%13 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr + omp.target map_entries(%53 -> %arg3, %54 -> %arg4 : !llvm.ptr, !llvm.ptr) private(@box.heap_privatizer %13 -> %arg5 : !llvm.ptr) { + %64 = llvm.mlir.constant(1 : i32) : i32 + %65 = llvm.alloca %64 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %66 = llvm.mlir.constant(1 : i64) : i64 + %67 = llvm.alloca %66 x i32 : (i64) -> !llvm.ptr + %68 = llvm.mlir.constant(18 : i32) : i32 + %69 = llvm.mlir.constant(10 : i32) : i32 + %70 = llvm.mlir.constant(5 : i32) : i32 + llvm.store %70, %arg3 : i32, !llvm.ptr + llvm.store %69, %67 : i32, !llvm.ptr + %71 = llvm.mlir.constant(9 : i32) : i32 + %72 = llvm.mlir.zero : !llvm.ptr + %73 = llvm.getelementptr %72[1] : (!llvm.ptr) -> !llvm.ptr, i32 + %74 = llvm.ptrtoint %73 : !llvm.ptr to i64 + %75 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %76 = llvm.insertvalue %74, %75[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %77 = llvm.mlir.constant(20240719 : i32) : i32 + %78 = 
llvm.insertvalue %77, %76[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %79 = llvm.mlir.constant(0 : i32) : i32 + %80 = llvm.trunc %79 : i32 to i8 + %81 = llvm.insertvalue %80, %78[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %82 = llvm.trunc %71 : i32 to i8 + %83 = llvm.insertvalue %82, %81[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %84 = llvm.mlir.constant(0 : i32) : i32 + %85 = llvm.trunc %84 : i32 to i8 + %86 = llvm.insertvalue %85, %83[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %87 = llvm.mlir.constant(0 : i32) : i32 + %88 = llvm.trunc %87 : i32 to i8 + %89 = llvm.insertvalue %88, %86[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %90 = llvm.insertvalue %67, %89[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + llvm.store %90, %65 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr + %91 = llvm.mlir.zero : !llvm.ptr + %92 = llvm.call @_FortranAAssign(%arg5, %65, %91, %68) : (!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct<()> + omp.terminator + } + %55 = llvm.load %13 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + llvm.store %55, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr + %56 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %57 = llvm.load %56 : !llvm.ptr -> !llvm.ptr + %58 = llvm.ptrtoint %57 : !llvm.ptr to i64 + %59 = llvm.icmp "ne" %58, %16 : i64 + llvm.cond_br %59, ^bb1, ^bb2 +^bb1: // pred: ^bb0 + %60 = llvm.load %13 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + llvm.store %60, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr + %61 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + %62 = llvm.load %61 : !llvm.ptr -> !llvm.ptr + llvm.call @free(%62) : (!llvm.ptr) -> () + %63 = llvm.load %11 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> + llvm.store %63, %13 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, 
!llvm.ptr + llvm.br ^bb2 +^bb2: // 2 preds: ^bb0, ^bb1 + llvm.return +} + + +llvm.func @_FortranAAssign(!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct<()> attributes {fir.runtime, sym_visibility = "private"} + +// The first set of checks ensure that we are calling the offloaded function +// with the right arguments, especially the second argument which needs to +// be a memory reference to the descriptor for the privatized allocatable +// CHECK: define void @target_allocatable_ +// CHECK-NOT: define internal void +// CHECK: %[[DESC_ALLOC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1 +// CHECK: call void @__omp_offloading_[[OFFLOADED_FUNCTION:.*]](ptr {{[^,]+}}, +// CHECK-SAME: ptr %[[DESC_ALLOC]]) + +// The second set of checks ensure that to allocate memory for the +// allocatable, we are, in fact, using the memory reference of the descriptor +// passed as the second argument to the offloaded function. +// CHECK: define internal void @__omp_offloading_[[OFFLOADED_FUNCTION]] +// CHECK-SAME: (ptr {{[^,]+}}, ptr %[[DESCRIPTOR_ARG:.*]]) { +// CHECK: %[[HEAP_MEMREF:.*]] = call ptr @malloc(i64 0) +// CHECK: %[[DESC_SETUP_MEMREF:.*]] = insertvalue +// CHECK-SAME: { ptr, i64, i32, i8, i8, i8, i8 } undef, ptr %[[HEAP_MEMREF]], 0 +// Unfortunately, in the way the blocks are arranged, the store to the +// privatized allocatable's descriptor is encountered before we allocate +// device-local memory for the descriptor (PRIVATE_DESC) itself +// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[DESC_SETUP_MEMREF]], +// CHECK-SAME: ptr %[[PRIVATE_DESC:.*]], align 8 +// CHECK: %[[PRIVATE_DESC]] = alloca { ptr, i64, i32, i8, i8, i8, i8 } +// CHECK: %[[ORIG_DATA_PTR_MMBR_OF_DESC:.*]] = getelementptr +// CHECK-SAME: { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[DESCRIPTOR_ARG]], +// CHECK-SAME: i32 0, i32 0 +// CHECK: %[[ORIG_DATA_PTR:.*]] = load ptr, ptr %[[ORIG_DATA_PTR_MMBR_OF_DESC]] +// CHECK: %[[PTR_TO_INT:.*]] = ptrtoint ptr %[[ORIG_DATA_PTR]] to i64 +// CHECK: icmp 
ne i64 %[[PTR_TO_INT]], 0 + + +// CHECK: call {} @_FortranAAssign(ptr %[[DESC_TO_DEALLOC:[^,]+]], + +// Now, check the deallocation of the private var. +// CHECK: call void @free(ptr %[[DATA_PTR_TO_FREE:.*]]) +// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } +// CHECK-SAME: { ptr null, i64 undef, i32 undef, i8 undef, i8 undef, i8 undef, i8 undef }, +// CHECK-SAME: ptr %[[DESC_TO_DEALLOC]] + +// CHECK: %[[DESC_MMBR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, +// CHECK-SAME: ptr %[[DESC_TO_DEALLOC]], i32 0, i32 0 +// CHECK: %[[DATA_PTR_TO_FREE]] = load ptr, ptr %[[DESC_MMBR]] +// CHECK: ptrtoint ptr %[[DATA_PTR_TO_FREE]] to i64 diff --git a/mlir/test/Target/LLVMIR/openmp-target-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-private.mlir index e41b18f593efe..93bdf5e9ff5c9 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-private.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-private.mlir @@ -94,3 +94,93 @@ llvm.func @target_op_private_multi_block(%arg0: !llvm.ptr) { // CHECK: %[[PRIV_ALLOC:.*]] = alloca float, i32 %[[ONE]], align 4 // CHECK: %[[PHI_ALLOCA:.*]] = phi ptr [ %[[PRIV_ALLOC]], {{.*}} ] // CHECK: %[[RESULT:.*]] = load float, ptr %[[PHI_ALLOCA]], align 4 + +// Descriptors are needed for CHARACTER arrays and their type is +// !fir.boxchar. When such arrays are used in the private construct, the +// privatizer takes a !fir.boxchar as input. This type is lowered to +// !llvm.struct<(ptr, i64)>. This is unique because with other types of data, +// typically, the privatizer function takes a !llvm.ptr. Now, on the host side, +// we map the descriptor using the map clause of the omp.target op. map clauses +// take only !llvm.ptr types. This means, we have a case where the descriptor is +// mapped by its pointer whereas the privatizer function expects the descriptor +// by value. So, we have this test to ensure that the compiler correctly loads +// from the mapped pointer before passing that to the privatizer function. 
+omp.private {type = private} @_QFtarget_boxcharEchar_var_private_boxchar_c8xU : !llvm.struct<(ptr, i64)> alloc { +^bb0(%arg0: !llvm.struct<(ptr, i64)>): + %0 = llvm.extractvalue %arg0[0] : !llvm.struct<(ptr, i64)> + %1 = llvm.extractvalue %arg0[1] : !llvm.struct<(ptr, i64)> + %2 = llvm.mlir.constant(1 : i64) : i64 + %3 = llvm.alloca %1 x i8 {bindc_name = "char_var", pinned} : (i64) -> !llvm.ptr + %4 = llvm.mlir.undef : !llvm.struct<(ptr, i64)> + %5 = llvm.insertvalue %3, %4[0] : !llvm.struct<(ptr, i64)> + %6 = llvm.insertvalue %1, %5[1] : !llvm.struct<(ptr, i64)> + omp.yield(%6 : !llvm.struct<(ptr, i64)>) +} +llvm.func @target_boxchar_(%arg0: !llvm.ptr {fir.bindc_name = "l"}) attributes {fir.internal_name = "_QPtarget_boxchar"} { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "mapped_var"} : (i64) -> !llvm.ptr + %3 = llvm.alloca %0 x !llvm.struct<(ptr, i64)> : (i64) -> !llvm.ptr + %4 = llvm.mlir.constant(0 : i64) : i64 + %5 = llvm.load %arg0 : !llvm.ptr -> i64 + %6 = llvm.icmp "sgt" %5, %4 : i64 + %7 = llvm.select %6, %5, %4 : i1, i64 + %9 = llvm.alloca %7 x i8 {bindc_name = "char_var"} : (i64) -> !llvm.ptr + %10 = llvm.mlir.undef : !llvm.struct<(ptr, i64)> + %11 = llvm.insertvalue %9, %10[0] : !llvm.struct<(ptr, i64)> + %12 = llvm.insertvalue %7, %11[1] : !llvm.struct<(ptr, i64)> + %13 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "mapped_var"} + llvm.store %12, %3 : !llvm.struct<(ptr, i64)>, !llvm.ptr + %14 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.struct<(ptr, i64)>) map_clauses(to) capture(ByRef) -> !llvm.ptr + omp.target map_entries(%13 -> %arg1, %14 -> %arg2 : !llvm.ptr, !llvm.ptr) private(@_QFtarget_boxcharEchar_var_private_boxchar_c8xU %12 -> %arg3 : !llvm.struct<(ptr, i64)>) { + %15 = llvm.mlir.constant(0 : index) : i64 + %16 = llvm.mlir.constant(32 : i8) : i8 + %17 = llvm.mlir.constant(1 : index) : i64 + %18 = 
llvm.mlir.constant(false) : i1 + %19 = llvm.mlir.constant(5 : index) : i64 + %20 = llvm.mlir.constant(5 : i32) : i32 + %21 = llvm.extractvalue %arg3[0] : !llvm.struct<(ptr, i64)> + %22 = llvm.extractvalue %arg3[1] : !llvm.struct<(ptr, i64)> + llvm.store %20, %arg1 : i32, !llvm.ptr + %23 = llvm.mlir.addressof @_QQclX68656C6C6F : !llvm.ptr + %24 = llvm.icmp "slt" %22, %19 : i64 + %25 = llvm.select %24, %22, %19 : i1, i64 + llvm.call @llvm.memmove.p0.p0.i64(%21, %23, %25, %18) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () + %26 = llvm.sub %22, %17 : i64 + %27 = llvm.mlir.undef : !llvm.array<1 x i8> + %28 = llvm.insertvalue %16, %27[0] : !llvm.array<1 x i8> + %29 = llvm.sub %26, %25 : i64 + %30 = llvm.add %29, %17 : i64 + llvm.br ^bb1(%25, %30 : i64, i64) + ^bb1(%31: i64, %32: i64): // 2 preds: ^bb0, ^bb2 + %33 = llvm.icmp "sgt" %32, %15 : i64 + llvm.cond_br %33, ^bb2, ^bb3 + ^bb2: // pred: ^bb1 + %34 = llvm.getelementptr %21[%31] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1 x i8> + llvm.store %28, %34 : !llvm.array<1 x i8>, !llvm.ptr + %35 = llvm.add %31, %17 : i64 + %36 = llvm.sub %32, %17 : i64 + llvm.br ^bb1(%35, %36 : i64, i64) + ^bb3: // pred: ^bb1 + omp.terminator + } + llvm.return +} +llvm.mlir.global linkonce constant @_QQclX68656C6C6F() comdat(@__llvm_comdat::@_QQclX68656C6C6F) {addr_space = 0 : i32} : !llvm.array<5 x i8> { + %0 = llvm.mlir.constant("hello") : !llvm.array<5 x i8> + llvm.return %0 : !llvm.array<5 x i8> +} +llvm.comdat @__llvm_comdat { + llvm.comdat_selector @_QQclX68656C6C6F any +} +llvm.func @llvm.memmove.p0.p0.i64(!llvm.ptr, !llvm.ptr, i64, i1) attributes {sym_visibility = "private"} + + + +// CHECK: define internal void @__omp_offloading_{{.*}}(ptr %{{[^,]+}}, ptr %[[MAPPED_ARG:.*]]) { +// CHECK: %[[BOXCHAR:.*]] = load { ptr, i64 }, ptr %[[MAPPED_ARG]] +// CHECK: %[[BOXCHAR_PTR:.*]] = extractvalue { ptr, i64 } %[[BOXCHAR]], 0 +// CHECK: %[[BOXCHAR_i64:.*]] = extractvalue { ptr, i64 } %[[BOXCHAR]], 1 +// CHECK: %[[MEM_ALLOC:.*]] = alloca i8, i64 
%[[BOXCHAR_i64]] +// CHECK: %[[PRIV_BOXCHAR0:.*]] = insertvalue { ptr, i64 } undef, ptr %[[MEM_ALLOC]], 0 +// CHECK: %[[PRIV_BOXCHAR1:.*]] = insertvalue { ptr, i64 } %[[PRIV_BOXCHAR0]], i64 %[[BOXCHAR_i64]], 1 +