diff --git a/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp b/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp index d41564da20711..951293b133677 100644 --- a/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp +++ b/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp @@ -277,7 +277,8 @@ class PopulateInitAndCleanupRegionsHelper { /// allocatedPrivVarArg: The allocation for the private /// variable. /// moldArg: The original variable. - /// loadedMoldArg: The original variable, loaded. + /// loadedMoldArg: The original variable, loaded. Access via + /// getLoadedMoldArg(). mlir::Value scalarInitValue, allocatedPrivVarArg, moldArg, loadedMoldArg; /// The first block in the init region. @@ -321,6 +322,14 @@ class PopulateInitAndCleanupRegionsHelper { void initAndCleanupUnboxedDerivedType(bool needsInitialization); fir::IfOp handleNullAllocatable(); + + // Do this lazily so that we don't load it when it is not used. + inline mlir::Value getLoadedMoldArg() { + if (loadedMoldArg) + return loadedMoldArg; + loadedMoldArg = builder.loadIfRef(loc, moldArg); + return loadedMoldArg; + } }; } // namespace @@ -333,7 +342,7 @@ void PopulateInitAndCleanupRegionsHelper::initBoxedPrivatePointer( // we need a shape with the right rank so that the embox op is lowered // to an llvm struct of the right type. This returns nullptr if the types // aren't right. - mlir::Value shape = generateZeroShapeForRank(builder, loc, loadedMoldArg); + mlir::Value shape = generateZeroShapeForRank(builder, loc, moldArg); // Just incase, do initialize the box with a null value mlir::Value null = builder.createNullConstant(loc, boxTy.getEleTy()); mlir::Value nullBox; @@ -355,7 +364,7 @@ void PopulateInitAndCleanupRegionsHelper::initBoxedPrivatePointer( /// } /// omp.yield %box_alloca fir::IfOp PopulateInitAndCleanupRegionsHelper::handleNullAllocatable() { - mlir::Value addr = builder.create(loc, loadedMoldArg); + mlir::Value addr = builder.create(loc, getLoadedMoldArg()); mlir::Value isNotAllocated = builder.genIsNullAddr(loc, addr); fir::IfOp ifOp = builder.create(loc, isNotAllocated, /*withElseRegion=*/true); @@ -391,7 +400,7 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedScalar( loc, valType, valAlloc, /*shape=*/mlir::Value{}, /*slice=*/mlir::Value{}, lenParams); initializeIfDerivedTypeBox( - builder, loc, box, loadedMoldArg, needsInitialization, + builder, loc, box, getLoadedMoldArg(), needsInitialization, /*isFirstPrivate=*/kind == DeclOperationKind::FirstPrivate); fir::StoreOp lastOp = builder.create(loc, box, allocatedPrivVarArg); @@ -410,7 +419,7 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray( fir::BaseBoxType boxTy, bool needsInitialization) { bool isAllocatableOrPointer = mlir::isa(boxTy.getEleTy()); - getLengthParameters(builder, loc, loadedMoldArg, lenParams); + getLengthParameters(builder, loc, getLoadedMoldArg(), lenParams); fir::IfOp ifUnallocated{nullptr}; if (isAllocatableOrPointer) { @@ -419,7 +428,7 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray( } // Create the private copy from the initial fir.box: - hlfir::Entity source = hlfir::Entity{loadedMoldArg}; + hlfir::Entity source = hlfir::Entity{getLoadedMoldArg()}; // Special case for (possibly allocatable) arrays of polymorphic types // e.g. !fir.class>>> @@ -463,8 +472,9 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray( // Put the temporary inside of a box: // hlfir::genVariableBox doesn't handle non-default lower bounds mlir::Value box; - fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, loadedMoldArg); - mlir::Type boxType = loadedMoldArg.getType(); + fir::ShapeShiftOp shapeShift = + getShapeShift(builder, loc, getLoadedMoldArg()); + mlir::Type boxType = getLoadedMoldArg().getType(); if (mlir::isa(temp.getType())) // the box created by the declare form createTempFromMold is missing // lower bounds info @@ -480,7 +490,7 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray( builder.create(loc, scalarInitValue, box); initializeIfDerivedTypeBox( - builder, loc, box, loadedMoldArg, needsInitialization, + builder, loc, box, getLoadedMoldArg(), needsInitialization, /*isFirstPrivate=*/kind == DeclOperationKind::FirstPrivate); builder.create(loc, box, allocatedPrivVarArg); @@ -553,8 +563,7 @@ void PopulateInitAndCleanupRegionsHelper::populateByRefInitAndCleanupRegions() { builder.setInsertionPointToEnd(initBlock); // TODO: don't do this unless it is needed - loadedMoldArg = builder.loadIfRef(loc, moldArg); - getLengthParameters(builder, loc, loadedMoldArg, lenParams); + getLengthParameters(builder, loc, getLoadedMoldArg(), lenParams); if (isPrivatization(kind) && mlir::isa(boxTy.getEleTy())) { @@ -604,4 +613,16 @@ void Fortran::lower::omp::populateByRefInitAndCleanupRegions( converter, loc, argType, scalarInitValue, allocatedPrivVarArg, moldArg, initBlock, cleanupRegion, kind, sym); helper.populateByRefInitAndCleanupRegions(); + + // Often we load moldArg to check something (e.g. length parameters, shape) + // but then those answers can be gotten statically without accessing the + // runtime value and so the only remaining use is a dead load. These loads can + // force us to insert additional barriers and so should be avoided where + // possible. + if (moldArg.hasOneUse()) { + mlir::Operation *user = *moldArg.getUsers().begin(); + if (auto load = mlir::dyn_cast(user)) + if (load.use_empty()) + load.erase(); + } } diff --git a/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90 b/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90 index 1dc345c11568c..f39ac9199e8bd 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90 @@ -42,7 +42,6 @@ subroutine delayed_privatization_lenparams(length) ! CHECK-LABEL: omp.private {type = firstprivate} ! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.box>]] init { ! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref<[[TYPE]]>, %[[PRIV_ALLOC:.*]]: !fir.ref<[[TYPE]]>): -! CHECK-NEXT: %[[ARG:.*]] = fir.load %[[PRIV_ARG]] ! CHECK-NEXT: %[[NULL:.*]] = fir.zero_bits !fir.ptr ! CHECK-NEXT: %[[INIT:.*]] = fir.embox %[[NULL]] : (!fir.ptr) -> !fir.box> ! CHECK-NEXT: fir.store %[[INIT]] to %[[PRIV_ALLOC]] : !fir.ref>>