Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions flang/include/flang/Optimizer/Dialect/FIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,36 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface,
}];
}

def fir_PrefetchOp : fir_Op<"prefetch", []> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without any side effects, I think this operation could pessimize some analysis/optimizations.

Do you know how LLVM handles the side effects of this operation?

Should it simply be given read/write effects on its arguments depending on the rw attribute?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, ed11774

let summary = "prefetch a memory reference";

let description = [{
The prefetch is a hint to the code generator that the memory reference will
be used in the near future. The prefetch is not guaranteed to be executed.

```
%a = ... -> !fir.ref<i32>
fir.prefetch %a {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
// ...
fir.load %a : !fir.ref<i32> // use the prefetched value
```
}];

/// `memref' is the address to be prefetched
/// `rw' : rw specifier >
/// read is 0, write is 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As this is just boolean, I wonder if this should just be an attribute: e.g. default to read and then if a unitattr called "write" is present then it is a write

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

/// `localityHint': temporal locality specifier >
/// value ranging from 0 - no locality to 3 - extremely local
/// `cacheType' : cache type specifier >
/// instruction cache is 0, data cache is 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is worth explaining that these numbers must match the LLVM langref (and that is where they come from).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly for the rw specifier, for something as simple as this it shouldn't be difficult to make the IR a bit prettier. It is better if the magic numbers are confined as much as possible to the translation from this op to the mlir::LLVM op. It is reasonable to expect anyone who changes the mlir::LLVM op to look at places its create method is used, but they might not look close enough to find this operation definition.

let arguments = (ins AnyReferenceLike:$memref,
ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<1>]>:$rw,
ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<3>]>:$localityHint,
ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<1>]>:$cacheType);

let assemblyFormat = "$memref attr-dict `:` type(operands)";
}

def fir_CopyOp : fir_Op<"copy", [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
let summary = "copy constant size memory";

Expand Down
18 changes: 17 additions & 1 deletion flang/lib/Lower/Bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3276,7 +3276,23 @@ class FirConverter : public Fortran::lower::AbstractConverter {
attachInliningDirectiveToStmt(dir, &eval);
},
[&](const Fortran::parser::CompilerDirective::Prefetch &prefetch) {
TODO(getCurrentLocation(), "!$dir prefetch");
for (const auto &p : prefetch.v) {
Fortran::evaluate::ExpressionAnalyzer ea{
bridge.getSemanticsContext()};
Fortran::lower::SomeExpr expr{*ea.Analyze(
std::get<Fortran::parser::DataRef>(p.value().u))};
Fortran::lower::StatementContext stmtCtx;
mlir::Value memRef{Fortran::lower::convertExprToHLFIR(
genLocation(dir.source), *this, expr,
localSymbols, stmtCtx)
.getBase()};

// TODO: Don't use default value, instead get the following
// info from the directive
uint32_t isWrite{0}, localityHint{3}, isData{1};
fir::PrefetchOp::create(*builder, genLocation(dir.source),
memRef, isWrite, localityHint, isData);
}
},
[&](const auto &) {}},
dir.u);
Expand Down
36 changes: 28 additions & 8 deletions flang/lib/Optimizer/CodeGen/CodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3346,6 +3346,25 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
}
};

/// `fir.prefetch` --> `llvm.prefetch`
struct PrefetchOpConversion : public fir::FIROpConversion<fir::PrefetchOp> {
using FIROpConversion::FIROpConversion;

llvm::LogicalResult
matchAndRewrite(fir::PrefetchOp prefetch, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
llvm::errs() << "prefetch\n";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
llvm::errs() << "prefetch\n";

mlir::IntegerAttr rw = prefetch.getRwAttr();
mlir::IntegerAttr localityHint = prefetch.getLocalityHintAttr();
mlir::IntegerAttr cacheType = prefetch.getCacheTypeAttr();
mlir::LLVM::Prefetch::create(rewriter, prefetch.getLoc(),
adaptor.getOperands().front(), rw,
localityHint, cacheType);
rewriter.eraseOp(prefetch);
return mlir::success();
}
};

/// `fir.load` --> `llvm.load`
struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
using FIROpConversion::FIROpConversion;
Expand Down Expand Up @@ -4423,14 +4442,15 @@ void fir::populateFIRToLLVMConversionPatterns(
FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,
ShapeOpConversion, ShapeShiftOpConversion, ShiftOpConversion,
SliceOpConversion, StoreOpConversion, StringLitOpConversion,
SubcOpConversion, TypeDescOpConversion, TypeInfoOpConversion,
UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion,
UnreachableOpConversion, XArrayCoorOpConversion, XEmboxOpConversion,
XReboxOpConversion, ZeroOpConversion>(converter, options);
NegcOpConversion, NoReassocOpConversion, PrefetchOpConversion,
SelectCaseOpConversion, SelectOpConversion, SelectRankOpConversion,
SelectTypeOpConversion, ShapeOpConversion, ShapeShiftOpConversion,
ShiftOpConversion, SliceOpConversion, StoreOpConversion,
StringLitOpConversion, SubcOpConversion, TypeDescOpConversion,
TypeInfoOpConversion, UnboxCharOpConversion, UnboxProcOpConversion,
UndefOpConversion, UnreachableOpConversion, XArrayCoorOpConversion,
XEmboxOpConversion, XReboxOpConversion, ZeroOpConversion>(converter,
options);

// Patterns that are populated without a type converter do not trigger
// target materializations for the operands of the root op.
Expand Down
39 changes: 39 additions & 0 deletions flang/test/Integration/prefetch.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
!===----------------------------------------------------------------------===!
! This directory can be used to add Integration tests involving multiple
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
! contain executable tests. We should only add tests here sparingly and only
! if there is no other way to test. Repeat this message in each test that is
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!

! RUN: %flang_fc1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=LLVM

!===============================================================================
! Test lowering of prefetch directive
!===============================================================================

subroutine test_prefetch_01()
! LLVM: {{.*}} = alloca i32, i64 1, align 4
! LLVM: %[[L_J:.*]] = alloca i32, i64 1, align 4
! LLVM: %[[L_I:.*]] = alloca i32, i64 1, align 4
! LLVM: %[[L_A:.*]] = alloca [256 x i32], i64 1, align 4

integer :: i, j
integer :: a(256)

a = 23
! LLVM: call void @llvm.prefetch.p0(ptr %6, i32 0, i32 3, i32 1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

%6 should be updated to a variable with a meaningful name

!dir$ prefetch a
i = sum(a)
! LLVM: %[[L_LOAD:.*]] = load i32, ptr %5, align 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use variable for %5 or %{{.*}}

! LLVM: %[[L_ADD:.*]] = add nsw i32 %[[L_LOAD]], 64
! LLVM: %[[L_GEP:.*]] = getelementptr i32, ptr %[[L_A]], i64 {{.*}}

! LLVM: call void @llvm.prefetch.p0(ptr %[[L_GEP]], i32 0, i32 3, i32 1)
! LLVM: call void @llvm.prefetch.p0(ptr %[[L_J]], i32 0, i32 3, i32 1)

do i = 1, (256 - 64)
!dir$ prefetch a(i+64), j
a(i) = a(i-32) + a(i+32) + j
end do
end subroutine test_prefetch_01
63 changes: 63 additions & 0 deletions flang/test/Lower/HLFIR/prefetch.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
! Test lowering of prefetch directive
! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s --check-prefixes=HLFIR

module test_prefetch_mod
implicit none
type :: t
integer :: a(256, 256)
end type t
end module test_prefetch_mod

subroutine test_prefetch_01()
! HLFIR: %[[H_A:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ea"} : (!fir.ref<!fir.array<256xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<256xi32>>, !fir.ref<!fir.array<256xi32>>)
! HLFIR: %[[H_I:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! HLFIR: %[[H_J:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)

integer :: i, j
integer :: a(256)

a = 23

! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<!fir.array<256xi32>>
!dir$ prefetch a
i = sum(a)

! HLFIR: %[[H_LOAD:.*]] = fir.load %[[H_I]]#0 : !fir.ref<i32>
! HLFIR: %[[H_C64:.*]] = arith.constant 64 : i32
! HLFIR: %[[H_ADD:.*]] = arith.addi %[[H_LOAD]], %[[H_C64]] overflow<nsw> : i32
! HLFIR: %[[H_CON:.*]] = fir.convert %[[H_ADD]] : (i32) -> i64
! HLFIR: %[[H_DESIG:.*]] = hlfir.designate %[[H_A]]#0 (%[[H_CON]]) : (!fir.ref<!fir.array<256xi32>>, i64) -> !fir.ref<i32>

! HLFIR: fir.prefetch %[[H_DESIG]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
! HLFIR: fir.prefetch %[[H_J]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>

do i = 1, (256 - 64)
!dir$ prefetch a(i+64), j
a(i) = a(i-32) + a(i+32) + j
end do
end subroutine test_prefetch_01

subroutine test_prefetch_02(t1)
use test_prefetch_mod
! HLFIR: %[[H_A:.*]]:2 = hlfir.declare {{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_prefetch_02Ea"}
! HLFIR: %[[H_ARG0:.*]]:2 = hlfir.declare {{.*}} dummy_scope {{.*}} {fortran_attrs = #fir.var_attrs<intent_inout>, uniq_name = "_QFtest_prefetch_02Et1"}
type(t), intent(inout) :: t1
integer, allocatable :: a(:, :)

! HLFIR: %[[H_DESIG_01:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} shape {{.*}}
! HLFIR: fir.prefetch %[[H_DESIG_01]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<!fir.array<256x256xi32>>
!dir$ prefetch t1%a
a = t1%a ** 2

do i = 1, 256
! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
!dir$ prefetch a
a(i, :) = a(i, :) + i
do j = 1, 256
! HLFIR: %[[H_DESIG_02:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} {{.*}}
! HLFIR: fir.prefetch %[[H_DESIG_02]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
!dir$ prefetch t1%a(i, j)
t1%a(i, j) = (a(i, j) + i*j) / t1%a(i, j)
end do
end do
end subroutine test_prefetch_02