Skip to content

Commit 79adc2b

Browse files
[Flang] Add FIR and LLVM lowering support for prefetch directive
* Add PrefetchOp in FIROps
* Handle PrefetchOp in FIR lowering and pass the required default values
* Handle PrefetchOp in CodeGen.cpp
* Add required tests
1 parent cf1f871 commit 79adc2b

File tree

5 files changed

+178
-9
lines changed

5 files changed

+178
-9
lines changed

flang/include/flang/Optimizer/Dialect/FIROps.td

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,36 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface,
351351
}];
352352
}
353353

354+
// Definition of `fir.prefetch`: a hint op taking one reference-like operand
// and three bounded i32 attributes; it declares no traits and no results.
def fir_PrefetchOp : fir_Op<"prefetch", []> {
355+
let summary = "prefetch a memory reference";
356+
357+
let description = [{
358+
The prefetch is a hint to the code generator that the memory reference will
359+
be used in the near future. The prefetch is not guaranteed to be executed.
360+
361+
```
362+
%a = ... -> !fir.ref<i32>
363+
fir.prefetch %a {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
364+
// ...
365+
fir.load %a : !fir.ref<i32> // use the prefetched value
366+
```
367+
}];
368+
369+
/// `memref`: the address to be prefetched.
370+
/// `rw`: read/write specifier;
371+
///   0 = read, 1 = write.
372+
/// `localityHint`: temporal locality specifier;
373+
///   ranges from 0 (no locality) to 3 (extremely local).
374+
/// `cacheType`: cache type specifier;
375+
///   0 = instruction cache, 1 = data cache.
376+
let arguments = (ins AnyReferenceLike:$memref,
377+
ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<1>]>:$rw,
378+
ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<3>]>:$localityHint,
379+
ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<1>]>:$cacheType);
380+
381+
let assemblyFormat = "$memref attr-dict `:` type(operands)";
382+
}
383+
354384
def fir_CopyOp : fir_Op<"copy", [DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
355385
let summary = "copy constant size memory";
356386

flang/lib/Lower/Bridge.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3276,7 +3276,24 @@ class FirConverter : public Fortran::lower::AbstractConverter {
32763276
attachInliningDirectiveToStmt(dir, &eval);
32773277
},
32783278
[&](const Fortran::parser::CompilerDirective::Prefetch &prefetch) {
  // Lower `!dir$ prefetch <designator-list>`: emit one fir.prefetch per
  // listed designator, in source order.
  const mlir::Location loc = genLocation(dir.source);
  for (const auto &p : prefetch.v) {
    // A designator is either a DataRef or a Substring. Only DataRef is
    // lowered; report the other case as not-yet-implemented instead of
    // letting std::get abort the compiler.
    const auto *dataRef =
        std::get_if<Fortran::parser::DataRef>(&p.value().u);
    if (!dataRef)
      TODO(loc, "prefetch directive with a substring designator");

    Fortran::evaluate::ExpressionAnalyzer ea{bridge.getSemanticsContext()};
    auto maybeExpr = ea.Analyze(*dataRef);
    // A prefetch is only a hint: skip an operand that could not be
    // analyzed rather than dereferencing an empty optional.
    if (!maybeExpr)
      continue;

    Fortran::lower::StatementContext stmtCtx;
    mlir::Value memRef{Fortran::lower::convertExprToHLFIR(
                           loc, *this, *maybeExpr, localSymbols, stmtCtx)
                           .getBase()};

    // TODO: Don't use default values; take the read/write, locality and
    // cache-type information from the directive instead.
    uint32_t isWrite{0}, localityHint{3}, isData{1};
    builder->create<fir::PrefetchOp>(loc, memRef, isWrite, localityHint,
                                     isData);
  }
},
32813298
[&](const auto &) {}},
32823299
dir.u);

flang/lib/Optimizer/CodeGen/CodeGen.cpp

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3346,6 +3346,25 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
33463346
}
33473347
};
33483348

3349+
/// `fir.prefetch` --> `llvm.prefetch`
3350+
struct PrefetchOpConversion : public fir::FIROpConversion<fir::PrefetchOp> {
3351+
using FIROpConversion::FIROpConversion;
3352+
3353+
llvm::LogicalResult
3354+
matchAndRewrite(fir::PrefetchOp prefetch, OpAdaptor adaptor,
3355+
mlir::ConversionPatternRewriter &rewriter) const override {
3356+
llvm::errs() << "prefetch\n";
3357+
mlir::IntegerAttr rw = prefetch.getRwAttr();
3358+
mlir::IntegerAttr localityHint = prefetch.getLocalityHintAttr();
3359+
mlir::IntegerAttr cacheType = prefetch.getCacheTypeAttr();
3360+
mlir::LLVM::Prefetch::create(rewriter, prefetch.getLoc(),
3361+
adaptor.getOperands().front(), rw,
3362+
localityHint, cacheType);
3363+
rewriter.eraseOp(prefetch);
3364+
return mlir::success();
3365+
}
3366+
};
3367+
33493368
/// `fir.load` --> `llvm.load`
33503369
struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
33513370
using FIROpConversion::FIROpConversion;
@@ -4423,14 +4442,15 @@ void fir::populateFIRToLLVMConversionPatterns(
44234442
FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
44244443
GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
44254444
LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
4426-
NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
4427-
SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,
4428-
ShapeOpConversion, ShapeShiftOpConversion, ShiftOpConversion,
4429-
SliceOpConversion, StoreOpConversion, StringLitOpConversion,
4430-
SubcOpConversion, TypeDescOpConversion, TypeInfoOpConversion,
4431-
UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion,
4432-
UnreachableOpConversion, XArrayCoorOpConversion, XEmboxOpConversion,
4433-
XReboxOpConversion, ZeroOpConversion>(converter, options);
4445+
NegcOpConversion, NoReassocOpConversion, PrefetchOpConversion,
4446+
SelectCaseOpConversion, SelectOpConversion, SelectRankOpConversion,
4447+
SelectTypeOpConversion, ShapeOpConversion, ShapeShiftOpConversion,
4448+
ShiftOpConversion, SliceOpConversion, StoreOpConversion,
4449+
StringLitOpConversion, SubcOpConversion, TypeDescOpConversion,
4450+
TypeInfoOpConversion, UnboxCharOpConversion, UnboxProcOpConversion,
4451+
UndefOpConversion, UnreachableOpConversion, XArrayCoorOpConversion,
4452+
XEmboxOpConversion, XReboxOpConversion, ZeroOpConversion>(converter,
4453+
options);
44344454

44354455
// Patterns that are populated without a type converter do not trigger
44364456
// target materializations for the operands of the root op.
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
! RUN: %flang_fc1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=LLVM
10+
11+
!===============================================================================
12+
! Test lowering of prefetch directive
13+
!===============================================================================
14+
15+
subroutine test_prefetch_01()
  ! Capture the allocas of j, i and a. Later checks refer to these captures so
  ! that they do not depend on the numbering of unnamed values in the output.
  ! LLVM: {{.*}} = alloca i32, i64 1, align 4
  ! LLVM: %[[L_J:.*]] = alloca i32, i64 1, align 4
  ! LLVM: %[[L_I:.*]] = alloca i32, i64 1, align 4
  ! LLVM: %[[L_A:.*]] = alloca [256 x i32], i64 1, align 4

  integer :: i, j
  integer :: a(256)

  a = 23
  ! Whole-array operand: the prefetch address is the storage of a.
  ! LLVM: call void @llvm.prefetch.p0(ptr %[[L_A]], i32 0, i32 3, i32 1)
  !dir$ prefetch a
  i = sum(a)

  ! Element operand a(i+64): i is loaded, offset by 64 and used to index a.
  ! LLVM: %[[L_LOAD:.*]] = load i32, ptr %[[L_I]], align 4
  ! LLVM: %[[L_ADD:.*]] = add nsw i32 %[[L_LOAD]], 64
  ! LLVM: %[[L_GEP:.*]] = getelementptr i32, ptr %[[L_A]], i64 {{.*}}

  ! One prefetch call per designator in the directive, in source order.
  ! LLVM: call void @llvm.prefetch.p0(ptr %[[L_GEP]], i32 0, i32 3, i32 1)
  ! LLVM: call void @llvm.prefetch.p0(ptr %[[L_J]], i32 0, i32 3, i32 1)

  do i = 1, (256 - 64)
    !dir$ prefetch a(i+64), j
    a(i) = a(i-32) + a(i+32) + j
  end do
end subroutine test_prefetch_01
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
! Test lowering of prefetch directive
2+
! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s --check-prefixes=HLFIR
3+
4+
! Helper module: declares derived type t, whose component a is a 256x256
! integer array. It is used by test_prefetch_02 to prefetch through a component.
module test_prefetch_mod
5+
implicit none
6+
type :: t
7+
integer :: a(256, 256)
8+
end type t
9+
end module test_prefetch_mod
10+
11+
! Prefetch of a whole local array, of an array element and of a scalar.
subroutine test_prefetch_01()
12+
! Capture the hlfir.declare results of a, i and j for use in later checks.
! HLFIR: %[[H_A:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ea"} : (!fir.ref<!fir.array<256xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<256xi32>>, !fir.ref<!fir.array<256xi32>>)
13+
! HLFIR: %[[H_I:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
14+
! HLFIR: %[[H_J:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
15+
16+
integer :: i, j
17+
integer :: a(256)
18+
19+
a = 23
20+
21+
! Whole-array operand: the prefetch takes the first result of the declare of a.
! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<!fir.array<256xi32>>
22+
!dir$ prefetch a
23+
i = sum(a)
24+
25+
! Element operand a(i+64): the index i+64 is computed, converted to i64 and
! used in an hlfir.designate whose result is the prefetch operand.
! HLFIR: %[[H_LOAD:.*]] = fir.load %[[H_I]]#0 : !fir.ref<i32>
26+
! HLFIR: %[[H_C64:.*]] = arith.constant 64 : i32
27+
! HLFIR: %[[H_ADD:.*]] = arith.addi %[[H_LOAD]], %[[H_C64]] overflow<nsw> : i32
28+
! HLFIR: %[[H_CON:.*]] = fir.convert %[[H_ADD]] : (i32) -> i64
29+
! HLFIR: %[[H_DESIG:.*]] = hlfir.designate %[[H_A]]#0 (%[[H_CON]]) : (!fir.ref<!fir.array<256xi32>>, i64) -> !fir.ref<i32>
30+
31+
! One fir.prefetch per designator of the directive, in source order.
! HLFIR: fir.prefetch %[[H_DESIG]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
32+
! HLFIR: fir.prefetch %[[H_J]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
33+
34+
do i = 1, (256 - 64)
35+
!dir$ prefetch a(i+64), j
36+
a(i) = a(i-32) + a(i+32) + j
37+
end do
38+
end subroutine test_prefetch_01
39+
40+
! Prefetch of a derived-type array component, of an allocatable array and of
! an element of a derived-type array component.
! NOTE(review): i and j are not declared in this subroutine, so they rely on
! implicit typing.
subroutine test_prefetch_02(t1)
41+
use test_prefetch_mod
42+
! Capture the declares of the allocatable a and of the dummy argument t1.
! HLFIR: %[[H_A:.*]]:2 = hlfir.declare {{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_prefetch_02Ea"}
43+
! HLFIR: %[[H_ARG0:.*]]:2 = hlfir.declare {{.*}} dummy_scope {{.*}} {fortran_attrs = #fir.var_attrs<intent_inout>, uniq_name = "_QFtest_prefetch_02Et1"}
44+
type(t), intent(inout) :: t1
45+
integer, allocatable :: a(:, :)
46+
47+
! Component operand t1%a: the prefetch takes the designate of component "a".
! HLFIR: %[[H_DESIG_01:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} shape {{.*}}
48+
! HLFIR: fir.prefetch %[[H_DESIG_01]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<!fir.array<256x256xi32>>
49+
!dir$ prefetch t1%a
50+
a = t1%a ** 2
51+
52+
do i = 1, 256
53+
! Allocatable operand: the expected operand type is a reference to the box of
! a (!fir.ref<!fir.box<...>>), i.e. the declared variable itself.
! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
54+
!dir$ prefetch a
55+
a(i, :) = a(i, :) + i
56+
do j = 1, 256
57+
! Component element operand t1%a(i, j): the designate yields a scalar reference.
! HLFIR: %[[H_DESIG_02:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} {{.*}}
58+
! HLFIR: fir.prefetch %[[H_DESIG_02]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref<i32>
59+
!dir$ prefetch t1%a(i, j)
60+
t1%a(i, j) = (a(i, j) + i*j) / t1%a(i, j)
61+
end do
62+
end do
63+
end subroutine test_prefetch_02

0 commit comments

Comments
 (0)