From 79adc2b620b6cbecff043a581412eeab3ca4a255 Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Mon, 10 Nov 2025 06:11:37 +0000 Subject: [PATCH 01/14] [Flang] Add FIR and LLVM lowering support for prefetch directive * Add PrefetchOp in FirOps * Handle PrefetchOp in FIR Lowering and also pass required default values * Handle PrefetchOp in CodeGen.cpp * Add required tests --- .../include/flang/Optimizer/Dialect/FIROps.td | 30 +++++++++ flang/lib/Lower/Bridge.cpp | 19 +++++- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 36 ++++++++--- flang/test/Integration/prefetch.f90 | 39 ++++++++++++ flang/test/Lower/HLFIR/prefetch.f90 | 63 +++++++++++++++++++ 5 files changed, 178 insertions(+), 9 deletions(-) create mode 100644 flang/test/Integration/prefetch.f90 create mode 100644 flang/test/Lower/HLFIR/prefetch.f90 diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index bae52d63fda45..84d7ed29292ae 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -351,6 +351,36 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface, }]; } +def fir_PrefetchOp : fir_Op<"prefetch", []> { + let summary = "prefetch a memory reference"; + + let description = [{ + The prefetch is a hint to the code generator that the memory reference will + be used in the near future. The prefetch is not guaranteed to be executed. + + ``` + %a = ... -> !fir.ref + fir.prefetch %a {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref + // ... 
+ fir.load %a : !fir.ref // use the prefetched value + ``` + }]; + + /// `memref' is the address to be prefetched + /// `rw' : rw specifier > + /// read is 0, write is 1 + /// `localityHint': temporal locality specifier > + /// value ranging from 0 - no locality to 3 - extremely local + /// `cacheType' : cache type specifier > + /// instruction cache is 0, data cache is 1 + let arguments = (ins AnyReferenceLike:$memref, + ConfinedAttr, IntMaxValue<1>]>:$rw, + ConfinedAttr, IntMaxValue<3>]>:$localityHint, + ConfinedAttr, IntMaxValue<1>]>:$cacheType); + + let assemblyFormat = "$memref attr-dict `:` type(operands)"; +} + def fir_CopyOp : fir_Op<"copy", [DeclareOpInterfaceMethods]> { let summary = "copy constant size memory"; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 5779bcd5d293c..9d8e765e8adea 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -3276,7 +3276,24 @@ class FirConverter : public Fortran::lower::AbstractConverter { attachInliningDirectiveToStmt(dir, &eval); }, [&](const Fortran::parser::CompilerDirective::Prefetch &prefetch) { - TODO(getCurrentLocation(), "!$dir prefetch"); + for (const auto &p : prefetch.v) { + Fortran::evaluate::ExpressionAnalyzer ea{ + bridge.getSemanticsContext()}; + Fortran::lower::SomeExpr expr{*ea.Analyze( + std::get(p.value().u))}; + Fortran::lower::StatementContext stmtCtx; + mlir::Value memRef{Fortran::lower::convertExprToHLFIR( + genLocation(dir.source), *this, expr, + localSymbols, stmtCtx) + .getBase()}; + + // TODO: Don't use default value, instead get the following + // info from the directive + uint32_t isWrite{0}, localityHint{3}, isData{1}; + builder->create(genLocation(dir.source), + memRef, isWrite, localityHint, + isData); + } }, [&](const auto &) {}}, dir.u); diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index ca4aefb653d2a..69734a2ae443d 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ 
b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -3346,6 +3346,25 @@ struct GlobalOpConversion : public fir::FIROpConversion { } }; +/// `fir.prefetch` --> `llvm.prefetch` +struct PrefetchOpConversion : public fir::FIROpConversion { + using FIROpConversion::FIROpConversion; + + llvm::LogicalResult + matchAndRewrite(fir::PrefetchOp prefetch, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + llvm::errs() << "prefetch\n"; + mlir::IntegerAttr rw = prefetch.getRwAttr(); + mlir::IntegerAttr localityHint = prefetch.getLocalityHintAttr(); + mlir::IntegerAttr cacheType = prefetch.getCacheTypeAttr(); + mlir::LLVM::Prefetch::create(rewriter, prefetch.getLoc(), + adaptor.getOperands().front(), rw, + localityHint, cacheType); + rewriter.eraseOp(prefetch); + return mlir::success(); + } +}; + /// `fir.load` --> `llvm.load` struct LoadOpConversion : public fir::FIROpConversion { using FIROpConversion::FIROpConversion; @@ -4423,14 +4442,15 @@ void fir::populateFIRToLLVMConversionPatterns( FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion, GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion, LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion, - NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion, - SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion, - ShapeOpConversion, ShapeShiftOpConversion, ShiftOpConversion, - SliceOpConversion, StoreOpConversion, StringLitOpConversion, - SubcOpConversion, TypeDescOpConversion, TypeInfoOpConversion, - UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion, - UnreachableOpConversion, XArrayCoorOpConversion, XEmboxOpConversion, - XReboxOpConversion, ZeroOpConversion>(converter, options); + NegcOpConversion, NoReassocOpConversion, PrefetchOpConversion, + SelectCaseOpConversion, SelectOpConversion, SelectRankOpConversion, + SelectTypeOpConversion, ShapeOpConversion, ShapeShiftOpConversion, + ShiftOpConversion, SliceOpConversion, 
StoreOpConversion, + StringLitOpConversion, SubcOpConversion, TypeDescOpConversion, + TypeInfoOpConversion, UnboxCharOpConversion, UnboxProcOpConversion, + UndefOpConversion, UnreachableOpConversion, XArrayCoorOpConversion, + XEmboxOpConversion, XReboxOpConversion, ZeroOpConversion>(converter, + options); // Patterns that are populated without a type converter do not trigger // target materializations for the operands of the root op. diff --git a/flang/test/Integration/prefetch.f90 b/flang/test/Integration/prefetch.f90 new file mode 100644 index 0000000000000..1f7f6d091cfaa --- /dev/null +++ b/flang/test/Integration/prefetch.f90 @@ -0,0 +1,39 @@ +!===----------------------------------------------------------------------===! +! This directory can be used to add Integration tests involving multiple +! stages of the compiler (for eg. from Fortran to LLVM IR). It should not +! contain executable tests. We should only add tests here sparingly and only +! if there is no other way to test. Repeat this message in each test that is +! added to this directory and sub-directories. +!===----------------------------------------------------------------------===! + +! RUN: %flang_fc1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=LLVM + +!=============================================================================== +! Test lowering of prefetch directive +!=============================================================================== + +subroutine test_prefetch_01() + ! LLVM: {{.*}} = alloca i32, i64 1, align 4 + ! LLVM: %[[L_J:.*]] = alloca i32, i64 1, align 4 + ! LLVM: %[[L_I:.*]] = alloca i32, i64 1, align 4 + ! LLVM: %[[L_A:.*]] = alloca [256 x i32], i64 1, align 4 + + integer :: i, j + integer :: a(256) + + a = 23 + ! LLVM: call void @llvm.prefetch.p0(ptr %6, i32 0, i32 3, i32 1) + !dir$ prefetch a + i = sum(a) + ! LLVM: %[[L_LOAD:.*]] = load i32, ptr %5, align 4 + ! LLVM: %[[L_ADD:.*]] = add nsw i32 %[[L_LOAD]], 64 + ! 
LLVM: %[[L_GEP:.*]] = getelementptr i32, ptr %[[L_A]], i64 {{.*}} + + ! LLVM: call void @llvm.prefetch.p0(ptr %[[L_GEP]], i32 0, i32 3, i32 1) + ! LLVM: call void @llvm.prefetch.p0(ptr %[[L_J]], i32 0, i32 3, i32 1) + + do i = 1, (256 - 64) + !dir$ prefetch a(i+64), j + a(i) = a(i-32) + a(i+32) + j + end do +end subroutine test_prefetch_01 diff --git a/flang/test/Lower/HLFIR/prefetch.f90 b/flang/test/Lower/HLFIR/prefetch.f90 new file mode 100644 index 0000000000000..3fe0a1a18c4c3 --- /dev/null +++ b/flang/test/Lower/HLFIR/prefetch.f90 @@ -0,0 +1,63 @@ +! Test lowering of prefetch directive +! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s --check-prefixes=HLFIR + +module test_prefetch_mod + implicit none + type :: t + integer :: a(256, 256) + end type t +end module test_prefetch_mod + +subroutine test_prefetch_01() + ! HLFIR: %[[H_A:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ea"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + ! HLFIR: %[[H_I:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ! HLFIR: %[[H_J:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "_QFtest_prefetch_01Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) + + integer :: i, j + integer :: a(256) + + a = 23 + + ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref> + !dir$ prefetch a + i = sum(a) + + ! HLFIR: %[[H_LOAD:.*]] = fir.load %[[H_I]]#0 : !fir.ref + ! HLFIR: %[[H_C64:.*]] = arith.constant 64 : i32 + ! HLFIR: %[[H_ADD:.*]] = arith.addi %[[H_LOAD]], %[[H_C64]] overflow : i32 + ! HLFIR: %[[H_CON:.*]] = fir.convert %[[H_ADD]] : (i32) -> i64 + ! HLFIR: %[[H_DESIG:.*]] = hlfir.designate %[[H_A]]#0 (%[[H_CON]]) : (!fir.ref>, i64) -> !fir.ref + + ! HLFIR: fir.prefetch %[[H_DESIG]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref + ! 
HLFIR: fir.prefetch %[[H_J]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref + + do i = 1, (256 - 64) + !dir$ prefetch a(i+64), j + a(i) = a(i-32) + a(i+32) + j + end do +end subroutine test_prefetch_01 + +subroutine test_prefetch_02(t1) + use test_prefetch_mod + ! HLFIR: %[[H_A:.*]]:2 = hlfir.declare {{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_prefetch_02Ea"} + ! HLFIR: %[[H_ARG0:.*]]:2 = hlfir.declare {{.*}} dummy_scope {{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_prefetch_02Et1"} + type(t), intent(inout) :: t1 + integer, allocatable :: a(:, :) + + ! HLFIR: %[[H_DESIG_01:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} shape {{.*}} + ! HLFIR: fir.prefetch %[[H_DESIG_01]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref> + !dir$ prefetch t1%a + a = t1%a ** 2 + + do i = 1, 256 + ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref>>> + !dir$ prefetch a + a(i, :) = a(i, :) + i + do j = 1, 256 + ! HLFIR: %[[H_DESIG_02:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} {{.*}} + ! 
HLFIR: fir.prefetch %[[H_DESIG_02]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref + !dir$ prefetch t1%a(i, j) + t1%a(i, j) = (a(i, j) + i*j) / t1%a(i, j) + end do + end do +end subroutine test_prefetch_02 From 32738eb0c716e7d8e19a28f28c861577af804c14 Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Mon, 10 Nov 2025 15:49:33 +0000 Subject: [PATCH 02/14] Fix the build failure --- flang/lib/Lower/Bridge.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 9d8e765e8adea..ce6b9f58bbfd7 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -3290,9 +3290,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { // TODO: Don't use default value, instead get the following // info from the directive uint32_t isWrite{0}, localityHint{3}, isData{1}; - builder->create(genLocation(dir.source), - memRef, isWrite, localityHint, - isData); + fir::PrefetchOp::create(*builder, genLocation(dir.source), + memRef, isWrite, localityHint, isData); } }, [&](const auto &) {}}, From 9557cd0f4b118d9b95e932a0d7c61b238c49d4d0 Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Tue, 11 Nov 2025 03:08:15 +0000 Subject: [PATCH 03/14] Remove debug print --- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 69734a2ae443d..adc5a50c45fff 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -3353,7 +3353,6 @@ struct PrefetchOpConversion : public fir::FIROpConversion { llvm::LogicalResult matchAndRewrite(fir::PrefetchOp prefetch, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { - llvm::errs() << "prefetch\n"; mlir::IntegerAttr rw = prefetch.getRwAttr(); mlir::IntegerAttr localityHint = prefetch.getLocalityHintAttr(); mlir::IntegerAttr cacheType = 
prefetch.getCacheTypeAttr(); From c28bbc29f184f024ea31e113be9a718c252d64c7 Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Tue, 11 Nov 2025 08:38:05 +0000 Subject: [PATCH 04/14] Use UnitAttr instead of I32Attr --- flang/include/flang/Optimizer/Dialect/FIROps.td | 12 ++++++------ flang/lib/Optimizer/CodeGen/CodeGen.cpp | 6 ++++-- flang/test/Lower/HLFIR/prefetch.f90 | 12 ++++++------ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 84d7ed29292ae..4f7abb1d052b7 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -368,15 +368,15 @@ def fir_PrefetchOp : fir_Op<"prefetch", []> { /// `memref' is the address to be prefetched /// `rw' : rw specifier > - /// read is 0, write is 1 + /// read is 0 (default), write is 1 /// `localityHint': temporal locality specifier > /// value ranging from 0 - no locality to 3 - extremely local /// `cacheType' : cache type specifier > - /// instruction cache is 0, data cache is 1 - let arguments = (ins AnyReferenceLike:$memref, - ConfinedAttr, IntMaxValue<1>]>:$rw, + /// instruction cache is 0 (default), data cache is 1 + /// NOTE: The numerical values used here is in reference to the LLVM LangRef + let arguments = (ins AnyReferenceLike:$memref, UnitAttr:$rw, ConfinedAttr, IntMaxValue<3>]>:$localityHint, - ConfinedAttr, IntMaxValue<1>]>:$cacheType); + UnitAttr:$cacheType); let assemblyFormat = "$memref attr-dict `:` type(operands)"; } @@ -974,7 +974,7 @@ def fir_ReboxAssumedRankOp : fir_Op<"rebox_assumed_rank", Example: ``` fir.rebox_assumed_rank %1 lbs zeroes : (!fir.box>) -> !fir.box> - ``` + ``` }]; let arguments = (ins diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index adc5a50c45fff..8a1fe5fc5d988 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ 
-3353,9 +3353,11 @@ struct PrefetchOpConversion : public fir::FIROpConversion { llvm::LogicalResult matchAndRewrite(fir::PrefetchOp prefetch, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { - mlir::IntegerAttr rw = prefetch.getRwAttr(); + mlir::IntegerAttr rw = mlir::IntegerAttr::get(rewriter.getI32Type(), + prefetch.getRwAttr() ? 1 : 0); mlir::IntegerAttr localityHint = prefetch.getLocalityHintAttr(); - mlir::IntegerAttr cacheType = prefetch.getCacheTypeAttr(); + mlir::IntegerAttr cacheType = mlir::IntegerAttr::get( + rewriter.getI32Type(), prefetch.getCacheTypeAttr() ? 1 : 0); mlir::LLVM::Prefetch::create(rewriter, prefetch.getLoc(), adaptor.getOperands().front(), rw, localityHint, cacheType); diff --git a/flang/test/Lower/HLFIR/prefetch.f90 b/flang/test/Lower/HLFIR/prefetch.f90 index 3fe0a1a18c4c3..2f33a78b0b396 100644 --- a/flang/test/Lower/HLFIR/prefetch.f90 +++ b/flang/test/Lower/HLFIR/prefetch.f90 @@ -18,7 +18,7 @@ subroutine test_prefetch_01() a = 23 - ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref> + ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType, localityHint = 3 : i32} : !fir.ref> !dir$ prefetch a i = sum(a) @@ -28,8 +28,8 @@ subroutine test_prefetch_01() ! HLFIR: %[[H_CON:.*]] = fir.convert %[[H_ADD]] : (i32) -> i64 ! HLFIR: %[[H_DESIG:.*]] = hlfir.designate %[[H_A]]#0 (%[[H_CON]]) : (!fir.ref>, i64) -> !fir.ref - ! HLFIR: fir.prefetch %[[H_DESIG]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref - ! HLFIR: fir.prefetch %[[H_J]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref + ! HLFIR: fir.prefetch %[[H_DESIG]] {cacheType, localityHint = 3 : i32} : !fir.ref + ! HLFIR: fir.prefetch %[[H_J]]#0 {cacheType, localityHint = 3 : i32} : !fir.ref do i = 1, (256 - 64) !dir$ prefetch a(i+64), j @@ -45,17 +45,17 @@ subroutine test_prefetch_02(t1) integer, allocatable :: a(:, :) ! 
HLFIR: %[[H_DESIG_01:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} shape {{.*}} - ! HLFIR: fir.prefetch %[[H_DESIG_01]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref> + ! HLFIR: fir.prefetch %[[H_DESIG_01]] {cacheType, localityHint = 3 : i32} : !fir.ref> !dir$ prefetch t1%a a = t1%a ** 2 do i = 1, 256 - ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref>>> + ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType, localityHint = 3 : i32} : !fir.ref>>> !dir$ prefetch a a(i, :) = a(i, :) + i do j = 1, 256 ! HLFIR: %[[H_DESIG_02:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} {{.*}} - ! HLFIR: fir.prefetch %[[H_DESIG_02]] {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref + ! HLFIR: fir.prefetch %[[H_DESIG_02]] {cacheType, localityHint = 3 : i32} : !fir.ref !dir$ prefetch t1%a(i, j) t1%a(i, j) = (a(i, j) + i*j) / t1%a(i, j) end do From 7c921f805899c8d46cec3c5a268373bf0174e01c Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Tue, 11 Nov 2025 08:38:30 +0000 Subject: [PATCH 05/14] Fix the tests --- flang/test/Integration/prefetch.f90 | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/flang/test/Integration/prefetch.f90 b/flang/test/Integration/prefetch.f90 index 1f7f6d091cfaa..f3fb7a950e328 100644 --- a/flang/test/Integration/prefetch.f90 +++ b/flang/test/Integration/prefetch.f90 @@ -14,24 +14,24 @@ subroutine test_prefetch_01() ! LLVM: {{.*}} = alloca i32, i64 1, align 4 - ! LLVM: %[[L_J:.*]] = alloca i32, i64 1, align 4 - ! LLVM: %[[L_I:.*]] = alloca i32, i64 1, align 4 - ! LLVM: %[[L_A:.*]] = alloca [256 x i32], i64 1, align 4 + ! LLVM: %[[VAR_J:.*]] = alloca i32, i64 1, align 4 + ! LLVM: %[[VAR_I:.*]] = alloca i32, i64 1, align 4 + ! LLVM: %[[VAR_A:.*]] = alloca [256 x i32], i64 1, align 4 integer :: i, j integer :: a(256) a = 23 - ! LLVM: call void @llvm.prefetch.p0(ptr %6, i32 0, i32 3, i32 1) + ! 
LLVM: call void @llvm.prefetch.p0(ptr %[[VAR_A]], i32 0, i32 3, i32 1) !dir$ prefetch a i = sum(a) - ! LLVM: %[[L_LOAD:.*]] = load i32, ptr %5, align 4 - ! LLVM: %[[L_ADD:.*]] = add nsw i32 %[[L_LOAD]], 64 - ! LLVM: %[[L_GEP:.*]] = getelementptr i32, ptr %[[L_A]], i64 {{.*}} - ! LLVM: call void @llvm.prefetch.p0(ptr %[[L_GEP]], i32 0, i32 3, i32 1) - ! LLVM: call void @llvm.prefetch.p0(ptr %[[L_J]], i32 0, i32 3, i32 1) + ! LLVM: %[[LOAD_I:.*]] = load i32, ptr %[[VAR_I]], align 4 + ! LLVM: %{{.*}} = add nsw i32 %[[LOAD_I]], 64 + ! LLVM: %[[GEP_A:.*]] = getelementptr i32, ptr %[[VAR_A]], i64 {{.*}} + ! LLVM: call void @llvm.prefetch.p0(ptr %[[GEP_A]], i32 0, i32 3, i32 1) + ! LLVM: call void @llvm.prefetch.p0(ptr %[[VAR_J]], i32 0, i32 3, i32 1) do i = 1, (256 - 64) !dir$ prefetch a(i+64), j a(i) = a(i-32) + a(i+32) + j From 7c98c645fe7c4978a4d4ede15f65c3b04a25fbe5 Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Thu, 13 Nov 2025 06:12:20 +0000 Subject: [PATCH 06/14] Revert a space fix --- flang/include/flang/Optimizer/Dialect/FIROps.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 4f7abb1d052b7..4b6eb98bc3530 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -974,7 +974,7 @@ def fir_ReboxAssumedRankOp : fir_Op<"rebox_assumed_rank", Example: ``` fir.rebox_assumed_rank %1 lbs zeroes : (!fir.box>) -> !fir.box> - ``` + ``` }]; let arguments = (ins From 1da8a9517cec8ba2babad0750408a8f811c951b2 Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Thu, 13 Nov 2025 15:52:08 +0000 Subject: [PATCH 07/14] Use Data instead of Box for allocatable array --- flang/lib/Lower/Bridge.cpp | 13 +++++++++---- flang/test/Lower/HLFIR/prefetch.f90 | 4 +++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 
ce6b9f58bbfd7..d6062bbabf821 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -3282,16 +3282,21 @@ class FirConverter : public Fortran::lower::AbstractConverter { Fortran::lower::SomeExpr expr{*ea.Analyze( std::get(p.value().u))}; Fortran::lower::StatementContext stmtCtx; + mlir::Location loc = genLocation(dir.source); mlir::Value memRef{Fortran::lower::convertExprToHLFIR( - genLocation(dir.source), *this, expr, - localSymbols, stmtCtx) + loc, *this, expr, localSymbols, stmtCtx) .getBase()}; + if (mlir::isa( + fir::unwrapRefType(memRef.getType()))) { + memRef = fir::LoadOp::create(*builder, loc, memRef); + memRef = fir::BoxAddrOp::create(*builder, loc, memRef); + } // TODO: Don't use default value, instead get the following // info from the directive uint32_t isWrite{0}, localityHint{3}, isData{1}; - fir::PrefetchOp::create(*builder, genLocation(dir.source), - memRef, isWrite, localityHint, isData); + fir::PrefetchOp::create(*builder, loc, memRef, isWrite, + localityHint, isData); } }, [&](const auto &) {}}, diff --git a/flang/test/Lower/HLFIR/prefetch.f90 b/flang/test/Lower/HLFIR/prefetch.f90 index 2f33a78b0b396..2a30584d94563 100644 --- a/flang/test/Lower/HLFIR/prefetch.f90 +++ b/flang/test/Lower/HLFIR/prefetch.f90 @@ -50,7 +50,9 @@ subroutine test_prefetch_02(t1) a = t1%a ** 2 do i = 1, 256 - ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType, localityHint = 3 : i32} : !fir.ref>>> + ! HLFIR: %[[A_LOAD:.*]] = fir.load %[[H_A]]#0 : !fir.ref>>> + ! HLFIR: %[[A_BOX:.*]] = fir.box_addr %[[A_LOAD]] : (!fir.box>>) -> !fir.heap> + ! 
HLFIR: fir.prefetch %[[A_BOX]] {cacheType, localityHint = 3 : i32} : !fir.heap> !dir$ prefetch a a(i, :) = a(i, :) + i do j = 1, 256 From ed117745c512deea862dd923287a130310f52bb7 Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Wed, 26 Nov 2025 12:15:04 +0530 Subject: [PATCH 08/14] Fix the FIROps prefetch description --- flang/include/flang/Optimizer/Dialect/FIROps.td | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 4b6eb98bc3530..78376a3ce17bc 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -360,7 +360,7 @@ def fir_PrefetchOp : fir_Op<"prefetch", []> { ``` %a = ... -> !fir.ref - fir.prefetch %a {cacheType = 1 : i32, localityHint = 3 : i32, rw = 0 : i32} : !fir.ref + fir.prefetch %a {cacheType, localityHint = 3 : i32} : !fir.ref // ... fir.load %a : !fir.ref // use the prefetched value ``` @@ -372,11 +372,14 @@ def fir_PrefetchOp : fir_Op<"prefetch", []> { /// `localityHint': temporal locality specifier > /// value ranging from 0 - no locality to 3 - extremely local /// `cacheType' : cache type specifier > - /// instruction cache is 0 (default), data cache is 1 + /// instruction cache is 0, data cache is 1 (default) /// NOTE: The numerical values used here is in reference to the LLVM LangRef - let arguments = (ins AnyReferenceLike:$memref, UnitAttr:$rw, - ConfinedAttr, IntMaxValue<3>]>:$localityHint, - UnitAttr:$cacheType); + let arguments = + (ins Arg : $memref, + UnitAttr : $rw, + ConfinedAttr, IntMaxValue<3>]> : $localityHint, + UnitAttr : $cacheType); let assemblyFormat = "$memref attr-dict `:` type(operands)"; } @@ -974,7 +977,7 @@ def fir_ReboxAssumedRankOp : fir_Op<"rebox_assumed_rank", Example: ``` fir.rebox_assumed_rank %1 lbs zeroes : (!fir.box>) -> !fir.box> - ``` + ``` }]; let arguments = (ins From e7ab97474bf1514a9f3b8455e1555917cbc8dd25 
Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Tue, 9 Dec 2025 09:47:32 +0530 Subject: [PATCH 09/14] Improve the arguments representation in FIR --- .../include/flang/Optimizer/Dialect/FIROps.td | 4 +- flang/lib/Optimizer/Dialect/FIROps.cpp | 81 +++++++++++++++++++ 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 78376a3ce17bc..55ce0be027e59 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -360,7 +360,7 @@ def fir_PrefetchOp : fir_Op<"prefetch", []> { ``` %a = ... -> !fir.ref - fir.prefetch %a {cacheType, localityHint = 3 : i32} : !fir.ref + fir.prefetch %a {read, data, localityHint = 3 : i32} : !fir.ref // ... fir.load %a : !fir.ref // use the prefetched value ``` @@ -381,7 +381,7 @@ def fir_PrefetchOp : fir_Op<"prefetch", []> { ConfinedAttr, IntMaxValue<3>]> : $localityHint, UnitAttr : $cacheType); - let assemblyFormat = "$memref attr-dict `:` type(operands)"; + let hasCustomAssemblyFormat = 1; } def fir_CopyOp : fir_Op<"copy", [DeclareOpInterfaceMethods]> { diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 4f97acaa88b7a..20bcbedda7fc9 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -4264,6 +4264,87 @@ void fir::StoreOp::getEffects( addVolatileMemoryEffects({getMemref().getType()}, effects); } +//===----------------------------------------------------------------------===// +// PrefetchOp +//===----------------------------------------------------------------------===// + +mlir::ParseResult fir::PrefetchOp::parse(mlir::OpAsmParser &parser, + mlir::OperationState &result) { + mlir::OpAsmParser::UnresolvedOperand memref; + // mlir::Type type = parser.getBuilder().getIntegerType(64); + if (parser.parseOperand(memref)) + return mlir::failure(); + + if
(mlir::succeeded(parser.parseLBrace())) { + llvm::StringRef kw; + if (parser.parseKeyword(&kw)) + return mlir::failure(); + + if (kw == "read") + result.addAttribute("rw", parser.getBuilder().getBoolAttr(false)); + else if (kw == "write") + result.addAttribute("rw", parser.getBuilder().getUnitAttr()); + else + return parser.emitError(parser.getCurrentLocation(), + "Expected either read or write keyword"); + + if (parser.parseComma()) + return mlir::failure(); + + if (parser.parseKeyword(&kw)) + return mlir::failure(); + if (kw == "instruction") { + llvm::errs() << "intruc\n"; + result.addAttribute("cacheType", parser.getBuilder().getBoolAttr(false)); + } else if (kw == "data") { + llvm::errs() << "data\n"; + result.addAttribute("cacheType", parser.getBuilder().getUnitAttr()); + } else + return parser.emitError(parser.getCurrentLocation(), + "Expected either read or write keyword"); + + if (parser.parseComma()) + return mlir::failure(); + + if (mlir::succeeded(parser.parseKeyword("localityHint"))) { + if (parser.parseEqual()) + return mlir::failure(); + mlir::Attribute intAttr; + if (parser.parseAttribute(intAttr)) + return mlir::failure(); + result.addAttribute("localityHint", intAttr); + } + if (parser.parseRBrace()) + return mlir::failure(); + } + mlir::Type type; + if (parser.parseColonType(type)) + return mlir::failure(); + + if (parser.resolveOperand(memref, type, result.operands)) + return mlir::failure(); + return ::mlir::success(); +} + +void fir::PrefetchOp::print(mlir::OpAsmPrinter &p) { + p << " "; + p.printOperand(getMemref()); + p << " {"; + if (getRw()) + p << "write"; + else + p << "read"; + p << ", "; + if (getCacheType()) + p << "data"; + else + p << "instruction"; + p << ", localityHint = "; + p << getLocalityHint(); + p << " : " << getLocalityHintAttr().getType(); + p << "} : " << getMemref().getType(); +} + //===----------------------------------------------------------------------===// // CopyOp 
//===----------------------------------------------------------------------===// From d5aedb76d7587d22f15f415c5c2ccc9706144696 Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Tue, 9 Dec 2025 11:44:50 +0530 Subject: [PATCH 10/14] Revert the space fix --- flang/include/flang/Optimizer/Dialect/FIROps.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 55ce0be027e59..ee76606820b9e 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -977,7 +977,7 @@ def fir_ReboxAssumedRankOp : fir_Op<"rebox_assumed_rank", Example: ``` fir.rebox_assumed_rank %1 lbs zeroes : (!fir.box>) -> !fir.box> - ``` + ``` }]; let arguments = (ins From bc1f153c119c422fe8bdeae4130fd95aa66450ec Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Tue, 9 Dec 2025 11:55:09 +0530 Subject: [PATCH 11/14] Fix variable name --- flang/lib/Lower/Bridge.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index d6062bbabf821..5718f631a5e50 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -3294,8 +3294,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { // TODO: Don't use default value, instead get the following // info from the directive - uint32_t isWrite{0}, localityHint{3}, isData{1}; - fir::PrefetchOp::create(*builder, loc, memRef, isWrite, + uint32_t isRead{0}, localityHint{3}, isData{1}; + fir::PrefetchOp::create(*builder, loc, memRef, isRead, localityHint, isData); } }, From 85789d9f75d4a52efc46869aef90244f02f4e04a Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Tue, 9 Dec 2025 11:55:26 +0530 Subject: [PATCH 12/14] Fix test failure --- flang/test/Lower/HLFIR/prefetch.f90 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git 
a/flang/test/Lower/HLFIR/prefetch.f90 b/flang/test/Lower/HLFIR/prefetch.f90 index 2a30584d94563..974454d15903f 100644 --- a/flang/test/Lower/HLFIR/prefetch.f90 +++ b/flang/test/Lower/HLFIR/prefetch.f90 @@ -18,7 +18,7 @@ subroutine test_prefetch_01() a = 23 - ! HLFIR: fir.prefetch %[[H_A]]#0 {cacheType, localityHint = 3 : i32} : !fir.ref> + ! HLFIR: fir.prefetch %[[H_A]]#0 {read, data, localityHint = 3 : i32} : !fir.ref> !dir$ prefetch a i = sum(a) @@ -28,8 +28,8 @@ subroutine test_prefetch_01() ! HLFIR: %[[H_CON:.*]] = fir.convert %[[H_ADD]] : (i32) -> i64 ! HLFIR: %[[H_DESIG:.*]] = hlfir.designate %[[H_A]]#0 (%[[H_CON]]) : (!fir.ref>, i64) -> !fir.ref - ! HLFIR: fir.prefetch %[[H_DESIG]] {cacheType, localityHint = 3 : i32} : !fir.ref - ! HLFIR: fir.prefetch %[[H_J]]#0 {cacheType, localityHint = 3 : i32} : !fir.ref + ! HLFIR: fir.prefetch %[[H_DESIG]] {read, data, localityHint = 3 : i32} : !fir.ref + ! HLFIR: fir.prefetch %[[H_J]]#0 {read, data, localityHint = 3 : i32} : !fir.ref do i = 1, (256 - 64) !dir$ prefetch a(i+64), j @@ -45,19 +45,19 @@ subroutine test_prefetch_02(t1) integer, allocatable :: a(:, :) ! HLFIR: %[[H_DESIG_01:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} shape {{.*}} - ! HLFIR: fir.prefetch %[[H_DESIG_01]] {cacheType, localityHint = 3 : i32} : !fir.ref> + ! HLFIR: fir.prefetch %[[H_DESIG_01]] {read, data, localityHint = 3 : i32} : !fir.ref> !dir$ prefetch t1%a a = t1%a ** 2 do i = 1, 256 ! HLFIR: %[[A_LOAD:.*]] = fir.load %[[H_A]]#0 : !fir.ref>>> ! HLFIR: %[[A_BOX:.*]] = fir.box_addr %[[A_LOAD]] : (!fir.box>>) -> !fir.heap> - ! HLFIR: fir.prefetch %[[A_BOX]] {cacheType, localityHint = 3 : i32} : !fir.heap> + ! HLFIR: fir.prefetch %[[A_BOX]] {read, data, localityHint = 3 : i32} : !fir.heap> !dir$ prefetch a a(i, :) = a(i, :) + i do j = 1, 256 ! HLFIR: %[[H_DESIG_02:.*]] = hlfir.designate %[[H_ARG0]]#0{"a"} {{.*}} - ! HLFIR: fir.prefetch %[[H_DESIG_02]] {cacheType, localityHint = 3 : i32} : !fir.ref + ! 
HLFIR: fir.prefetch %[[H_DESIG_02]] {read, data, localityHint = 3 : i32} : !fir.ref !dir$ prefetch t1%a(i, j) t1%a(i, j) = (a(i, j) + i*j) / t1%a(i, j) end do From 2085e9295e4a1fd6dfc550eb7d6410641ec16905 Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Tue, 9 Dec 2025 12:18:35 +0530 Subject: [PATCH 13/14] Fix the code --- flang/lib/Optimizer/Dialect/FIROps.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 20bcbedda7fc9..e509b7cc302f1 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -4271,7 +4271,6 @@ void fir::StoreOp::getEffects( mlir::ParseResult fir::PrefetchOp::parse(mlir::OpAsmParser &parser, mlir::OperationState &result) { mlir::OpAsmParser::UnresolvedOperand memref; - // mlir::Type type = parser.getBuilder().getIntegerType(64); if (parser.parseOperand(memref)) return mlir::failure(); @@ -4294,14 +4293,12 @@ mlir::ParseResult fir::PrefetchOp::parse(mlir::OpAsmParser &parser, if (parser.parseKeyword(&kw)) return mlir::failure(); if (kw == "instruction") { - llvm::errs() << "intruc\n"; result.addAttribute("cacheType", parser.getBuilder().getBoolAttr(false)); } else if (kw == "data") { - llvm::errs() << "data\n"; result.addAttribute("cacheType", parser.getBuilder().getUnitAttr()); } else return parser.emitError(parser.getCurrentLocation(), - "Expected either read or write keyword"); + "Expected either intruction or data keyword"); if (parser.parseComma()) return mlir::failure(); From 6a2bd3ecf4d227c8c0e7874ae511ec55dd43db82 Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Wed, 10 Dec 2025 12:35:55 +0530 Subject: [PATCH 14/14] Address the review comments --- flang/include/flang/Optimizer/Dialect/FIROps.td | 2 +- flang/lib/Lower/Bridge.cpp | 4 ++-- flang/lib/Optimizer/Dialect/FIROps.cpp | 2 +- flang/test/Lower/HLFIR/prefetch.f90 | 8 ++++++++ 4 files changed, 12 insertions(+), 4 
deletions(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index ee76606820b9e..bcbf1e1b9e5f5 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -376,7 +376,7 @@ def fir_PrefetchOp : fir_Op<"prefetch", []> { /// NOTE: The numerical values used here is in reference to the LLVM LangRef let arguments = (ins Arg : $memref, + "prefetch memory address", [MemRead]> : $memref, UnitAttr : $rw, ConfinedAttr, IntMaxValue<3>]> : $localityHint, UnitAttr : $cacheType); diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 5718f631a5e50..6829f8d6c6c07 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -3288,8 +3288,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { .getBase()}; if (mlir::isa( fir::unwrapRefType(memRef.getType()))) { - memRef = fir::LoadOp::create(*builder, loc, memRef); - memRef = fir::BoxAddrOp::create(*builder, loc, memRef); + memRef = fir::BoxAddrOp::create( + *builder, loc, builder->loadIfRef(loc, memRef)); } // TODO: Don't use default value, instead get the following diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index e509b7cc302f1..e04a47e82dd53 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -4320,7 +4320,7 @@ mlir::ParseResult fir::PrefetchOp::parse(mlir::OpAsmParser &parser, if (parser.resolveOperand(memref, type, result.operands)) return mlir::failure(); - return ::mlir::success(); + return mlir::success(); } void fir::PrefetchOp::print(mlir::OpAsmPrinter &p) { diff --git a/flang/test/Lower/HLFIR/prefetch.f90 b/flang/test/Lower/HLFIR/prefetch.f90 index 974454d15903f..b51babc522ff1 100644 --- a/flang/test/Lower/HLFIR/prefetch.f90 +++ b/flang/test/Lower/HLFIR/prefetch.f90 @@ -63,3 +63,11 @@ subroutine test_prefetch_02(t1) end do end do end subroutine 
test_prefetch_02 + +subroutine test_prefetch_03(a) + integer :: a(:) + ! HLFIR: %[[BOX:.*]] = fir.box_addr {{.*}} : (!fir.box>) -> !fir.ref> + ! HLFIR: fir.prefetch %[[BOX]] {read, data, localityHint = 3 : i32} : !fir.ref> + !dir$ prefetch a + a = sum(a) +end subroutine test_prefetch_03