Skip to content

Conversation

@Jaddyen
Copy link
Contributor

@Jaddyen Jaddyen commented Jul 29, 2025

This patch lowers memref.copy to emitc.call_opaque "memcpy".
From:

func.func @copying(%arg0 : memref<9x4x5x7xf32>, %arg1 : memref<9x4x5x7xf32>) {
  memref.copy %arg0, %arg1 : memref<9x4x5x7xf32> to memref<9x4x5x7xf32>
  return
}

To:

#include <cstring>
void copying(float v1[9][4][5][7], float v2[9][4][5][7]) {
  size_t v3 = 0;
  float* v4 = &v2[v3][v3][v3][v3];
  float* v5 = &v1[v3][v3][v3][v3];
  size_t v6 = sizeof(float);
  size_t v7 = 1260;
  size_t v8 = v6 * v7;
  memcpy(v5, v4, v8);
  return;
}

@Jaddyen Jaddyen marked this pull request as ready for review July 29, 2025 18:23
@llvmbot
Copy link
Member

llvmbot commented Jul 29, 2025

@llvm/pr-subscribers-mlir

@llvm/pr-subscribers-mlir-emitc

Author: Jaden Angella (Jaddyen)

Changes

This patch lowers memref.copy to emitc.call_opaque "memcpy".
From:

func.func @<!-- -->copying(%arg0 : memref&lt;9x4xf32&gt;) {
  memref.copy %arg0, %arg0 : memref&lt;9x4xf32&gt; to memref&lt;9x4xf32&gt;
  return
}

To:

void copying(float v1[9][4]) {
  size_t v2 = 0;
  size_t v3 = 0;
  float* v4 = &amp;v1[v2][v3];
  size_t v5 = 0;
  size_t v6 = 0;
  float* v7 = &amp;v1[v5][v6];
  size_t v8 = sizeof(float);
  size_t v9 = 36;
  size_t v10 = v8 * v9;
  memcpy(v7, v4, v10);
  return;
}

Full diff: https://github.com/llvm/llvm-project/pull/151206.diff

5 Files Affected:

  • (modified) mlir/include/mlir/Conversion/MemRefToEmitC/MemRefToEmitC.h (+2)
  • (modified) mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitC.cpp (+75-2)
  • (modified) mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitCPass.cpp (+20-7)
  • (added) mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-copy.mlir (+24)
  • (modified) mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-failed.mlir (-8)
diff --git a/mlir/include/mlir/Conversion/MemRefToEmitC/MemRefToEmitC.h b/mlir/include/mlir/Conversion/MemRefToEmitC/MemRefToEmitC.h
index b595b6a308bea..4ea6649d64a92 100644
--- a/mlir/include/mlir/Conversion/MemRefToEmitC/MemRefToEmitC.h
+++ b/mlir/include/mlir/Conversion/MemRefToEmitC/MemRefToEmitC.h
@@ -10,8 +10,10 @@
 
 constexpr const char *alignedAllocFunctionName = "aligned_alloc";
 constexpr const char *mallocFunctionName = "malloc";
+constexpr const char *memcpyFunctionName = "memcpy";
 constexpr const char *cppStandardLibraryHeader = "cstdlib";
 constexpr const char *cStandardLibraryHeader = "stdlib.h";
+constexpr const char *stringLibraryHeader = "string.h";
 
 namespace mlir {
 class DialectRegistry;
diff --git a/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitC.cpp b/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitC.cpp
index 6bd0e2d4d4b08..adb0eb77fdf35 100644
--- a/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitC.cpp
+++ b/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitC.cpp
@@ -97,6 +97,29 @@ Type convertMemRefType(MemRefType opTy, const TypeConverter *typeConverter) {
   return resultTy;
 }
 
+Value calculateMemrefTotalSizeBytes(Location loc, MemRefType memrefType,
+                                    ConversionPatternRewriter &rewriter) {
+  emitc::CallOpaqueOp elementSize = rewriter.create<emitc::CallOpaqueOp>(
+      loc, emitc::SizeTType::get(rewriter.getContext()),
+      rewriter.getStringAttr("sizeof"), ValueRange{},
+      ArrayAttr::get(rewriter.getContext(),
+                     {TypeAttr::get(memrefType.getElementType())}));
+
+  IndexType indexType = rewriter.getIndexType();
+  int64_t numElements = 1;
+  for (int64_t dimSize : memrefType.getShape()) {
+    numElements *= dimSize;
+  }
+  emitc::ConstantOp numElementsValue = rewriter.create<emitc::ConstantOp>(
+      loc, indexType, rewriter.getIndexAttr(numElements));
+
+  Type sizeTType = emitc::SizeTType::get(rewriter.getContext());
+  emitc::MulOp totalSizeBytes = rewriter.create<emitc::MulOp>(
+      loc, sizeTType, elementSize.getResult(0), numElementsValue);
+
+  return totalSizeBytes.getResult();
+}
+
 struct ConvertAlloc final : public OpConversionPattern<memref::AllocOp> {
   using OpConversionPattern::OpConversionPattern;
   LogicalResult
@@ -159,6 +182,55 @@ struct ConvertAlloc final : public OpConversionPattern<memref::AllocOp> {
   }
 };
 
+struct ConvertCopy final : public OpConversionPattern<memref::CopyOp> {
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::CopyOp copyOp, OpAdaptor operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    Location loc = copyOp.getLoc();
+    MemRefType srcMemrefType =
+        dyn_cast<MemRefType>(copyOp.getSource().getType());
+    MemRefType targetMemrefType =
+        dyn_cast<MemRefType>(copyOp.getTarget().getType());
+
+    if (!isMemRefTypeLegalForEmitC(srcMemrefType) ||
+        !isMemRefTypeLegalForEmitC(targetMemrefType)) {
+      return rewriter.notifyMatchFailure(
+          loc, "incompatible memref type for EmitC conversion");
+    }
+
+    emitc::ConstantOp zeroIndex = rewriter.create<emitc::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(0));
+    auto srcArrayValue =
+        dyn_cast<TypedValue<emitc::ArrayType>>(operands.getSource());
+
+    emitc::SubscriptOp srcSubPtr = rewriter.create<emitc::SubscriptOp>(
+        loc, srcArrayValue, ValueRange{zeroIndex, zeroIndex});
+    emitc::ApplyOp srcPtr = rewriter.create<emitc::ApplyOp>(
+        loc, emitc::PointerType::get(srcMemrefType.getElementType()),
+        rewriter.getStringAttr("&"), srcSubPtr);
+
+    auto arrayValue =
+        dyn_cast<TypedValue<emitc::ArrayType>>(operands.getTarget());
+    emitc::SubscriptOp targetSubPtr = rewriter.create<emitc::SubscriptOp>(
+        loc, arrayValue, ValueRange{zeroIndex, zeroIndex});
+    emitc::ApplyOp targetPtr = rewriter.create<emitc::ApplyOp>(
+        loc, emitc::PointerType::get(targetMemrefType.getElementType()),
+        rewriter.getStringAttr("&"), targetSubPtr);
+
+    emitc::CallOpaqueOp memCpyCall = rewriter.create<emitc::CallOpaqueOp>(
+        loc, TypeRange{}, "memcpy",
+        ValueRange{
+            targetPtr.getResult(), srcPtr.getResult(),
+            calculateMemrefTotalSizeBytes(loc, srcMemrefType, rewriter)});
+
+    rewriter.replaceOp(copyOp, memCpyCall.getResults());
+
+    return success();
+  }
+};
+
 struct ConvertGlobal final : public OpConversionPattern<memref::GlobalOp> {
   using OpConversionPattern::OpConversionPattern;
 
@@ -320,6 +392,7 @@ void mlir::populateMemRefToEmitCTypeConversion(TypeConverter &typeConverter) {
 
 void mlir::populateMemRefToEmitCConversionPatterns(
     RewritePatternSet &patterns, const TypeConverter &converter) {
-  patterns.add<ConvertAlloca, ConvertAlloc, ConvertGlobal, ConvertGetGlobal,
-               ConvertLoad, ConvertStore>(converter, patterns.getContext());
+  patterns.add<ConvertAlloca, ConvertAlloc, ConvertCopy, ConvertGlobal,
+               ConvertGetGlobal, ConvertLoad, ConvertStore>(
+      converter, patterns.getContext());
 }
diff --git a/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitCPass.cpp b/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitCPass.cpp
index e78dd76d6e256..8e965b42f1043 100644
--- a/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitCPass.cpp
+++ b/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitCPass.cpp
@@ -18,6 +18,7 @@
 #include "mlir/IR/Attributes.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/DialectConversion.h"
+#include "llvm/ADT/StringRef.h"
 
 namespace mlir {
 #define GEN_PASS_DEF_CONVERTMEMREFTOEMITC
@@ -27,6 +28,15 @@ namespace mlir {
 using namespace mlir;
 
 namespace {
+
+emitc::IncludeOp addHeader(OpBuilder &builder, ModuleOp module,
+                           StringRef headerName) {
+  StringAttr includeAttr = builder.getStringAttr(headerName);
+  return builder.create<emitc::IncludeOp>(
+      module.getLoc(), includeAttr,
+      /*is_standard_include=*/builder.getUnitAttr());
+}
+
 struct ConvertMemRefToEmitCPass
     : public impl::ConvertMemRefToEmitCBase<ConvertMemRefToEmitCPass> {
   using Base::Base;
@@ -57,7 +67,8 @@ struct ConvertMemRefToEmitCPass
     mlir::ModuleOp module = getOperation();
     module.walk([&](mlir::emitc::CallOpaqueOp callOp) {
       if (callOp.getCallee() != alignedAllocFunctionName &&
-          callOp.getCallee() != mallocFunctionName) {
+          callOp.getCallee() != mallocFunctionName &&
+          callOp.getCallee() != memcpyFunctionName) {
         return mlir::WalkResult::advance();
       }
 
@@ -76,12 +87,14 @@ struct ConvertMemRefToEmitCPass
       }
 
       mlir::OpBuilder builder(module.getBody(), module.getBody()->begin());
-      StringAttr includeAttr =
-          builder.getStringAttr(options.lowerToCpp ? cppStandardLibraryHeader
-                                                   : cStandardLibraryHeader);
-      builder.create<mlir::emitc::IncludeOp>(
-          module.getLoc(), includeAttr,
-          /*is_standard_include=*/builder.getUnitAttr());
+      StringRef headerName;
+      if (callOp.getCallee() == memcpyFunctionName)
+        headerName = stringLibraryHeader;
+      else
+        headerName = options.lowerToCpp ? cppStandardLibraryHeader
+                                        : cStandardLibraryHeader;
+
+      addHeader(builder, module, headerName);
       return mlir::WalkResult::interrupt();
     });
   }
diff --git a/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-copy.mlir b/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-copy.mlir
new file mode 100644
index 0000000000000..4b6eb50807513
--- /dev/null
+++ b/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-copy.mlir
@@ -0,0 +1,24 @@
+// RUN: mlir-opt -convert-memref-to-emitc %s  | FileCheck %s
+
+func.func @copying(%arg0 : memref<2x4xf32>) {
+  memref.copy %arg0, %arg0 : memref<2x4xf32> to memref<2x4xf32>
+  return
+}
+
+// CHECK: module {
+// CHECK-NEXT:  emitc.include <"string.h">
+// CHECK-LABEL:  copying
+// CHECK-NEXT: %0 = builtin.unrealized_conversion_cast %arg0 : memref<2x4xf32> to !emitc.array<2x4xf32>
+// CHECK-NEXT: %1 = "emitc.constant"() <{value = 0 : index}> : () -> index
+// CHECK-NEXT: %2 = emitc.subscript %0[%1, %1] : (!emitc.array<2x4xf32>, index, index) -> !emitc.lvalue<f32>
+// CHECK-NEXT: %3 = emitc.apply "&"(%2) : (!emitc.lvalue<f32>) -> !emitc.ptr<f32>
+// CHECK-NEXT: %4 = emitc.subscript %0[%1, %1] : (!emitc.array<2x4xf32>, index, index) -> !emitc.lvalue<f32>
+// CHECK-NEXT: %5 = emitc.apply "&"(%4) : (!emitc.lvalue<f32>) -> !emitc.ptr<f32>
+// CHECK-NEXT: %6 = emitc.call_opaque "sizeof"() {args = [f32]} : () -> !emitc.size_t
+// CHECK-NEXT: %7 = "emitc.constant"() <{value = 8 : index}> : () -> index
+// CHECK-NEXT: %8 = emitc.mul %6, %7 : (!emitc.size_t, index) -> !emitc.size_t
+// CHECK-NEXT: emitc.call_opaque "memcpy"(%5, %3, %8) : (!emitc.ptr<f32>, !emitc.ptr<f32>, !emitc.size_t) -> ()
+// CHECK-NEXT:    return
+// CHECK-NEXT:  }
+// CHECK-NEXT:}
+
diff --git a/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-failed.mlir b/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-failed.mlir
index fda01974d3fc8..b6eccfc8f0050 100644
--- a/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-failed.mlir
+++ b/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-failed.mlir
@@ -1,13 +1,5 @@
 // RUN: mlir-opt -convert-memref-to-emitc %s -split-input-file -verify-diagnostics
 
-func.func @memref_op(%arg0 : memref<2x4xf32>) {
-  // expected-error@+1 {{failed to legalize operation 'memref.copy'}}
-  memref.copy %arg0, %arg0 : memref<2x4xf32> to memref<2x4xf32>
-  return
-}
-
-// -----
-
 func.func @alloca_with_dynamic_shape() {
   %0 = index.constant 1
   // expected-error@+1 {{failed to legalize operation 'memref.alloca'}}

@Jaddyen Jaddyen requested review from ilovepi, jpienaar and mtrofin July 29, 2025 18:24
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can take the more generic OpBuilder here instead of a Rewriter?

Comment on lines 197 to 201
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd split this to two checks to specify in the message which memref was incompatible.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This only works for 2D arrays

Comment on lines 216 to 220
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicates the work for srcArrayValue, can be folded into a lambda.

Comment on lines 192 to 195
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use cast (these arguments must be MemRefs by op definition).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function should be probably be named addStandardHeader as it always sets is_standard_include.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not follow the C/C++ distinction as with stdlib, i.e. cstring vs string.h?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There should probably be a c++ test too.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you only have a test for <string.h>, right? in the c++ mode, you'd have , so you probably need a test for the cpp mode too. Sorry for not being more explicit.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ack, thanks for the feedback!

@Jaddyen Jaddyen requested a review from aniragil August 4, 2025 22:05
Comment on lines 209 to 212
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • You're already capturing rewriter and zeroIndex, no need to pass them as arguments (if you move zeroIndex creation above the lambda).
  • Usually better to capture exactly lambda uses than capture everything (&).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ack, thanks for the feedback!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not take element type from arrayValue (as done for the rank)? Would save passing memrefType as arg.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ack, thanks for the feedback!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use cast rather than dyn_cast when you expect cast to succeed (it will assert on failure).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rewriter is a builder, you can just pass it as argument (and should, as you're working within a conversion pattern).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change itself is good (removing braces from single-line blocks) but should be done on a separate PR to avoid cluttering this one with unrelated modifications.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ideally, yes.
but im already modifying this portion of code and this would be a single line change.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ideally, yes. but im already modifying this portion of code and this would be a single line change.

Practically too. It's not about the number of lines or their proximity to other changes. LLVM's contribution policy requires patches to be minimal. More specifically:

* not contain any unrelated changes
* be an isolated change. Independent changes should be submitted as separate patches as this makes reviewing easier.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ack, thanks for the feedback!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here.
Also, shouldn't the code here also check for c/cppStringLibraryHeader?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, i should!
thanks for the pointer!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ideally, yes. but im already modifying this portion of code and this would be a single line change.

Practically too. It's not about the number of lines or their proximity to other changes. LLVM's contribution policy requires patches to be minimal. More specifically:

* not contain any unrelated changes
* be an isolated change. Independent changes should be submitted as separate patches as this makes reviewing easier.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure this works as expected: the same module may contain both malloc and memcpy calls. The code seems to settle for a single call that needs an include directive, and IINM not necessarily the right one (as isExpectedIncludeOp doesn't compare includeOp against the specific callOp.getCallee()). Am I missing something? (In any case please add such a test).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ack, thanks for the feedback!

Comment on lines 9 to 12
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of the lines are common between the two prefixes, and can/should stay CHECK lines, since those are always checked. The lines that differ can have their own prefixes. You can remove the -NEXT suffix as needed to make the test pass, but I'd only expect you to need to change at most one line. So from a quick skim it seems like you'll only need to change 3 lines from the old version to make the addition check work.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ack, thanks for the feedback!

@Jaddyen Jaddyen requested review from aniragil and ilovepi August 7, 2025 04:27
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// CPP-LABEL: copying
// CHECK-LABEL: copying

Wasn't this part of the output in both groups in the previous version?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it was!
thanks for the feedback.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// NOCPP: module {
// CHECK: module {

This is common between both.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// NOCPP-NEXT: emitc.include <"string.h">
// NOCPP: emitc.include <"string.h">

Comment on lines 13 to 14
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// CPP: module {
// CPP-NEXT: emitc.include <"cstring">
// CPP: emitc.include <"cstring">

Comment on lines 11 to 17
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// NOCPP: module {
// NOCPP-NEXT: emitc.include <"string.h">
// NOCPP-NEXT: emitc.include <"stdlib.h">
// CPP: module {
// CPP-NEXT: emitc.include <"cstring">
// CPP-NEXT: emitc.include <"cstdlib">
// CHECK: module {
// NOCPP: emitc.include <"string.h">
// NOCPP-NEXT: emitc.include <"stdlib.h">
// CPP: emitc.include <"cstring">
// CPP-NEXT: emitc.include <"cstdlib">

Comment on lines 112 to 115
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe?

Suggested change
int64_t numElements = 1;
for (int64_t dimSize : memrefType.getShape()) {
numElements *= dimSize;
}
int64_t numElements = std::accumulate(memrefType.getShape().begin(), memrefType.getShape().end(), int64_t{1}, std::multiplies<int64_t>());

Comment on lines 214 to 217
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can use the SmallVector (size_t Size, const T &Value) constructor to avoid the loop and subsequent push_back()s. If nothing else, its a good practice to reserve(rank) elements before the loop starts, and avoid potential allocations.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Value calculateMemrefTotalSizeBytes(Location loc, MemRefType memrefType,
static Value calculateMemrefTotalSizeBytes(Location loc, MemRefType memrefType,

I don't think this gets used outside this TU, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Simpler to push_back directly on each case.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Modules may have multiple calls to malloc/memcpy. Use a set instead? (IINM this currently doesn't cause double header inclusion because of the repeated scan below, but worth adding such a test case).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This scans all module-level ops for each expected header. Can instead scan once in advance to find all available headers, then when scanning call_opaques can skip those whose requested header is already included.

@Jaddyen Jaddyen force-pushed the memref-ops-copyop branch from 726059d to ec252fc Compare August 8, 2025 00:16
@Jaddyen Jaddyen requested review from aniragil and ilovepi August 8, 2025 12:58
Copy link
Contributor

@ilovepi ilovepi left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Comment on lines 10 to 12
// NOCPP: emitc.include <"string.h">

// CPP: emitc.include <"cstring">
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// NOCPP: emitc.include <"string.h">
// CPP: emitc.include <"cstring">
// NOCPP: emitc.include <"string.h">
// CPP: emitc.include <"cstring">

I think this a more readable formatting, since its makes it a bit obvious that these are variants of the same line. This is a rather minor nit though, so up to you.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i've addressed in new changes.

return totalSizeBytes.getResult();
}

static emitc::ApplyOp
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm making this a function instead a lambda so that we can use it later when lowering other memref ops like extract_strided_metadata and reinterpret_cast that need a pointer to the first element of the array.

Copy link
Contributor

@aniragil aniragil left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One more nit

Comment on lines 133 to 140
int64_t rank = arrayValue.getType().getRank();
llvm::SmallVector<mlir::Value> indices(rank, zeroIndex);

emitc::SubscriptOp subPtr =
builder.create<emitc::SubscriptOp>(loc, arrayValue, ValueRange(indices));
emitc::ApplyOp ptr = builder.create<emitc::ApplyOp>(
loc, emitc::PointerType::get(arrayValue.getType().getElementType()),
builder.getStringAttr("&"), subPtr);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
int64_t rank = arrayValue.getType().getRank();
llvm::SmallVector<mlir::Value> indices(rank, zeroIndex);
emitc::SubscriptOp subPtr =
builder.create<emitc::SubscriptOp>(loc, arrayValue, ValueRange(indices));
emitc::ApplyOp ptr = builder.create<emitc::ApplyOp>(
loc, emitc::PointerType::get(arrayValue.getType().getElementType()),
builder.getStringAttr("&"), subPtr);
ArrayType arrayType = arrayValue.getType();
llvm::SmallVector<mlir::Value> indices(arrayType.getRank(), zeroIndex);
emitc::SubscriptOp subPtr =
builder.create<emitc::SubscriptOp>(loc, arrayValue, ValueRange(indices));
emitc::ApplyOp ptr = builder.create<emitc::ApplyOp>(
loc, emitc::PointerType::get(arrayType.getElementType()),
builder.getStringAttr("&"), subPtr);

@Jaddyen Jaddyen requested a review from aniragil August 13, 2025 17:33
Copy link
Contributor

@aniragil aniragil left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks!

@Jaddyen Jaddyen merged commit bfda0e7 into llvm:main Aug 14, 2025
9 checks passed
@Jaddyen Jaddyen deleted the memref-ops-copyop branch September 2, 2025 20:42
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants