
Conversation

@arsenm
Contributor

@arsenm arsenm commented Aug 15, 2025

We were sizing the table based on the total number of LibcallImpls, but many
of those have identical names, which pushed up the collision count
unnecessarily. Sizing for the unique names instead decreases the table size
slightly and makes lookups a bit faster.

BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.

As a secondary change, round the table size up to the next power of 2. This
makes the table larger than before, but improves the sample data benchmark by
an additional 5%.

@llvmbot
Member

llvmbot commented Aug 15, 2025

@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-llvm-binary-utilities

Author: Matt Arsenault (arsenm)

Changes

We were sizing the table based on the total number of LibcallImpls, but many
of those have identical names, which pushed up the collision count
unnecessarily. Sizing for the unique names instead decreases the table size
slightly and makes lookups a bit faster.

BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.

As a secondary change, round the table size up to the next power of 2. This
makes the table larger than before, but improves the sample data benchmark by
an additional 5%.


Full diff: https://github.com/llvm/llvm-project/pull/153801.diff

1 file affected:

  • (modified) llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp (+32-27)
diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
index 1c5f38d0c24b8..05f2512e24a50 100644
--- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
@@ -289,7 +289,6 @@ class RuntimeLibcallEmitter {
 
 /// Helper struct for the name hash table.
 struct LookupEntry {
-  StringRef FuncName;
   uint64_t Hash = 0;
   unsigned TableValue = 0;
 };
@@ -339,14 +338,17 @@ static void emitHashFunction(raw_ostream &OS) {
 /// Return the table size, maximum number of collisions for the set of hashes
 static std::pair<int, int>
 computePerfectHashParameters(ArrayRef<uint64_t> Hashes) {
-  const int SizeOverhead = 10;
-  const int NumHashes = Hashes.size();
+  // Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp
+  const int SizeOverhead = 4;
 
   // Index derived from hash -> number of collisions.
   DenseMap<uint64_t, int> Table;
 
+  unsigned NumHashes = Hashes.size();
+
   for (int MaxCollisions = 1;; ++MaxCollisions) {
-    for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
+    for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead * NumHashes;
+         N <<= 1) {
       Table.clear();
 
       bool NeedResize = false;
@@ -367,22 +369,12 @@ computePerfectHashParameters(ArrayRef<uint64_t> Hashes) {
 
 static std::vector<LookupEntry>
 constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
-                          ArrayRef<uint64_t> Hashes, int Size, int Collisions,
-                          StringToOffsetTable &OffsetTable) {
-  DenseSet<StringRef> Seen;
+                          ArrayRef<uint64_t> Hashes,
+                          ArrayRef<unsigned> TableValues, int Size,
+                          int Collisions, StringToOffsetTable &OffsetTable) {
   std::vector<LookupEntry> Lookup(Size * Collisions);
 
-  for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
-    StringRef ImplName = LibCallImpl.getLibcallFuncName();
-
-    // We do not want to add repeated entries for cases with the same name, only
-    // an entry for the first, with the name collision enum values immediately
-    // following.
-    if (!Seen.insert(ImplName).second)
-      continue;
-
-    uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1];
-
+  for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) {
     uint64_t Idx = (HashValue % static_cast<uint64_t>(Size)) *
                    static_cast<uint64_t>(Collisions);
 
@@ -390,8 +382,7 @@ constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
     for (int J = 0; J < Collisions; ++J) {
       LookupEntry &Entry = Lookup[Idx + J];
       if (Entry.TableValue == 0) {
-        Entry.FuncName = ImplName;
-        Entry.TableValue = LibCallImpl.getEnumVal();
+        Entry.TableValue = TableValue;
         Entry.Hash = HashValue;
         Found = true;
         break;
@@ -399,7 +390,7 @@ constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
     }
 
     if (!Found)
-      reportFatalInternalError("failure to hash " + ImplName);
+      reportFatalInternalError("failure to hash");
   }
 
   return Lookup;
@@ -409,15 +400,25 @@ constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
 void RuntimeLibcallEmitter::emitNameMatchHashTable(
     raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
   std::vector<uint64_t> Hashes(RuntimeLibcallImplDefList.size());
+  std::vector<unsigned> TableValues(RuntimeLibcallImplDefList.size());
+  DenseSet<StringRef> SeenFuncNames;
 
   size_t MaxFuncNameSize = 0;
   size_t Index = 0;
+
   for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
     StringRef ImplName = LibCallImpl.getLibcallFuncName();
-    MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
-    Hashes[Index++] = hash(ImplName);
+    if (SeenFuncNames.insert(ImplName).second) {
+      MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
+      TableValues[Index] = LibCallImpl.getEnumVal();
+      Hashes[Index++] = hash(ImplName);
+    }
   }
 
+  // Trim excess elements from non-unique entries.
+  Hashes.resize(SeenFuncNames.size());
+  TableValues.resize(SeenFuncNames.size());
+
   LLVM_DEBUG({
     for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
       StringRef ImplName = LibCallImpl.getLibcallFuncName();
@@ -447,8 +448,9 @@ void RuntimeLibcallEmitter::emitNameMatchHashTable(
         "#endif\n";
 
   auto [Size, Collisions] = computePerfectHashParameters(Hashes);
-  std::vector<LookupEntry> Lookup = constructPerfectHashTable(
-      RuntimeLibcallImplDefList, Hashes, Size, Collisions, OffsetTable);
+  std::vector<LookupEntry> Lookup =
+      constructPerfectHashTable(RuntimeLibcallImplDefList, Hashes, TableValues,
+                                Size, Collisions, OffsetTable);
 
   LLVM_DEBUG(dbgs() << "Runtime libcall perfect hashing parameters: Size = "
                     << Size << ", maximum collisions = " << Collisions << '\n');
@@ -464,10 +466,13 @@ void RuntimeLibcallEmitter::emitNameMatchHashTable(
   OS << "  static constexpr uint16_t HashTableNameToEnum[" << Lookup.size()
      << "] = {\n";
 
-  for (auto [FuncName, Hash, TableVal] : Lookup) {
+  for (auto [Hash, TableVal] : Lookup) {
     OS << "    " << TableVal << ',';
-    if (TableVal != 0)
+    if (TableVal != 0) {
+      StringRef FuncName =
+          RuntimeLibcallImplDefList[TableVal].getLibcallFuncName();
       OS << " // " << format_hex(Hash, 16) << ", " << FuncName;
+    }
 
     OS << '\n';
   }

@llvmbot
Member

llvmbot commented Aug 15, 2025

@llvm/pr-subscribers-tablegen


@arsenm arsenm marked this pull request as ready for review August 15, 2025 12:59
@arsenm arsenm force-pushed the users/arsenm/runtime-libcalls/store-libcall-string-lengths-table branch from b6af40f to f0c374c on August 15, 2025 13:39
@arsenm arsenm force-pushed the users/arsenm/runtime-libcalls/shrink-libcall-impl-name-hash-table-unique-entries branch from f445a16 to 6e2b170 on August 15, 2025 13:39
Base automatically changed from users/arsenm/runtime-libcalls/store-libcall-string-lengths-table to main August 15, 2025 14:29
@arsenm arsenm force-pushed the users/arsenm/runtime-libcalls/shrink-libcall-impl-name-hash-table-unique-entries branch from 6e2b170 to aaec1db on August 21, 2025 00:12
@arsenm
Contributor Author

arsenm commented Aug 22, 2025

ping

@nikic nikic requested a review from efriedma-quic August 23, 2025 14:41
Collaborator

@efriedma-quic efriedma-quic left a comment


Using a power-of-two hashtable size means you're really relying on a strong hash function; allowing non-power-of-two sizes gives a lot more chances to cover up any imperfection. We might want to consider adding a diagnostic if MaxCollisions gets excessively high.

Otherwise LGTM
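To illustrate the reviewer's point (a hypothetical example, not part of the patch): reducing a hash modulo a power of two keeps only its low bits, so any weakness in how the hash mixes entropy into those bits shows up directly as collisions, while an arbitrary modulus folds in the high bits as well.

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Two hashes that agree in their low six bits but differ above them.
  uint64_t A = 0x0000000000000025ull;
  uint64_t B = 0xdeadbeef00000025ull;

  // Power-of-two size: H % 64 == H & 63, so only the low bits matter and the
  // two hashes collide.
  std::printf("%llu %llu\n", (unsigned long long)(A % 64),
              (unsigned long long)(B % 64)); // prints: 37 37

  // Non-power-of-two size: the reduction depends on every bit of the hash,
  // so this pair lands in different buckets.
  std::printf("%llu %llu\n", (unsigned long long)(A % 67),
              (unsigned long long)(B % 67)); // prints: 37 12
  return 0;
}
```

A diagnostic when MaxCollisions grows large, as suggested above, would catch the case where the chosen hash does not mix well for the actual name set.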

@arsenm arsenm merged commit 41aba9e into main Aug 25, 2025
9 checks passed
@arsenm arsenm deleted the users/arsenm/runtime-libcalls/shrink-libcall-impl-name-hash-table-unique-entries branch August 25, 2025 11:56
@llvm-ci
Collaborator

llvm-ci commented Aug 25, 2025

LLVM Buildbot has detected a new failure on builder mlir-nvidia-gcc7 running on mlir-nvidia while building llvm at step 7 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/116/builds/17418

Here is the relevant piece of the build log for reference:
Step 7 (test-build-check-mlir-build-only-check-mlir) failure: test (failure)
******************** TEST 'MLIR :: Integration/GPU/CUDA/async.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-kernel-outlining  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary="format=fatbin"  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-runner    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_cuda_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_async_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_runner_utils.so    --entry-point-result=void -O0  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-kernel-outlining
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt '-pass-pipeline=builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary=format=fatbin
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-runner --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_cuda_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_async_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_runner_utils.so --entry-point-result=void -O0
# .---command stderr------------
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventSynchronize(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# `-----------------------------
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# .---command stderr------------
# | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir:68:12: error: CHECK: expected string not found in input
# |  // CHECK: [84, 84]
# |            ^
# | <stdin>:1:1: note: scanning from here
# | Unranked Memref base@ = 0x57de44a04610 rank = 1 offset = 0 sizes = [2] strides = [1] data = 
# | ^
# | <stdin>:2:1: note: possible intended match here
# | [42, 42]
# | ^
# | 
# | Input file: <stdin>
# | Check file: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: Unranked Memref base@ = 0x57de44a04610 rank = 1 offset = 0 sizes = [2] strides = [1] data =  
# | check:68'0     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
# |             2: [42, 42] 
# | check:68'0     ~~~~~~~~~
# | check:68'1     ?         possible intended match
...
