[msl] Fix PackedVec3 for atomic builtins

jrprice · Dawn LUCI CQ · commit 1b75d8b318e1 · 2024-09-13T15:19:04.000Z
Only unwrap pointers before load instructions, instead of before calling `UpdateUsage()`, so that the `packed == unpacked` check at the start of `UpdateUsage()` can correctly determine when the target type is not in fact packed.

Fixed: 366314931
Change-Id: Ie8e3c0bf87a0afd0d13199227fc9687e57fe0809
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/206395
Auto-Submit: James Price <jrprice@google.com>
Reviewed-by: Antonio Maiorano <amaiorano@google.com>
Commit-Queue: Antonio Maiorano <amaiorano@google.com>
diff --git a/src/tint/lang/msl/writer/raise/packed_vec3.cc b/src/tint/lang/msl/writer/raise/packed_vec3.cc
@@ -291,12 +291,12 @@ struct State {
                 auto* packed_result_type = RewriteType(unpacked_result_type);
                 let->Result(0)->SetType(packed_result_type);
                 let->Result(0)->ForEachUseSorted([&](core::ir::Usage let_use) {  //
-                    UpdateUsage(let_use, unpacked_result_type->UnwrapPtr(), packed_result_type);
+                    UpdateUsage(let_use, unpacked_result_type, packed_result_type);
                 });
             },
             [&](core::ir::Load* load) {
                 b.InsertAfter(load, [&] {
-                    auto* result = LoadPackedToUnpacked(unpacked_type, load->From());
+                    auto* result = LoadPackedToUnpacked(unpacked_type->UnwrapPtr(), load->From());
                     load->Result(0)->ReplaceAllUsesWith(result);
                 });
                 load->Destroy();
@@ -327,7 +327,7 @@ struct State {
         // Rebuild the indices of the access instruction.
         // Walk through the intermediate types that the access chain will be traversing, and
         // check for packed vectors that would be wrapped in structures.
-        auto* obj_type = unpacked_type;
+        auto* obj_type = unpacked_type->UnwrapPtr();
         Vector<core::ir::Value*, 4> operands;
         operands.Push(access->Object());
         for (auto* idx : access->Indices()) {
@@ -354,7 +354,7 @@ struct State {
         access->SetOperands(std::move(operands));
         access->Result(0)->SetType(packed_result_type);
         access->Result(0)->ForEachUseSorted([&](core::ir::Usage access_use) {  //
-            UpdateUsage(access_use, unpacked_result_type->UnwrapPtr(), packed_result_type);
+            UpdateUsage(access_use, unpacked_result_type, packed_result_type);
         });
     }
 
diff --git a/src/tint/lang/msl/writer/raise/packed_vec3_test.cc b/src/tint/lang/msl/writer/raise/packed_vec3_test.cc
@@ -3692,5 +3692,137 @@ S_packed_vec3 = struct @align(16) {
     EXPECT_EQ(expect, str());
 }
 
+TEST_F(MslWriter_PackedVec3Test, AtomicOnPackedStructMember) {  // Regression test for 366314931
+    auto* s = ty.Struct(mod.symbols.New("S"), {  // vec3 member next to an atomic member
+                                                  {mod.symbols.Register("vec"), ty.vec3<u32>()},
+                                                  {mod.symbols.Register("u"), ty.atomic<u32>()},
+                                              });
+
+    auto* var = b.Var("v", ty.ptr<workgroup>(s));
+    mod.root_block->Append(var);
+
+    auto* func = b.Function("foo", ty.u32());
+    b.Append(func->Block(), [&] {  // atomicLoad via a pointer to the atomic (non-vec3) member
+        auto* p = b.Access<ptr<workgroup, atomic<u32>>>(var, 1_u);
+        auto* result = b.Call<u32>(core::BuiltinFn::kAtomicLoad, p);
+        b.Return(func, result);
+    });
+
+    auto* src = R"(
+S = struct @align(16) {
+  vec:vec3<u32> @offset(0)
+  u:atomic<u32> @offset(12)
+}
+
+$B1: {  # root
+  %v:ptr<workgroup, S, read_write> = var
+}
+
+%foo = func():u32 {
+  $B2: {
+    %3:ptr<workgroup, atomic<u32>, read_write> = access %v, 1u
+    %4:u32 = atomicLoad %3
+    ret %4
+  }
+}
+)";
+    EXPECT_EQ(src, str());  // check the IR built above before running the transform
+
+    auto* expect = R"(
+S = struct @align(16) {
+  vec:vec3<u32> @offset(0)
+  u:atomic<u32> @offset(12)
+}
+
+S_packed_vec3 = struct @align(16) {
+  vec:__packed_vec3<u32> @offset(0)
+  u:atomic<u32> @offset(12)
+}
+
+$B1: {  # root
+  %v:ptr<workgroup, S_packed_vec3, read_write> = var
+}
+
+%foo = func():u32 {
+  $B2: {
+    %3:ptr<workgroup, atomic<u32>, read_write> = access %v, 1u
+    %4:u32 = atomicLoad %3
+    ret %4
+  }
+}
+)";
+
+    Run(PackedVec3);  // %v becomes S_packed_vec3; access and atomicLoad must stay unchanged
+
+    EXPECT_EQ(expect, str());
+}
+
+TEST_F(MslWriter_PackedVec3Test, AtomicOnPackedStructMember_ViaLet) {
+    auto* s = ty.Struct(mod.symbols.New("S"), {  // vec3 member next to an atomic member
+                                                  {mod.symbols.Register("vec"), ty.vec3<u32>()},
+                                                  {mod.symbols.Register("u"), ty.atomic<u32>()},
+                                              });
+
+    auto* var = b.Var("v", ty.ptr<workgroup>(s));
+    mod.root_block->Append(var);
+
+    auto* func = b.Function("foo", ty.u32());
+    b.Append(func->Block(), [&] {  // atomicLoad via a let-bound pointer to the atomic member
+        auto* p = b.Let("p", b.Access<ptr<workgroup, atomic<u32>>>(var, 1_u));
+        auto* result = b.Call<u32>(core::BuiltinFn::kAtomicLoad, p);
+        b.Return(func, result);
+    });
+
+    auto* src = R"(
+S = struct @align(16) {
+  vec:vec3<u32> @offset(0)
+  u:atomic<u32> @offset(12)
+}
+
+$B1: {  # root
+  %v:ptr<workgroup, S, read_write> = var
+}
+
+%foo = func():u32 {
+  $B2: {
+    %3:ptr<workgroup, atomic<u32>, read_write> = access %v, 1u
+    %p:ptr<workgroup, atomic<u32>, read_write> = let %3
+    %5:u32 = atomicLoad %p
+    ret %5
+  }
+}
+)";
+    EXPECT_EQ(src, str());  // check the IR built above before running the transform
+
+    auto* expect = R"(
+S = struct @align(16) {
+  vec:vec3<u32> @offset(0)
+  u:atomic<u32> @offset(12)
+}
+
+S_packed_vec3 = struct @align(16) {
+  vec:__packed_vec3<u32> @offset(0)
+  u:atomic<u32> @offset(12)
+}
+
+$B1: {  # root
+  %v:ptr<workgroup, S_packed_vec3, read_write> = var
+}
+
+%foo = func():u32 {
+  $B2: {
+    %3:ptr<workgroup, atomic<u32>, read_write> = access %v, 1u
+    %p:ptr<workgroup, atomic<u32>, read_write> = let %3
+    %5:u32 = atomicLoad %p
+    ret %5
+  }
+}
+)";
+
+    Run(PackedVec3);  // %v becomes S_packed_vec3; access, let and atomicLoad must stay unchanged
+
+    EXPECT_EQ(expect, str());
+}
+
 }  // namespace
 }  // namespace tint::msl::writer::raise
diff --git a/test/tint/bug/tint/366314931.wgsl b/test/tint/bug/tint/366314931.wgsl
@@ -0,0 +1,15 @@
+struct S {
+  v : vec3u,
+  u : atomic<u32>,
+}
+
+var<workgroup> wgvar: S;
+
+@group(0) @binding(0)
+var<storage, read_write> output: S;
+
+@compute @workgroup_size(1,1,1)
+fn main() {
+  let x = atomicLoad(&wgvar.u);
+  atomicStore(&output.u, x);
+}
diff --git a/test/tint/bug/tint/366314931.wgsl.expected.dxc.hlsl b/test/tint/bug/tint/366314931.wgsl.expected.dxc.hlsl
@@ -0,0 +1,41 @@
+struct S {
+  uint3 v;
+  uint u;
+};
+
+groupshared S wgvar;
+
+void tint_zero_workgroup_memory(uint local_idx) {
+  if ((local_idx < 1u)) {
+    wgvar.v = (0u).xxx;
+    uint atomic_result = 0u;
+    InterlockedExchange(wgvar.u, 0u, atomic_result);
+  }
+  GroupMemoryBarrierWithGroupSync();
+}
+
+RWByteAddressBuffer output : register(u0);
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+void outputatomicStore(uint offset, uint value) {
+  uint ignored;
+  output.InterlockedExchange(offset, value, ignored);
+}
+
+
+void main_inner(uint local_invocation_index) {
+  tint_zero_workgroup_memory(local_invocation_index);
+  uint atomic_result_1 = 0u;
+  InterlockedOr(wgvar.u, 0, atomic_result_1);
+  uint x = atomic_result_1;
+  outputatomicStore(12u, x);
+}
+
+[numthreads(1, 1, 1)]
+void main(tint_symbol_1 tint_symbol) {
+  main_inner(tint_symbol.local_invocation_index);
+  return;
+}
diff --git a/test/tint/bug/tint/366314931.wgsl.expected.fxc.hlsl b/test/tint/bug/tint/366314931.wgsl.expected.fxc.hlsl
@@ -0,0 +1,41 @@
+struct S {
+  uint3 v;
+  uint u;
+};
+
+groupshared S wgvar;
+
+void tint_zero_workgroup_memory(uint local_idx) {
+  if ((local_idx < 1u)) {
+    wgvar.v = (0u).xxx;
+    uint atomic_result = 0u;
+    InterlockedExchange(wgvar.u, 0u, atomic_result);
+  }
+  GroupMemoryBarrierWithGroupSync();
+}
+
+RWByteAddressBuffer output : register(u0);
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+void outputatomicStore(uint offset, uint value) {
+  uint ignored;
+  output.InterlockedExchange(offset, value, ignored);
+}
+
+
+void main_inner(uint local_invocation_index) {
+  tint_zero_workgroup_memory(local_invocation_index);
+  uint atomic_result_1 = 0u;
+  InterlockedOr(wgvar.u, 0, atomic_result_1);
+  uint x = atomic_result_1;
+  outputatomicStore(12u, x);
+}
+
+[numthreads(1, 1, 1)]
+void main(tint_symbol_1 tint_symbol) {
+  main_inner(tint_symbol.local_invocation_index);
+  return;
+}
diff --git a/test/tint/bug/tint/366314931.wgsl.expected.glsl b/test/tint/bug/tint/366314931.wgsl.expected.glsl
@@ -0,0 +1,31 @@
+#version 310 es
+
+struct S {
+  uvec3 v;
+  uint u;
+};
+
+shared S wgvar;
+void tint_zero_workgroup_memory(uint local_idx) {
+  if ((local_idx < 1u)) {
+    wgvar.v = uvec3(0u);
+    atomicExchange(wgvar.u, 0u);
+  }
+  barrier();
+}
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  S inner;
+} tint_symbol;
+
+void tint_symbol_1(uint local_invocation_index) {
+  tint_zero_workgroup_memory(local_invocation_index);
+  uint x = atomicOr(wgvar.u, 0u);
+  atomicExchange(tint_symbol.inner.u, x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_1(gl_LocalInvocationIndex);
+  return;
+}
diff --git a/test/tint/bug/tint/366314931.wgsl.expected.ir.dxc.hlsl b/test/tint/bug/tint/366314931.wgsl.expected.ir.dxc.hlsl
@@ -0,0 +1,31 @@
+struct S {
+  uint3 v;
+  uint u;
+};
+
+struct main_inputs {
+  uint tint_local_index : SV_GroupIndex;
+};
+
+
+groupshared S wgvar;
+RWByteAddressBuffer output : register(u0);
+void main_inner(uint tint_local_index) {
+  if ((tint_local_index == 0u)) {
+    wgvar.v = (0u).xxx;
+    uint v_1 = 0u;
+    InterlockedExchange(wgvar.u, 0u, v_1);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  uint v_2 = 0u;
+  InterlockedOr(wgvar.u, 0u, v_2);
+  uint x = v_2;
+  uint v_3 = 0u;
+  output.InterlockedExchange(uint(12u), x, v_3);
+}
+
+[numthreads(1, 1, 1)]
+void main(main_inputs inputs) {
+  main_inner(inputs.tint_local_index);
+}
+
diff --git a/test/tint/bug/tint/366314931.wgsl.expected.ir.fxc.hlsl b/test/tint/bug/tint/366314931.wgsl.expected.ir.fxc.hlsl
@@ -0,0 +1,31 @@
+struct S {
+  uint3 v;
+  uint u;
+};
+
+struct main_inputs {
+  uint tint_local_index : SV_GroupIndex;
+};
+
+
+groupshared S wgvar;
+RWByteAddressBuffer output : register(u0);
+void main_inner(uint tint_local_index) {
+  if ((tint_local_index == 0u)) {
+    wgvar.v = (0u).xxx;
+    uint v_1 = 0u;
+    InterlockedExchange(wgvar.u, 0u, v_1);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  uint v_2 = 0u;
+  InterlockedOr(wgvar.u, 0u, v_2);
+  uint x = v_2;
+  uint v_3 = 0u;
+  output.InterlockedExchange(uint(12u), x, v_3);
+}
+
+[numthreads(1, 1, 1)]
+void main(main_inputs inputs) {
+  main_inner(inputs.tint_local_index);
+}
+
diff --git a/test/tint/bug/tint/366314931.wgsl.expected.ir.glsl b/test/tint/bug/tint/366314931.wgsl.expected.ir.glsl
@@ -0,0 +1,11 @@
+SKIP: FAILED
+
+../../src/tint/lang/glsl/writer/printer/printer.cc:1394 internal compiler error: TINT_UNREACHABLE unhandled core builtin: atomicStore
+********************************************************************
+*  The tint shader compiler has encountered an unexpected error.   *
+*                                                                  *
+*  Please help us fix this issue by submitting a bug report at     *
+*  crbug.com/tint with the source program that triggered the bug.  *
+********************************************************************
+
+tint executable returned error: signal: trace/BPT trap
diff --git a/test/tint/bug/tint/366314931.wgsl.expected.ir.msl b/test/tint/bug/tint/366314931.wgsl.expected.ir.msl
diff --git a/test/tint/bug/tint/366314931.wgsl.expected.msl b/test/tint/bug/tint/366314931.wgsl.expected.msl
diff --git a/test/tint/bug/tint/366314931.wgsl.expected.spvasm b/test/tint/bug/tint/366314931.wgsl.expected.spvasm
diff --git a/test/tint/bug/tint/366314931.wgsl.expected.wgsl b/test/tint/bug/tint/366314931.wgsl.expected.wgsl