coop: fixes and changelog

kvark · kvark · commit 61aca28ecef9 · 2025-09-27T18:31:36.000-07:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -44,7 +44,7 @@ Bottom level categories:
 
 #### Deferred command buffer actions: `map_buffer_on_submit` and `on_submitted_work_done`
 
-You may schedule buffer mapping and a submission-complete callback to run automatically after you submit, directly from encoders, command buffers, and passes. 
+You may schedule buffer mapping and a submission-complete callback to run automatically after you submit, directly from encoders, command buffers, and passes.
 
 ```rust
 // Record some GPU work so the submission isn't empty and touches `buffer`.
@@ -150,7 +150,7 @@ By @cwfitzgerald in [#8163](https://github.com/gfx-rs/wgpu/pull/8163).
 
 #### Multi-draw indirect is now unconditionally supported when indirect draws are supported
 
-We have removed `Features::MULTI_DRAW_INDIRECT` as it was unconditionally available on all platforms. 
+We have removed `Features::MULTI_DRAW_INDIRECT` as it was unconditionally available on all platforms.
 `RenderPass::multi_draw_indirect` is now available if the device supports downlevel flag `DownlevelFlags::INDIRECT_EXECUTION`.
 
 If you are using spirv-passthrough with multi-draw indirect and `gl_DrawID`, you can know if `MULTI_DRAW_INDIRECT` is being emulated
@@ -166,6 +166,8 @@ By @cwfitzgerald in [#8162](https://github.com/gfx-rs/wgpu/pull/8162).
 
 - Added support for external textures based on WebGPU's [`GPUExternalTexture`](https://www.w3.org/TR/webgpu/#gpuexternaltexture). These allow shaders to transparently operate on potentially multiplanar source texture data in either RGB or YCbCr formats via WGSL's `texture_external` type. This is gated behind the `Features::EXTERNAL_TEXTURE` feature, which is currently only supported on DX12. By @jamienicol in [#4386](https://github.com/gfx-rs/wgpu/issues/4386).
 
+- Added support for cooperative load/store operations in shaders. Currently, only WGSL is supported as input, and only SPIR-V and MSL (Metal) are supported as output. By @kvark in [#8251](https://github.com/gfx-rs/wgpu/issues/8251).
+
 ### Changes
 
 #### General
diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs
@@ -3691,6 +3691,18 @@ impl BlockContext<'_> {
                     stride,
                     row_major,
                 } => {
+                    let pointer_id = match self.write_access_chain(
+                        pointer,
+                        &mut block,
+                        AccessTypeAdjustment::None,
+                    )? {
+                        ExpressionPointer::Ready { pointer_id } => pointer_id,
+                        ExpressionPointer::Conditional { .. } => {
+                            return Err(Error::FeatureNotImplemented(
+                                "Cooperative load/store out-of-bounds handling",
+                            ));
+                        }
+                    };
                     let layout = if row_major {
                         spirv::CooperativeMatrixLayout::RowMajorKHR
                     } else {
@@ -3701,7 +3713,7 @@ impl BlockContext<'_> {
                     if store {
                         block.body.push(Instruction::coop_store(
                             self.cached[target],
-                            self.cached[pointer],
+                            pointer_id,
                             layout_id,
                             stride_id,
                         ));
@@ -3711,7 +3723,7 @@ impl BlockContext<'_> {
                         block.body.push(Instruction::coop_load(
                             result_type_id,
                             id,
-                            self.cached[pointer],
+                            pointer_id,
                             layout_id,
                             stride_id,
                         ));
diff --git a/naga/src/valid/function.rs b/naga/src/valid/function.rs
@@ -1641,32 +1641,20 @@ impl super::Validator {
                             }
                         };
 
-                    let ty_inner =
-                        context.resolve_type_inner(pointer, &self.valid_expression_set)?;
+                    let ty_inner = context.resolve_pointer_type(pointer);
                     //TODO: validate stride
-                    let (pty_array, space) = match *ty_inner {
+                    let (pty_scalar, space) = match *ty_inner {
                         crate::TypeInner::Pointer { base, space } => (base, space),
                         _ => {
                             return Err(FunctionError::InvalidCooperativeDataPointer(pointer)
-                                .with_span_handle(pointer, context.expressions))
-                        }
-                    };
-                    let pty_scalar = match context.types[pty_array].inner {
-                        crate::TypeInner::Array {
-                            base,
-                            size: _,
-                            stride: _,
-                        } => base,
-                        _ => {
-                            return Err(FunctionError::InvalidCooperativeDataPointer(pointer)
-                                .with_span_handle(pointer, context.expressions))
+                                .with_span_handle(pointer, context.expressions));
                         }
                     };
                     let space = match context.types[pty_scalar].inner {
                         crate::TypeInner::Scalar(s) if s == target_scalar => space,
                         _ => {
                             return Err(FunctionError::InvalidCooperativeDataPointer(pointer)
-                                .with_span_handle(pointer, context.expressions))
+                                .with_span_handle(pointer, context.expressions));
                         }
                     };
 
diff --git a/naga/tests/in/wgsl/cooperative-matrix.wgsl b/naga/tests/in/wgsl/cooperative-matrix.wgsl
@@ -6,7 +6,7 @@ var<storage, read_write> ext: array<f32>;
 @compute @workgroup_size(8, 8, 1)
 fn main() {
     var c = coop_mat8x8<f32, C>();
-    coopLoad(c, &ext);
+    coopLoad(c, &ext[4]);
     var d = coopMultiplyAdd(a, b, c);
-    coopStore(c, &ext);
+    coopStore(c, &ext[0]);
 }
diff --git a/naga/tests/out/ir/wgsl-cooperative-matrix.compact.ron b/naga/tests/out/ir/wgsl-cooperative-matrix.compact.ron
@@ -119,40 +119,56 @@
                     ZeroValue(4),
                     LocalVariable(0),
                     GlobalVariable(2),
+                    AccessIndex(
+                        base: 2,
+                        index: 4,
+                    ),
                     GlobalVariable(0),
                     GlobalVariable(1),
                     CooperativeMultiplyAdd(
-                        a: 3,
-                        b: 4,
+                        a: 4,
+                        b: 5,
                         c: 1,
                     ),
                     LocalVariable(1),
                     GlobalVariable(2),
+                    AccessIndex(
+                        base: 8,
+                        index: 0,
+                    ),
                 ],
                 named_expressions: {},
                 body: [
                     CooperativeLoadStore(
                         store: false,
                         target: 1,
-                        pointer: 2,
+                        pointer: 3,
                         stride: None,
                         row_major: false,
                     ),
                     Emit((
-                        start: 5,
-                        end: 6,
+                        start: 3,
+                        end: 4,
+                    )),
+                    Emit((
+                        start: 6,
+                        end: 7,
                     )),
                     Store(
-                        pointer: 6,
-                        value: 5,
+                        pointer: 7,
+                        value: 6,
                     ),
                     CooperativeLoadStore(
                         store: true,
                         target: 1,
-                        pointer: 7,
+                        pointer: 9,
                         stride: None,
                         row_major: false,
                     ),
+                    Emit((
+                        start: 9,
+                        end: 10,
+                    )),
                     Return(
                         value: None,
                     ),
diff --git a/naga/tests/out/ir/wgsl-cooperative-matrix.ron b/naga/tests/out/ir/wgsl-cooperative-matrix.ron
@@ -119,40 +119,56 @@
                     ZeroValue(4),
                     LocalVariable(0),
                     GlobalVariable(2),
+                    AccessIndex(
+                        base: 2,
+                        index: 4,
+                    ),
                     GlobalVariable(0),
                     GlobalVariable(1),
                     CooperativeMultiplyAdd(
-                        a: 3,
-                        b: 4,
+                        a: 4,
+                        b: 5,
                         c: 1,
                     ),
                     LocalVariable(1),
                     GlobalVariable(2),
+                    AccessIndex(
+                        base: 8,
+                        index: 0,
+                    ),
                 ],
                 named_expressions: {},
                 body: [
                     CooperativeLoadStore(
                         store: false,
                         target: 1,
-                        pointer: 2,
+                        pointer: 3,
                         stride: None,
                         row_major: false,
                     ),
                     Emit((
-                        start: 5,
-                        end: 6,
+                        start: 3,
+                        end: 4,
+                    )),
+                    Emit((
+                        start: 6,
+                        end: 7,
                     )),
                     Store(
-                        pointer: 6,
-                        value: 5,
+                        pointer: 7,
+                        value: 6,
                     ),
                     CooperativeLoadStore(
                         store: true,
                         target: 1,
-                        pointer: 7,
+                        pointer: 9,
                         stride: None,
                         row_major: false,
                     ),
+                    Emit((
+                        start: 9,
+                        end: 10,
+                    )),
                     Return(
                         value: None,
                     ),
diff --git a/naga/tests/out/msl/wgsl-cooperative-matrix.msl b/naga/tests/out/msl/wgsl-cooperative-matrix.msl
@@ -0,0 +1,31 @@
+// language: metal1.0
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using metal::uint;
+
+struct _mslBufferSizes {
+    uint size2;
+};
+
+typedef float type_3[1];
+metal::simdgroup_float8x8 NagaCooperativeMultiplyAdd(const metal::simdgroup_float8x8& a, const metal::simdgroup_float8x8& b, const metal::simdgroup_float8x8& c) {
+    metal::simdgroup_float8x8 d;
+    metal::simdgroup_multiply_accumulate(d,a,b,c);
+    return d;
+}
+
+
+kernel void main_(
+  device type_3 const& ext [[user(fake0)]]
+, constant _mslBufferSizes& _buffer_sizes [[user(fake0)]]
+) {
+    metal::simdgroup_float8x8 a = {};
+    metal::simdgroup_float8x8 b = {};
+    metal::simdgroup_float8x8 c = metal::simdgroup_float8x8 {};
+    metal::simdgroup_float8x8 d = {};
+    metal::simdgroup_load(c, ext[4]);
+    d = NagaCooperativeMultiplyAdd(a, b, c);
+    metal::simdgroup_store(c, ext[0]);
+    return;
+}
diff --git a/naga/tests/out/spv/wgsl-cooperative-matrix.spvasm b/naga/tests/out/spv/wgsl-cooperative-matrix.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.4
 ; Generator: rspirv
-; Bound: 37
+; Bound: 41
 OpCapability Shader
 OpCapability CooperativeMatrixKHR
 OpCapability VulkanMemoryModel
@@ -20,9 +20,9 @@ var<storage, read_write> ext: array<f32>;
 @compute @workgroup_size(8, 8, 1)
 fn main() {
     var c = coop_mat8x8<f32, C>();
-    coopLoad(c, &ext);
+    coopLoad(c, &ext[4]);
     var d = coopMultiplyAdd(a, b, c);
-    coopStore(c, &ext);
+    coopStore(c, &ext[0]);
 }
 "
 OpName %15 "a"
@@ -62,6 +62,8 @@ OpMemberDecorate %22 0 Offset 0
 %29 = OpConstantNull  %13
 %31 = OpTypePointer Function %13
 %33 = OpConstantNull  %13
+%35 = OpTypePointer StorageBuffer %4
+%36 = OpConstant  %7  4
 %25 = OpFunction  %2  None %26
 %24 = OpLabel
 %30 = OpVariable  %31  Function %29
@@ -70,13 +72,17 @@ OpMemberDecorate %22 0 Offset 0
 OpBranch %34
 %34 = OpLabel
 OpLine %3 9 5
-%35 = OpCooperativeMatrixLoadKHR  %13  %28 %11
-OpStore %30 %35
+%37 = OpAccessChain  %35  %28 %36
+%38 = OpCooperativeMatrixLoadKHR  %13  %37 %11
+OpStore %30 %38
+OpLine %3 9 18
 OpLine %3 10 13
-%36 = OpCooperativeMatrixMulAddKHR  %13  %15 %18 %30
+%39 = OpCooperativeMatrixMulAddKHR  %13  %15 %18 %30
 OpLine %3 10 5
-OpStore %32 %36
+OpStore %32 %39
 OpLine %3 11 5
-OpCooperativeMatrixStoreKHR %28 %30 %11
+%40 = OpAccessChain  %35  %28 %9
+OpCooperativeMatrixStoreKHR %40 %30 %11
+OpLine %3 11 19
 OpReturn
 OpFunctionEnd