shader-slang
diff --git a/‎slangpy/tests/slangpy_tests/test_neural_bindless.py‎
Lines changed: 157 additions & 0 deletions b/‎slangpy/tests/slangpy_tests/test_neural_bindless.py‎
Lines changed: 157 additions & 0 deletions
diff --git a/‎slangpy/tests/slangpy_tests/test_neural_bindless_descriptor_handle.slang‎
Lines changed: 29 additions & 0 deletions b/‎slangpy/tests/slangpy_tests/test_neural_bindless_descriptor_handle.slang‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎slangpy/tests/slangpy_tests/test_neural_bindless_pointer.slang‎
Lines changed: 13 additions & 0 deletions b/‎slangpy/tests/slangpy_tests/test_neural_bindless_pointer.slang‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎slangpy/tests/slangpy_tests/test_neural_bwd_diff_smoke.py‎
Lines changed: 68 additions & 0 deletions b/‎slangpy/tests/slangpy_tests/test_neural_bwd_diff_smoke.py‎
Lines changed: 68 additions & 0 deletions
diff --git a/‎slangpy/tests/slangpy_tests/test_neural_frontend_training.py‎
Lines changed: 108 additions & 0 deletions b/‎slangpy/tests/slangpy_tests/test_neural_frontend_training.py‎
Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,157 @@
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""
+Neural integration tests for bindless resource types.
+
+Reviewer-requested coverage:
+- Bindless "pointer type" (raw pointer parameters passed via Buffer.device_address)
+- Bindless DescriptorHandle resources (StructuredBuffer<T>.Handle / RWStructuredBuffer<T>.Handle)
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+import slangpy as spy
+from slangpy.core.calldata import SLANG_PATH
+from slangpy.testing import helpers
+
+
+def _get_device_with_native_neural(device_type: spy.DeviceType) -> spy.Device:
+    if helpers.should_skip_test_for_device(device_type):
+        pytest.skip(f"Device type {device_type.name} not selected for this test run")
+
+    test_dir = Path(__file__).resolve().parent
+    compiler_options = spy.SlangCompilerOptions(
+        {
+            "include_paths": [test_dir, SLANG_PATH],
+            "debug_info": spy.SlangDebugInfoLevel.standard,
+            "enable_experimental_features": True,
+        }
+    )
+
+    return spy.Device(
+        type=device_type,
+        enable_debug_layers=True,
+        compiler_options=compiler_options,
+        label=f"uncached-slangpy-neural-bindless-{device_type.name}",
+    )
+
+
+# Pointer-style bindless params are supported on Vulkan. Keep this test on Vulkan only
+# to avoid backend-specific CUDA toolchain requirements for this integration test.
+POINTER_DEVICE_TYPES: list[spy.DeviceType] = [
+    x for x in helpers.DEFAULT_DEVICE_TYPES if x in [spy.DeviceType.vulkan]
+]
+
+
+@pytest.mark.parametrize("device_type", POINTER_DEVICE_TYPES)
+def test_neural_bindless_pointer_type(device_type: spy.DeviceType) -> None:
+    device = _get_device_with_native_neural(device_type)
+    try:
+        module = spy.Module(device.load_module("test_neural_bindless_pointer.slang"))
+
+        buf = device.create_buffer(
+            size=4,
+            usage=spy.BufferUsage.shader_resource,
+            data=np.array([42], dtype=np.int32),
+        )
+
+        res = int(module.read_int_ptr(buf.device_address))
+        assert res == 42
+    finally:
+        device.close()
+
+
+@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
+def test_neural_bindless_descriptor_handle_type(device_type: spy.DeviceType) -> None:
+    if device_type == spy.DeviceType.cuda:
+        pytest.skip("Bindless DescriptorHandle resources not supported with CUDA yet.")
+
+    device = _get_device_with_native_neural(device_type)
+    try:
+        if not device.has_feature(spy.Feature.bindless):
+            pytest.skip("Bindless not supported on this device.")
+
+        module = device.load_module("test_neural_bindless_descriptor_handle.slang")
+        program = device.link_program(
+            modules=[module], entry_points=[module.entry_point("compute_main")]
+        )
+        kernel = device.create_compute_kernel(program)
+
+        buffer_count = 6
+
+        ro_buffers: list[spy.Buffer] = []
+        rw_buffers: list[spy.Buffer] = []
+        for i in range(buffer_count):
+            ro_buffers.append(
+                device.create_buffer(
+                    size=4 * 4,
+                    usage=spy.BufferUsage.shader_resource,
+                    data=np.array([i * 10, i * 10 + 1, i * 10 + 2, i * 10 + 3], dtype=np.float32),
+                )
+            )
+            rw_buffers.append(
+                device.create_buffer(
+                    size=4 * 4,
+                    usage=spy.BufferUsage.shader_resource | spy.BufferUsage.unordered_access,
+                    data=np.zeros(4, dtype=np.float32),
+                )
+            )
+
+        buffer_info_layout = module.layout.get_type_layout(
+            module.layout.find_type_by_name("StructuredBuffer<BufferInfo>")
+        ).element_type_layout
+
+        buffer_infos_buffer = device.create_buffer(
+            size=buffer_count * buffer_info_layout.stride,
+            usage=spy.BufferUsage.shader_resource,
+        )
+        results_buffer = device.create_buffer(
+            size=buffer_count * 4,
+            usage=spy.BufferUsage.unordered_access,
+        )
+
+        c = spy.BufferCursor(buffer_info_layout, buffer_infos_buffer, load_before_write=False)
+        for i in range(buffer_count):
+            c[i].ro_buffer = ro_buffers[i].descriptor_handle_ro
+            c[i].rw_buffer = rw_buffers[i].descriptor_handle_rw
+            c[i].offset = i % 4
+        c.apply()
+
+        kernel.dispatch(
+            thread_count=[buffer_count, 1, 1],
+            buffer_infos=buffer_infos_buffer,
+            results=results_buffer,
+        )
+
+        results = results_buffer.to_numpy().view(np.float32)
+        expected_results = np.array(
+            [
+                0,  # buffer 0, offset 0
+                11,  # buffer 1, offset 1
+                22,  # buffer 2, offset 2
+                33,  # buffer 3, offset 3
+                40,  # buffer 4, offset 0
+                51,  # buffer 5, offset 1
+            ],
+            dtype=np.float32,
+        )
+        assert np.allclose(results, expected_results)
+
+        # Verify RW buffers were written.
+        for i in range(buffer_count):
+            rw_data = rw_buffers[i].to_numpy().view(np.float32)
+            offset = i % 4
+            expected_value = (i * 10 + offset) + 100.0
+            assert np.isclose(rw_data[offset], expected_value)
+    finally:
+        device.close()
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v", "-s"])
+
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Neural integration smoke test for bindless DescriptorHandle style resources.
+// We import `neural` to ensure experimental module compilation works alongside bindless.
+
+import neural;
+
+// One record of the bindless lookup table consumed by compute_main.
+struct BufferInfo
+{
+    // Bindless handle to the buffer compute_main reads from.
+    StructuredBuffer<float>.Handle ro_buffer;
+    // Bindless handle to the buffer compute_main writes to.
+    RWStructuredBuffer<float>.Handle rw_buffer;
+    // Element index used for both the read and the write.
+    uint offset;
+};
+
+// For the record selected by the thread's x index: load one float through the
+// read-only handle, report it in `results`, and store the value plus 100 through
+// the read-write handle at the same element index.
+[shader("compute")]
+[numthreads(1, 1, 1)]
+void compute_main(
+    uint3 tid : SV_DispatchThreadID,
+    StructuredBuffer<BufferInfo> buffer_infos,
+    RWStructuredBuffer<float> results)
+{
+    BufferInfo entry = buffer_infos[tid.x];
+    uint element = entry.offset;
+
+    float loaded = entry.ro_buffer[element];
+    results[tid.x] = loaded;
+    entry.rw_buffer[element] = loaded + 100.0;
+}
+
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Neural integration smoke test for "bindless pointer" style parameters.
+// This uses raw pointer parameters (passed from Python via Buffer.device_address).
+
+import slangpy;
+import neural;
+
+// Returns the int stored at the address `ptr` points to.
+int read_int_ptr(int* ptr)
+{
+    int first = ptr[0];
+    return first;
+}
+
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""
+Neural smoke test that actually exercises Slang autodiff (`bwd_diff(...)`).
+
+Important constraints:
+- No dependency on sample apps under `samples/`.
+- No dependency on external assets (e.g. image files).
+
+This uses the test-local Slang module `fflayer-bug-repro.slang` which imports the
+experimental `neural` module and calls `bwd_diff(loss)(DifferentialPtrPair<Storage>(...), ...)`.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+import slangpy as spy
+from slangpy.core.calldata import SLANG_PATH
+from slangpy.testing import helpers
+
+
+def _get_device_with_native_neural(device_type: spy.DeviceType) -> spy.Device:
+    if helpers.should_skip_test_for_device(device_type):
+        pytest.skip(f"Device type {device_type.name} not selected for this test run")
+
+    test_dir = Path(__file__).resolve().parent
+    compiler_options = spy.SlangCompilerOptions(
+        {
+            "include_paths": [test_dir, SLANG_PATH],
+            "debug_info": spy.SlangDebugInfoLevel.standard,
+            "enable_experimental_features": True,
+        }
+    )
+
+    return spy.Device(
+        type=device_type,
+        enable_debug_layers=True,
+        compiler_options=compiler_options,
+        label=f"uncached-slangpy-neural-bwd-diff-{device_type.name}",
+    )
+
+
+@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
+def test_neural_bwd_diff_writes_param_grads(device_type: spy.DeviceType) -> None:
+    device = _get_device_with_native_neural(device_type)
+    try:
+        module = spy.Module(device.load_module("fflayer-bug-repro.slang"))
+
+        # 2*2 weights + 2 biases = 6 floats (matches `fflayer-bug-repo.py`)
+        params = device.create_buffer(
+            data=np.ones((6,), dtype=np.float32),
+            usage=spy.BufferUsage.shader_resource | spy.BufferUsage.unordered_access,
+        )
+        dparams = device.create_buffer(
+            data=np.zeros((6,), dtype=np.float32),
+            usage=spy.BufferUsage.shader_resource | spy.BufferUsage.unordered_access,
+        )
+
+        module.calculate_grad(input=spy.float2(1, 1), params=params, dparams=dparams)
+
+        dparams_np = dparams.to_numpy().view(np.float32)
+        assert np.any(dparams_np != 0.0)
+    finally:
+        device.close()
+
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""
+SlangPy integration test for neural module FFLayer (Option 2 design).
+
+Tests training convergence for a simple quadratic regression task using:
+- FFLayer with storage passed as parameter to eval<S>()
+- Manual gradient computation (analytic gradients)
+- Simple SGD optimization
+
+We fit a quadratic polynomial y = 2*x^2 - 0.5*x + 0.25 and verify convergence.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+import slangpy as spy
+from slangpy.core.calldata import SLANG_PATH
+from slangpy.testing import helpers
+
+
+def _get_device_with_native_neural(device_type: spy.DeviceType) -> spy.Device:
+    if helpers.should_skip_test_for_device(device_type):
+        pytest.skip(f"Device type {device_type.name} not selected for this test run")
+
+    test_dir = Path(__file__).resolve().parent
+
+    # Use pre-built neural module from slang (not compiled from source)
+    # The neural module is built as part of slang-neural-module target
+    # Enable experimental features since neural is an experimental module
+    compiler_options = spy.SlangCompilerOptions(
+        {
+            "include_paths": [test_dir, SLANG_PATH],
+            "debug_info": spy.SlangDebugInfoLevel.standard,
+            "enable_experimental_features": True,
+        }
+    )
+
+    return spy.Device(
+        type=device_type,
+        enable_debug_layers=True,
+        compiler_options=compiler_options,
+        label=f"uncached-slangpy-neural-frontend-{device_type.name}",
+    )
+
+
+@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
+def test_neural_frontend_training_converges(device_type: spy.DeviceType) -> None:
+    """
+    Test that training converges for a simple quadratic regression task.
+    
+    Uses FFLayer with Option 2 design (storage as parameter to eval<S>).
+    """
+    device = _get_device_with_native_neural(device_type)
+    try:
+        module = spy.Module(device.load_module("test_neural_frontend_training.slang"))
+
+        param_count = int(module.get_param_count())
+        assert param_count == 3
+
+        # Fit: y = 2*x^2 - 0.5*x + 0.25
+        sample_count = 256
+        xs = np.linspace(-1.0, 1.0, sample_count, dtype=np.float32)
+        ys = (2.0 * xs * xs - 0.5 * xs + 0.25).astype(np.float32)
+
+        xs_buf = device.create_buffer(data=xs, usage=spy.BufferUsage.shader_resource)
+        ys_buf = device.create_buffer(data=ys, usage=spy.BufferUsage.shader_resource)
+
+        rng = np.random.default_rng(0)
+        params_init = (0.01 * rng.standard_normal(size=(param_count,))).astype(np.float32)
+
+        params = device.create_buffer(
+            data=params_init,
+            usage=spy.BufferUsage.shader_resource | spy.BufferUsage.unordered_access,
+        )
+        grads = device.create_buffer(
+            data=np.zeros((param_count,), dtype=np.float32),
+            usage=spy.BufferUsage.shader_resource | spy.BufferUsage.unordered_access,
+        )
+
+        initial_loss = float(module.eval_loss(params, xs_buf, ys_buf, sample_count))
+
+        learning_rate = 0.1
+        steps = 200
+        for _ in range(steps):
+            module.train_step(params, grads, xs_buf, ys_buf, sample_count, learning_rate)
+
+        final_loss = float(module.eval_loss(params, xs_buf, ys_buf, sample_count))
+
+        # Convergence: should significantly reduce MSE and reach a small absolute error.
+        assert final_loss < initial_loss * 1e-2
+        assert final_loss < 1e-3
+
+        # Parameter packing: [w0, w1, bias] for y = w0*x + w1*x^2 + bias
+        learned = params.to_numpy().view(np.float32)[:param_count]
+        expected = np.array([-0.5, 2.0, 0.25], dtype=np.float32)
+        assert np.allclose(learned, expected, rtol=0.1, atol=0.1)
+
+    finally:
+        device.close()
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v", "-s"])