Commit 14bc228

committed
add SlangPy neural Slang integration tests
1 parent 4293931 commit 14bc228

File tree

6 files changed: +309 -0 lines changed
Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""
Neural smoke test that actually exercises Slang autodiff (`bwd_diff(...)`).

Important constraints:
- No dependency on sample apps under `samples/`.
- No dependency on external assets (e.g. image files).

This uses the test-local Slang module `fflayer-bug-repro.slang`, which imports the
experimental `neural` module and calls `bwd_diff(loss)(DifferentialPtrPair<Storage>(...), ...)`.
"""

from __future__ import annotations

from pathlib import Path

import numpy as np
import pytest

import slangpy as spy
from slangpy.core.calldata import SLANG_PATH
from slangpy.testing import helpers


def _get_device_with_native_neural(device_type: spy.DeviceType) -> spy.Device:
    if helpers.should_skip_test_for_device(device_type):
        pytest.skip(f"Device type {device_type.name} not selected for this test run")

    test_dir = Path(__file__).resolve().parent
    compiler_options = spy.SlangCompilerOptions(
        {
            "include_paths": [test_dir, SLANG_PATH],
            "debug_info": spy.SlangDebugInfoLevel.standard,
            "enable_experimental_features": True,
        }
    )

    return spy.Device(
        type=device_type,
        enable_debug_layers=True,
        compiler_options=compiler_options,
        label=f"uncached-slangpy-neural-bwd-diff-{device_type.name}",
    )


@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_neural_bwd_diff_writes_param_grads(device_type: spy.DeviceType) -> None:
    device = _get_device_with_native_neural(device_type)
    try:
        module = spy.Module(device.load_module("fflayer-bug-repro.slang"))

        # 2*2 weights + 2 biases = 6 floats (matches `fflayer-bug-repro.py`)
        params = device.create_buffer(
            data=np.ones((6,), dtype=np.float32),
            usage=spy.BufferUsage.shader_resource | spy.BufferUsage.unordered_access,
        )
        dparams = device.create_buffer(
            data=np.zeros((6,), dtype=np.float32),
            usage=spy.BufferUsage.shader_resource | spy.BufferUsage.unordered_access,
        )

        module.calculate_grad(input=spy.float2(1, 1), params=params, dparams=dparams)

        dparams_np = dparams.to_numpy().view(np.float32)
        assert np.any(dparams_np != 0.0)
    finally:
        device.close()
Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,108 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

"""
SlangPy integration test for neural module FFLayer (Option 2 design).

Tests training convergence for a simple quadratic regression task using:
- FFLayer with storage passed as parameter to eval<S>()
- Manual gradient computation (analytic gradients)
- Simple SGD optimization

We fit a quadratic polynomial y = 2*x^2 - 0.5*x + 0.25 and verify convergence.
"""

from __future__ import annotations

from pathlib import Path

import numpy as np
import pytest

import slangpy as spy
from slangpy.core.calldata import SLANG_PATH
from slangpy.testing import helpers


def _get_device_with_native_neural(device_type: spy.DeviceType) -> spy.Device:
    if helpers.should_skip_test_for_device(device_type):
        pytest.skip(f"Device type {device_type.name} not selected for this test run")

    test_dir = Path(__file__).resolve().parent

    # Use the pre-built neural module from Slang (not compiled from source).
    # The neural module is built as part of the slang-neural-module target.
    # Enable experimental features since neural is an experimental module.
    compiler_options = spy.SlangCompilerOptions(
        {
            "include_paths": [test_dir, SLANG_PATH],
            "debug_info": spy.SlangDebugInfoLevel.standard,
            "enable_experimental_features": True,
        }
    )

    return spy.Device(
        type=device_type,
        enable_debug_layers=True,
        compiler_options=compiler_options,
        label=f"uncached-slangpy-neural-frontend-{device_type.name}",
    )


@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_neural_frontend_training_converges(device_type: spy.DeviceType) -> None:
    """
    Test that training converges for a simple quadratic regression task.

    Uses FFLayer with Option 2 design (storage as parameter to eval<S>).
    """
    device = _get_device_with_native_neural(device_type)
    try:
        module = spy.Module(device.load_module("test_neural_frontend_training.slang"))

        param_count = int(module.get_param_count())
        assert param_count == 3

        # Fit: y = 2*x^2 - 0.5*x + 0.25
        sample_count = 256
        xs = np.linspace(-1.0, 1.0, sample_count, dtype=np.float32)
        ys = (2.0 * xs * xs - 0.5 * xs + 0.25).astype(np.float32)

        xs_buf = device.create_buffer(data=xs, usage=spy.BufferUsage.shader_resource)
        ys_buf = device.create_buffer(data=ys, usage=spy.BufferUsage.shader_resource)

        rng = np.random.default_rng(0)
        params_init = (0.01 * rng.standard_normal(size=(param_count,))).astype(np.float32)

        params = device.create_buffer(
            data=params_init,
            usage=spy.BufferUsage.shader_resource | spy.BufferUsage.unordered_access,
        )
        grads = device.create_buffer(
            data=np.zeros((param_count,), dtype=np.float32),
            usage=spy.BufferUsage.shader_resource | spy.BufferUsage.unordered_access,
        )

        initial_loss = float(module.eval_loss(params, xs_buf, ys_buf, sample_count))

        learning_rate = 0.1
        steps = 200
        for _ in range(steps):
            module.train_step(params, grads, xs_buf, ys_buf, sample_count, learning_rate)

        final_loss = float(module.eval_loss(params, xs_buf, ys_buf, sample_count))

        # Convergence: should significantly reduce MSE and reach a small absolute error.
        assert final_loss < initial_loss * 1e-2
        assert final_loss < 1e-3

        # Parameter packing: [w0, w1, bias] for y = w0*x + w1*x^2 + bias
        learned = params.to_numpy().view(np.float32)[:param_count]
        expected = np.array([-0.5, 2.0, 0.25], dtype=np.float32)
        assert np.allclose(learned, expected, rtol=0.1, atol=0.1)

    finally:
        device.close()


if __name__ == "__main__":
    pytest.main([__file__, "-v", "-s"])
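
Aside (not part of the commit): the convergence thresholds above can be sanity-checked on the CPU, since the shader's gradients are analytic. The sketch below is a minimal NumPy reference mirroring the feature packing [x, x^2, 1] and the SGD update of the Slang `train_step` shown below; the name `train_reference` is illustrative only.

import numpy as np

def train_reference(steps: int = 200, lr: float = 0.1, n: int = 256) -> np.ndarray:
    # Same data as the test: y = 2*x^2 - 0.5*x + 0.25 sampled on [-1, 1].
    xs = np.linspace(-1.0, 1.0, n, dtype=np.float32)
    ys = 2.0 * xs * xs - 0.5 * xs + 0.25
    feats = np.stack([xs, xs * xs, np.ones_like(xs)], axis=1)  # [x, x^2, 1]
    params = 0.01 * np.random.default_rng(0).standard_normal(3).astype(np.float32)
    for _ in range(steps):
        err = feats @ params - ys        # pred = w0*x + w1*x^2 + b
        grads = 2.0 * feats.T @ err / n  # gradient of mean squared error
        params -= lr * grads             # plain SGD, as in train_step
    return params

print(train_reference())  # converges toward [-0.5, 2.0, 0.25]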
Lines changed: 120 additions & 0 deletions
@@ -0,0 +1,120 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// SlangPy test for FFLayer autodiff backward pass (Option 2 design).
// Verifies that autodiff correctly computes gradients through eval<S>().
//
// We fit a quadratic polynomial y = 2*x^2 - 0.5*x + 0.25 using a single linear layer over
// features [x, x^2], and verify training converges.

import slangpy;
import neural;

typealias Storage = StructuredBufferStorage<float>;
typealias V2 = InlineVector<float, 2>;
typealias V1 = InlineVector<float, 1>;
typealias Act = IdentityActivation<float>;

// Linear layer: Input=2 (x, x^2), Output=1 (y), with bias
// Parameters: weights (1x2) + bias (1) = 3 params
typealias LinearLayer = FFLayer<float, V2, V1, Storage, Act, true>;

static const int PARAM_COUNT = LinearLayer.ParameterCount;

int get_param_count()
{
    return PARAM_COUNT;
}

float eval_loss(
    RWStructuredBuffer<float> params,
    StructuredBuffer<float> xs,
    StructuredBuffer<float> ys,
    int count)
{
    let storage = Storage(params);
    // Option 2: only addresses in constructor
    // weights at 0 (2 floats), bias at 2 (1 float)
    let layer = LinearLayer(0, 2);

    float sum = 0.0;
    [MaxIters(1024)]
    for (int i = 0; i < count; i++)
    {
        let x = xs[i];

        float featsArr[2] = { x, x * x };
        let feats = V2(featsArr);

        // Option 2: storage passed to eval<S>()
        let predV = layer.eval<Storage>(storage, NoParam(), feats);
        let pred = predV[0];
        let target = ys[i];

        let err = pred - target;
        sum += err * err;
    }

    return sum / float(count);
}

float train_step(
    RWStructuredBuffer<float> params,
    RWStructuredBuffer<float> grads,
    no_diff StructuredBuffer<float> xs,
    no_diff StructuredBuffer<float> ys,
    no_diff int count,
    no_diff float learningRate)
{
    let pStorage = Storage(params);
    let gStorage = Storage(grads);

    // Clear gradient buffer
    [MaxIters(1024)]
    for (int i = 0; i < PARAM_COUNT; i++)
        grads[i] = 0.0;

    // Option 2: only addresses in constructor
    let layer = LinearLayer(0, 2);

    // Accumulate analytic grads for y = w0*x + w1*x^2 + b, loss = mean((y - t)^2)
    float g0 = 0.0;
    float g1 = 0.0;
    float gb = 0.0;

    float lossSum = 0.0;

    [MaxIters(1024)]
    for (int i = 0; i < count; i++)
    {
        let x = xs[i];
        let t = ys[i];

        float featsArr[2] = { x, x * x };
        let feats = V2(featsArr);

        // Option 2: storage passed to eval<S>()
        let predV = layer.eval<Storage>(pStorage, NoParam(), feats);
        let pred = predV[0];

        let err = pred - t;
        lossSum += err * err;

        g0 += 2.0 * err * x;
        g1 += 2.0 * err * (x * x);
        gb += 2.0 * err;
    }

    let invN = 1.0 / float(count);
    grads[0] = g0 * invN;
    grads[1] = g1 * invN;
    grads[2] = gb * invN;

    // Simple SGD update: params -= lr * grads
    [MaxIters(1024)]
    for (int i = 0; i < PARAM_COUNT; i++)
    {
        params[i] = params[i] - learningRate * grads[i];
    }

    return lossSum * invN;
}

src/sgl/device/shader.cpp

Lines changed: 4 additions & 0 deletions
@@ -333,6 +333,10 @@ void SlangSession::create_session(SlangSessionBuild& build)
     session_options.add(slang::CompilerOptionName::DumpIntermediates, options.dump_intermediates);
     session_options.add(slang::CompilerOptionName::DumpIntermediatePrefix, options.dump_intermediates_prefix);

+    // Enable experimental features (e.g., experimental modules like neural).
+    if (options.enable_experimental_features)
+        session_options.add(slang::CompilerOptionName::ExperimentalFeature, true);
+
     // Add hlsl_nvapi capability.
     session_options.add(
         slang::CompilerOptionName::Capability,

src/sgl/device/shader.h

Lines changed: 3 additions & 0 deletions
@@ -183,6 +183,9 @@ struct SlangCompilerOptions {
     /// Specifies a list of additional arguments to be passed to the downstream compiler.
     std::vector<std::string> downstream_args;

+    /// Enable experimental features (e.g., experimental modules like neural).
+    bool enable_experimental_features{false};
+
     /// When set will dump the intermediate source output.
     bool dump_intermediates{false};

src/slangpy_ext/device/shader.cpp

Lines changed: 6 additions & 0 deletions
@@ -24,6 +24,7 @@ SGL_DICT_TO_DESC_FIELD(floating_point_mode, SlangFloatingPointMode)
 SGL_DICT_TO_DESC_FIELD(debug_info, SlangDebugInfoLevel)
 SGL_DICT_TO_DESC_FIELD(optimization, SlangOptimizationLevel)
 SGL_DICT_TO_DESC_FIELD_LIST(downstream_args, std::string)
+SGL_DICT_TO_DESC_FIELD(enable_experimental_features, bool)
 SGL_DICT_TO_DESC_FIELD(dump_intermediates, bool)
 SGL_DICT_TO_DESC_FIELD(dump_intermediates_prefix, std::string)
 SGL_DICT_TO_DESC_END()

@@ -135,6 +136,11 @@ SGL_PY_EXPORT(device_shader)
         .def_rw("debug_info", &SlangCompilerOptions::debug_info, D(SlangCompilerOptions, debug_info))
         .def_rw("optimization", &SlangCompilerOptions::optimization, D(SlangCompilerOptions, optimization))
         .def_rw("downstream_args", &SlangCompilerOptions::downstream_args, D(SlangCompilerOptions, downstream_args))
+        .def_rw(
+            "enable_experimental_features",
+            &SlangCompilerOptions::enable_experimental_features,
+            "Enable experimental features (e.g., experimental modules like neural)."
+        )
         .def_rw(
             "dump_intermediates",
             &SlangCompilerOptions::dump_intermediates,

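Usage note: with the bindings above, the new flag is reachable both through the options dict accepted by `spy.Device` (as the tests in this commit do) and as a read/write attribute on `SlangCompilerOptions`. A minimal sketch, assuming a working slangpy install; `spy.DeviceType.automatic` and the bare `Device` construction are assumptions for illustration:

import slangpy as spy

# Dict form, matching the tests above:
device = spy.Device(
    type=spy.DeviceType.automatic,
    compiler_options=spy.SlangCompilerOptions({"enable_experimental_features": True}),
)

# Attribute form, via the def_rw binding added in this commit:
options = spy.SlangCompilerOptions()
options.enable_experimental_features = True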