Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions tests/expected-failure-via-glsl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ tests/cooperative-vector/matrix-mul-rwbyteaddress-packed.slang (vk)
tests/cooperative-vector/matrix-mul-structuredbuffer-packed.slang (vk)
tests/cooperative-vector/outer-product-structuredbuffer.slang (vk)
tests/cooperative-vector/outer-product.slang (vk)
tests/language-feature/dynamic-dispatch/layout-64bit-scalar.slang.2 (vk)
tests/language-feature/dynamic-dispatch/layout-64bit-vector.slang.2 (vk)
tests/language-feature/dynamic-dispatch/layout-mixed-bitwidths.slang.2 (vk)
tests/language-feature/saturated-cooperation/fuse3.slang (vk)
tests/language-feature/saturated-cooperation/fuse-product.slang (vk)
tests/language-feature/saturated-cooperation/fuse.slang (vk)
Expand Down
104 changes: 104 additions & 0 deletions tests/language-feature/dynamic-dispatch/layout-16bit-vectors.slang
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Vectors of 16-bit types pack two elements per uint32 field in AnyValue.
// A half4 occupies 2 uint32 fields; a vector<uint16_t, 3> also uses 2 uint32
// fields, with only half of the second field used. This tests the 2-byte
// alignment logic with different element counts.

//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cpu -compute -shaderobj -output-using-type
//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute -shaderobj -output-using-type
//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -output-using-type -render-feature int16,half
//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-dx12 -compute -profile sm_6_2 -shaderobj -output-using-type
//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-metal -compute -shaderobj -output-using-type

// Result buffer written by computeMain. The directive below supplies seven
// zero-valued elements with a 4-byte stride and tags the buffer with `out`.
//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0], stride=4):out,name=outputBuffer
RWStructuredBuffer<float> outputBuffer;

// Selector values (0, 1, 2) that computeMain passes to make16BitVec to choose
// which implementation is created.
// NOTE(review): presumably read from a buffer so the kind is only known at
// run time -- confirm.
//TEST_INPUT:ubuffer(data=[0 1 2], stride=4):name=kindBuffer
StructuredBuffer<int> kindBuffer;

// Interface implemented by the 16-bit vector wrappers in this test.
interface I16BitVec
{
// Returns the sum of all vector components as a float.
float getSum();
// Returns the first vector component as a float.
float getFirst();
}

// I16BitVec implementation that stores a four-component half vector.
struct Half4Impl : I16BitVec
{
    half4 vals;

    // First component, widened to float.
    float getFirst()
    {
        return float(vals[0]);
    }

    // Sum of all four components; each one is widened to float before it is
    // added, and the additions run left to right.
    float getSum()
    {
        float total = float(vals[0]);
        total += float(vals[1]);
        total += float(vals[2]);
        total += float(vals[3]);
        return total;
    }
}

// I16BitVec implementation that stores a two-component int16_t vector.
struct Int16_2Impl : I16BitVec
{
    int16_t2 vals;

    // First component, converted to float.
    float getFirst()
    {
        return float(vals[0]);
    }

    // Sum of both components; each is converted to float before the addition.
    float getSum()
    {
        float total = float(vals[0]);
        total += float(vals[1]);
        return total;
    }
}

// I16BitVec implementation that stores a three-component uint16_t vector.
struct Uint16_3Impl : I16BitVec
{
    vector<uint16_t, 3> vals;

    // First component, converted to float.
    float getFirst()
    {
        return float(vals[0]);
    }

    // Sum of the three components; each is converted to float before it is
    // added, and the additions run left to right.
    float getSum()
    {
        float total = float(vals[0]);
        total += float(vals[1]);
        total += float(vals[2]);
        return total;
    }
}

// Creates one of the I16BitVec implementations, selected by `kind`:
//   0     -> Half4Impl    holding (1.5, 2.5, 3.5, 4.5)
//   1     -> Int16_2Impl  holding (-10, 20)
//   other -> Uint16_3Impl holding (100, 200, 300)
I16BitVec make16BitVec(int kind)
{
    if (kind == 0)
    {
        Half4Impl halfImpl;
        halfImpl.vals = half4(half(1.5), half(2.5), half(3.5), half(4.5));
        return halfImpl;
    }
    else if (kind == 1)
    {
        Int16_2Impl shortImpl;
        shortImpl.vals = int16_t2(int16_t(-10), int16_t(20));
        return shortImpl;
    }

    // Every remaining selector value falls through to the uint16_t variant.
    Uint16_3Impl ushortImpl;
    ushortImpl.vals = vector<uint16_t, 3>(uint16_t(100), uint16_t(200), uint16_t(300));
    return ushortImpl;
}

// Compute entry point (one thread). Creates each I16BitVec implementation via
// make16BitVec, using the selectors stored in kindBuffer, and records the
// results in outputBuffer slots 0 through 6. The order of the stores must stay
// in step with the order of the expected-output directives below.
[numthreads(1, 1, 1)]
void computeMain()
{
// kindBuffer[0] is 0, which make16BitVec maps to Half4Impl.
I16BitVec h4 = make16BitVec(kindBuffer[0]);
// 1.5 + 2.5 + 3.5 + 4.5 = 12.0
// CHECK: 12.0
outputBuffer[0] = h4.getSum();
// CHECK: 1.5
outputBuffer[1] = h4.getFirst();

// kindBuffer[1] is 1, which make16BitVec maps to Int16_2Impl.
I16BitVec i2 = make16BitVec(kindBuffer[1]);
// -10 + 20 = 10
// CHECK: 10.0
outputBuffer[2] = i2.getSum();
// CHECK: -10.0
outputBuffer[3] = i2.getFirst();

// kindBuffer[2] is 2, which falls through to Uint16_3Impl.
I16BitVec u3 = make16BitVec(kindBuffer[2]);
// 100 + 200 + 300 = 600
// CHECK: 600.0
outputBuffer[4] = u3.getSum();
// CHECK: 100.0
outputBuffer[5] = u3.getFirst();

// Re-read half4 to verify no corruption
I16BitVec h4b = make16BitVec(kindBuffer[0]);
// CHECK: 12.0
outputBuffer[6] = h4b.getSum();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// 64-bit scalar types (double, int64_t, uint64_t) are marshalled into
// AnyValue as two consecutive uint32 fields with 8-byte alignment.
// This test verifies that the split-and-reconstruct preserves values
// correctly through dynamic dispatch.

//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cpu -compute -shaderobj -output-using-type
//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute -shaderobj -output-using-type
//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -output-using-type -render-feature double,int64

// Result buffer written by computeMain. The directive below supplies seven
// zero-valued elements with a 4-byte stride and tags the buffer with `out`.
// The element type is float, so integer results stored here are converted.
//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0], stride=4):out,name=outputBuffer
RWStructuredBuffer<float> outputBuffer;

// Selector values (0, 1, 2) that computeMain passes to makeValue to choose
// which implementation is created.
// NOTE(review): presumably read from a buffer so the kind is only known at
// run time -- confirm.
//TEST_INPUT:ubuffer(data=[0 1 2], stride=4):name=kindBuffer
StructuredBuffer<int> kindBuffer;

// Interface implemented by the 64-bit scalar wrappers in this test.
interface IValue
{
// Returns the stored value converted to float.
float asFloat();
// Returns an int derived from the lower 32 bits of the value's 64-bit
// integer form (see each implementation for the exact conversion).
int lowBits();
// Returns an int derived from the value's 64-bit integer form shifted
// right by 32.
int highBits();
}

// IValue implementation that stores a double.
// lowBits() and highBits() work on the value converted to int64_t, not on the
// raw floating-point bit pattern.
struct DoubleImpl : IValue
{
    double val;

    // The stored double narrowed to float.
    float asFloat()
    {
        return float(val);
    }

    // Integer conversion of the value, masked with 0xFFFFFFFF and narrowed to int.
    int lowBits()
    {
        let asInteger = int64_t(val);
        return int(asInteger & 0xFFFFFFFF);
    }

    // Integer conversion of the value, shifted right by 32 and narrowed to int.
    int highBits()
    {
        let asInteger = int64_t(val);
        return int(asInteger >> 32);
    }
}

// IValue implementation that stores a signed 64-bit integer.
struct Int64Impl : IValue
{
    int64_t val;

    // The stored integer converted to float.
    float asFloat()
    {
        return float(val);
    }

    // The value masked with 0xFFFFFFFF, then narrowed to int.
    int lowBits()
    {
        let lowerWord = val & 0xFFFFFFFF;
        return int(lowerWord);
    }

    // The value shifted right by 32, then narrowed to int.
    int highBits()
    {
        let upperWord = val >> 32;
        return int(upperWord);
    }
}

// IValue implementation that stores an unsigned 64-bit integer.
struct Uint64Impl : IValue
{
    uint64_t val;

    // The stored integer converted to float.
    float asFloat()
    {
        return float(val);
    }

    // The value masked with 0xFFFFFFFF, then narrowed to int.
    int lowBits()
    {
        let lowerWord = val & 0xFFFFFFFF;
        return int(lowerWord);
    }

    // The value shifted right by 32, then narrowed to int.
    int highBits()
    {
        let upperWord = val >> 32;
        return int(upperWord);
    }
}

// Creates one of the IValue implementations, selected by `kind`:
//   0     -> DoubleImpl holding 3.25
//   1     -> Int64Impl  holding -42
//   other -> Uint64Impl holding (5 << 32) | 99
IValue makeValue(int kind)
{
    if (kind == 0)
    {
        DoubleImpl doubleImpl;
        doubleImpl.val = 3.25;
        return doubleImpl;
    }
    else if (kind == 1)
    {
        Int64Impl signedImpl;
        signedImpl.val = -42;
        return signedImpl;
    }

    // Every remaining selector value falls through to the unsigned variant.
    // Its value has a non-zero upper word (5) and a distinct lower word (99).
    Uint64Impl unsignedImpl;
    unsignedImpl.val = (uint64_t(5) << 32) | uint64_t(99);
    return unsignedImpl;
}

// Compute entry point (one thread). Creates each IValue implementation via
// makeValue, using the selectors stored in kindBuffer, and records the results
// in outputBuffer slots 0 through 6. outputBuffer holds floats, so the int
// results of lowBits()/highBits() are converted to float when stored.
// NOTE(review): dbl.highBits() and u64.asFloat() are never recorded.
[numthreads(1, 1, 1)]
void computeMain()
{
// kindBuffer[0] is 0, which makeValue maps to DoubleImpl (3.25).
IValue dbl = makeValue(kindBuffer[0]);
// CHECK: 3.25
outputBuffer[0] = dbl.asFloat();
// DoubleImpl.lowBits() converts the value to int64_t first, so 3.25 yields 3.
// CHECK: 3
outputBuffer[1] = dbl.lowBits();

// kindBuffer[1] is 1, which makeValue maps to Int64Impl (-42).
IValue i64 = makeValue(kindBuffer[1]);
// CHECK: -42.0
outputBuffer[2] = i64.asFloat();
// The low 32 bits of -42 are 0xFFFFFFD6, which reads back as -42 once
// narrowed to int (lower 32 bits preserved)
// CHECK: -42
outputBuffer[3] = i64.lowBits();
// -42 >> 32 = -1 (sign extension)
// CHECK: -1
outputBuffer[4] = i64.highBits();

// kindBuffer[2] is 2, which falls through to Uint64Impl.
IValue u64 = makeValue(kindBuffer[2]);
// (5 << 32) | 99: lowBits = 99
// CHECK: 99
outputBuffer[5] = u64.lowBits();
// highBits = 5 (non-zero upper word proves no 32-bit truncation)
// CHECK: 5
outputBuffer[6] = u64.highBits();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Vectors of 64-bit types are marshalled element-by-element, with each
// element requiring 8-byte alignment. A double2 occupies 4 uint32 fields
// in the AnyValue buffer.

//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cpu -compute -shaderobj -output-using-type
//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute -shaderobj -output-using-type
//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -output-using-type -render-feature double,int64

// Result buffer written by computeMain. The directive below supplies six
// zero-valued elements with a 4-byte stride and tags the buffer with `out`.
//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0], stride=4):out,name=outputBuffer
RWStructuredBuffer<float> outputBuffer;

// Selector values (0, 1) that computeMain passes to makeVec to choose which
// implementation is created.
// NOTE(review): presumably read from a buffer so the kind is only known at
// run time -- confirm.
//TEST_INPUT:ubuffer(data=[0 1], stride=4):name=kindBuffer
StructuredBuffer<int> kindBuffer;

// Interface implemented by the 64-bit vector wrappers in this test.
interface IVec
{
// Returns the x component as a float.
float getX();
// Returns the y component as a float.
float getY();
}

// IVec implementation that stores a two-component double vector.
struct Double2Impl : IVec
{
    double2 val;

    // The x component narrowed to float.
    float getX()
    {
        let component = val.x;
        return float(component);
    }

    // The y component narrowed to float.
    float getY()
    {
        let component = val.y;
        return float(component);
    }
}

// IVec implementation that stores a two-component int64_t vector.
struct Int64_2Impl : IVec
{
    vector<int64_t, 2> val;

    // The x component converted to float.
    float getX()
    {
        let component = val.x;
        return float(component);
    }

    // The y component converted to float.
    float getY()
    {
        let component = val.y;
        return float(component);
    }
}

// Creates one of the IVec implementations, selected by `kind`:
//   0     -> Double2Impl holding (1.5, 2.75)
//   other -> Int64_2Impl holding (-10, 20)
IVec makeVec(int kind)
{
    if (kind == 0)
    {
        Double2Impl doubleVec;
        doubleVec.val = double2(1.5, 2.75);
        return doubleVec;
    }

    // Every remaining selector value falls through to the int64_t variant.
    Int64_2Impl longVec;
    longVec.val = vector<int64_t, 2>(-10, 20);
    return longVec;
}

// Compute entry point (one thread). Creates each IVec implementation via
// makeVec, using the selectors stored in kindBuffer, and records the results
// in outputBuffer slots 0 through 5. The order of the stores must stay in step
// with the order of the expected-output directives below.
[numthreads(1, 1, 1)]
void computeMain()
{
// kindBuffer[0] is 0, which makeVec maps to Double2Impl (1.5, 2.75).
IVec dblVec = makeVec(kindBuffer[0]);
// CHECK: 1.5
outputBuffer[0] = dblVec.getX();
// CHECK: 2.75
outputBuffer[1] = dblVec.getY();

// kindBuffer[1] is 1, which falls through to Int64_2Impl (-10, 20).
IVec i64Vec = makeVec(kindBuffer[1]);
// CHECK: -10.0
outputBuffer[2] = i64Vec.getX();
// CHECK: 20.0
outputBuffer[3] = i64Vec.getY();

// Cross-check: re-read double impl to verify no stale data
IVec dbl2 = makeVec(kindBuffer[0]);
// CHECK: 1.5
outputBuffer[4] = dbl2.getX();
// CHECK: 2.75
outputBuffer[5] = dbl2.getY();
}
103 changes: 103 additions & 0 deletions tests/language-feature/dynamic-dispatch/layout-8bit-vectors.slang
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Vectors of 8-bit types are packed via bitfield insert into uint32 fields.
// A vector<int8_t, 4> fills exactly one uint32; a vector<int8_t, 3> uses
// 3 bytes, leaving 1 byte unused. This tests the intraFieldOffset
// arithmetic in AnyValue marshalling for elements that are narrower than a
// uint32 field (byte-aligned rather than word-aligned).

//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cpu -compute -shaderobj -output-using-type
//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute -shaderobj -output-using-type
//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-vk -compute -shaderobj -output-using-type -render-feature int16
//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-metal -compute -shaderobj -output-using-type

// Result buffer written by computeMain. The directive below supplies seven
// zero-valued elements with a 4-byte stride and tags the buffer with `out`.
//TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0], stride=4):out,name=outputBuffer
RWStructuredBuffer<int> outputBuffer;

// Selector values (0, 1, 2) that computeMain passes to make8BitVec to choose
// which implementation is created.
// NOTE(review): presumably read from a buffer so the kind is only known at
// run time -- confirm.
//TEST_INPUT:ubuffer(data=[0 1 2], stride=4):name=kindBuffer
StructuredBuffer<int> kindBuffer;

// Interface implemented by the 8-bit vector wrappers in this test.
interface I8BitVec
{
// Returns the sum of all vector components as an int.
int getSum();
// Returns the first vector component as an int.
int getFirst();
}

// I8BitVec implementation that stores a four-component int8_t vector.
struct Vec4i8Impl : I8BitVec
{
    vector<int8_t, 4> vals;

    // First component, widened to int.
    int getFirst()
    {
        return int(vals[0]);
    }

    // Sum of all four components; each is widened to int before it is added.
    int getSum()
    {
        int total = int(vals[0]);
        total += int(vals[1]);
        total += int(vals[2]);
        total += int(vals[3]);
        return total;
    }
}

// I8BitVec implementation that stores a two-component uint8_t vector.
struct Vec2u8Impl : I8BitVec
{
    vector<uint8_t, 2> vals;

    // First component, widened to int.
    int getFirst()
    {
        return int(vals[0]);
    }

    // Sum of both components. Each is widened to int before the addition, so
    // the result may exceed the 8-bit range.
    int getSum()
    {
        int total = int(vals[0]);
        total += int(vals[1]);
        return total;
    }
}

// I8BitVec implementation that stores a three-component int8_t vector.
struct Vec3i8Impl : I8BitVec
{
    vector<int8_t, 3> vals;

    // First component, widened to int.
    int getFirst()
    {
        return int(vals[0]);
    }

    // Sum of the three components; each is widened to int before it is added.
    int getSum()
    {
        int total = int(vals[0]);
        total += int(vals[1]);
        total += int(vals[2]);
        return total;
    }
}

// Creates one of the I8BitVec implementations, selected by `kind`:
//   0     -> Vec4i8Impl holding (10, 20, 30, 40)
//   1     -> Vec2u8Impl holding (100, 200)
//   other -> Vec3i8Impl holding (-1, -2, -3)
I8BitVec make8BitVec(int kind)
{
    if (kind == 0)
    {
        Vec4i8Impl quad;
        quad.vals = vector<int8_t, 4>(int8_t(10), int8_t(20), int8_t(30), int8_t(40));
        return quad;
    }
    else if (kind == 1)
    {
        Vec2u8Impl pair;
        pair.vals = vector<uint8_t, 2>(uint8_t(100), uint8_t(200));
        return pair;
    }

    // Every remaining selector value falls through to the three-component variant.
    Vec3i8Impl triple;
    triple.vals = vector<int8_t, 3>(int8_t(-1), int8_t(-2), int8_t(-3));
    return triple;
}

// Compute entry point (one thread). Creates each I8BitVec implementation via
// make8BitVec, using the selectors stored in kindBuffer, and records the
// results in outputBuffer slots 0 through 6. The order of the stores must stay
// in step with the order of the expected-output directives below.
[numthreads(1, 1, 1)]
void computeMain()
{
// kindBuffer[0] is 0, which make8BitVec maps to Vec4i8Impl.
I8BitVec v4 = make8BitVec(kindBuffer[0]);
// 10 + 20 + 30 + 40 = 100
// CHECK: 100
outputBuffer[0] = v4.getSum();
// CHECK: 10
outputBuffer[1] = v4.getFirst();

// kindBuffer[1] is 1, which make8BitVec maps to Vec2u8Impl.
I8BitVec v2 = make8BitVec(kindBuffer[1]);
// 100 + 200 = 300
// CHECK: 300
outputBuffer[2] = v2.getSum();
// CHECK: 100
outputBuffer[3] = v2.getFirst();

// kindBuffer[2] is 2, which falls through to Vec3i8Impl.
I8BitVec v3 = make8BitVec(kindBuffer[2]);
// (-1) + (-2) + (-3) = -6
// CHECK: -6
outputBuffer[4] = v3.getSum();
// CHECK: -1
outputBuffer[5] = v3.getFirst();

// Re-read vec4 to verify no corruption from smaller vector impls
I8BitVec v4b = make8BitVec(kindBuffer[0]);
// CHECK: 100
outputBuffer[6] = v4b.getSum();
}
Loading
Loading