|
| 1 | +/** |
| 2 | + * meshletdec.slang - an example GPU decoder for meshlet data encoded using meshopt_encodeMeshlet |
| 3 | + * This is intended to be used as a starting point for applications that want to decode meshlet data on the GPU. |
| 4 | + * |
| 5 | + * The shader exposes an entrypoint, decodeMeshlets, that decodes a set of meshlets; each meshlet is decoded independently, |
| 6 | + * and the output vertex/triangle data is written as uint32 per element (triangle data is written as 0xccbbaa). |
| 7 | + * This matches the output format for meshopt_decodeMeshlet with vertex_size=4 triangle_size=4. If alternative formats are |
| 8 | + * needed, the code should be changed to output them; note that for triangle data, it may make sense to output data to shared |
| 9 | + * memory to be able to use larger aligned 32-bit writes to global memory after that. |
| 10 | + * |
| 11 | + * Copyright (C) 2016-2026, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) |
| 12 | + * This code is distributed under the MIT License. See notice at the end of this file. |
| 13 | + */ |
| 14 | + |
/**
 * Per-meshlet decode request; one entry of gMeshlets is consumed by each decodeMeshlets thread.
 */
struct MeshletDesc
{
    // index into gStream of the first encoded byte of this meshlet
    uint stream_offset;

    // index into gOutput of the first decoded vertex; triangles are written starting at output_offset + vertex_count
    uint output_offset;

    // number of gStream elements occupied by the encoded meshlet
    uint16_t encoded_size;

    // number of vertex references to decode
    uint8_t vertex_count;

    // number of triangles to decode
    uint8_t triangle_count;
};
| 23 | + |
// encoded meshlet data, one element per byte; indexed via MeshletDesc::stream_offset
[[vk::binding(0)]] StructuredBuffer<uint8_t> gStream : register(t0);

// one descriptor per meshlet; indexed by the dispatch thread id
[[vk::binding(1)]] StructuredBuffer<MeshletDesc> gMeshlets : register(t1);

// decoded vertices and triangles, one uint per element
[[vk::binding(2)]] RWStructuredBuffer<uint> gOutput : register(u2);

// threads whose index is >= gMeshletCount exit without touching gMeshlets
[[vk::binding(3)]] cbuffer MeshletConfigCB : register(b3)
{
    uint gMeshletCount;
}
| 32 | + |
/**
 * Decodes the delta-encoded vertex references of one meshlet and stores them in gOutput.
 *
 * out_vertices  index in gOutput of the first decoded vertex
 * ctrl          index in gStream of the control bytes (one byte per group of 4 vertices)
 * data          index in gStream of the first delta byte
 * bound         largest value `data` may hold when a new group of 4 starts
 * vertex_count  number of vertices to store
 *
 * Returns the gStream index following the last consumed delta byte, or ~0u if `data` exceeded `bound`.
 */
uint decodeVertices(uint out_vertices, uint ctrl, uint data, uint bound, uint vertex_count)
{
    // starting from ~0u makes the first decoded value equal to its delta (prev + delta + 1 wraps around)
    uint prev = ~0u;

    for (uint base = 0; base < vertex_count; base += 4)
    {
        // validated once per group; the reads below may touch up to 3 bytes past the consumed data
        if (data > bound)
            return ~0u;

        uint control = uint(gStream[ctrl + base / 4]);
        bool wide = control == 0xff;

        for (int lane = 0; lane < 4; ++lane)
        {
            // the lane's 2-bit code is split: low bit at control bit `lane`, high bit at control bit `lane + 4`
            uint code = ((control >> lane) & 1) | ((control >> (lane + 3)) & 2);

            // a control byte of 0xff switches the entire group to 4-byte deltas; otherwise the code is the byte count
            int size = wide ? 4 : int(code);

            // fetch 4 bytes unconditionally and mask away the ones that belong to later values
            uint b0 = uint(gStream[data + 0]);
            uint b1 = uint(gStream[data + 1]);
            uint b2 = uint(gStream[data + 2]);
            uint b3 = uint(gStream[data + 3]);
            uint keep = (size < 4) ? (1u << (8 * size)) - 1u : ~0u;
            uint v = (b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)) & keep;

            // undo zigzag, then apply the delta with a +1 bias
            uint delta = (v >> 1) ^ (0u - (v & 1u));
            uint value = prev + delta + 1;

            // the final group may be partial: lanes past vertex_count are decoded but not stored
            if (base + lane < vertex_count)
                gOutput[out_vertices + base + lane] = value;

            data += size;
            prev = value;
        }
    }

    return data;
}
| 67 | + |
/**
 * Decodes one triangle from a 4-bit code and advances the decoder state.
 *
 * Codes 0..11 reuse an edge of one of the three most recent triangles (0..3: fifo0, 4..7: fifo1, 8..11: fifo2);
 * bit 1 picks which of the two stored edges is used and bit 0 picks the third vertex (1: extra0, 0: `next`).
 * Codes 12..15 start a new triangle; the two low bits give how many leading vertices come from
 * extra0..extra2, and the remaining ones are taken sequentially starting at `next`.
 *
 * code            4-bit triangle code
 * extra0..extra2  the next three bytes of the extra stream, pre-fetched by the caller
 * fifo0..fifo2    packed edges (0xcbac) of the last three triangles, most recent first; rotated on return
 * next            next sequential vertex index; advanced by the number of sequential vertices used
 * extra           position in the extra stream; advanced by the number of extra bytes used
 *
 * Returns the triangle packed as 0xccbbaa.
 */
uint decodeTriangle(uint code, uint extra0, uint extra1, uint extra2, inout uint fifo0, inout uint fifo1, inout uint fifo2, inout uint next, inout uint extra)
{
    // reuse: 0-1 extra vertices
    uint fifo = code < 4 ? fifo0 : (code < 8 ? fifo1 : fifo2);
    uint edge = fifo >> ((code << 3) & 16); // shift by 16 if bit 1 is set (odd edge for each triangle)
    uint c_reuse = (code & 1) == 1 ? extra0 : next;

    // restart: 0-3 extra vertices
    // when extran is 2 or 3 the (1 - extran) / (2 - extran) terms wrap, but those results are never selected
    uint extran = code & 3;
    uint a = extran > 0 ? extra0 : next;
    uint b = extran > 1 ? extra1 : next + (1 - extran);
    uint c = extran > 2 ? extra2 : next + (2 - extran);

    // select between reuse and restart and repack triangle into edge format (0xcbac)
    bool restart = code >= 12;
    a = restart ? a : (edge >> 8) & 0xff;
    b = restart ? b : edge & 0xff;
    c = restart ? c : c_reuse;

    uint tri = c | (a << 8) | (b << 16) | (c << 24);

    // advance next/extra; reuse codes use 1 lsb for extra count, restart codes use 2 lsbs
    // the mask is parenthesized on purpose: `-` binds tighter than `&`, and the intent is
    // "maximum vertex count minus the number taken from the extra stream"
    uint extrab = restart ? 3 : 1;
    uint extra_used = code & extrab;
    next += extrab - extra_used;
    extra += extra_used;

    // rotate fifo
    fifo2 = fifo1;
    fifo1 = fifo0;
    fifo0 = tri;

    // output triangle is stored without extra edge vertex (0xcbac => 0xcba)
    return tri >> 8;
}
| 101 | + |
/**
 * Decodes the triangles of one meshlet and stores them in gOutput as 0xccbbaa.
 *
 * out_triangles   index in gOutput of the first decoded triangle
 * codes           index in gStream of the triangle codes (two 4-bit codes per byte)
 * extra           index in gStream of the first extra vertex byte
 * bound           largest value `extra` may hold when a new pair of triangles starts
 * triangle_count  number of triangles to store
 *
 * Returns the gStream index following the last consumed extra byte, or ~0u if `extra` exceeded `bound`.
 */
uint decodeTriangles(uint out_triangles, uint codes, uint extra, uint bound, uint triangle_count)
{
    // each fifo slot keeps two edges of an earlier triangle packed into one uint: 0xcbac
    uint fifo0 = 0;
    uint fifo1 = 0;
    uint fifo2 = 0;
    uint next = 0;

    for (uint i = 0; i < triangle_count; i += 2)
    {
        // validated once per pair; the reads below may touch bytes past the consumed data
        if (extra > bound)
            return ~0u;

        // low nibble encodes the first triangle of the pair, high nibble the second
        uint pair = uint(gStream[codes + i / 2]);

        // first triangle: pre-fetch three candidate extra bytes, decodeTriangle consumes 0-3 of them
        uint e0 = uint(gStream[extra + 0]);
        uint e1 = uint(gStream[extra + 1]);
        uint e2 = uint(gStream[extra + 2]);
        uint first = decodeTriangle(pair & 15, e0, e1, e2, fifo0, fifo1, fifo2, next, extra);

        gOutput[out_triangles + i] = first;

        // second triangle: `extra` has advanced, so the candidates are fetched again
        e0 = uint(gStream[extra + 0]);
        e1 = uint(gStream[extra + 1]);
        e2 = uint(gStream[extra + 2]);
        uint second = decodeTriangle(pair >> 4, e0, e1, e2, fifo0, fifo1, fifo2, next, extra);

        // with an odd triangle_count the last high nibble is still decoded, but its result is not stored
        if (i + 1 < triangle_count)
            gOutput[out_triangles + i + 1] = second;
    }

    return extra;
}
| 134 | + |
/**
 * Decodes one encoded meshlet stored in gStream[buffer .. buffer + buffer_size) into gOutput.
 *
 * The triangle codes occupy the end of the buffer, preceded by the vertex control bytes;
 * vertex deltas followed by extra triangle bytes are read from the start of the buffer.
 *
 * Returns 0 on success, -2 if the buffer is too small or a decoder moved past `bound`,
 * and -3 if decoding did not finish exactly at `bound`.
 */
int decodeMeshlet(uint out_vertices, uint vertex_count, uint out_triangles, uint triangle_count, uint buffer, uint buffer_size)
{
    uint codes_bytes = (triangle_count + 1) / 2; // two 4-bit codes per byte
    uint ctrl_bytes = (vertex_count + 3) / 4;    // one control byte per 4 vertices
    uint tail_bytes = codes_bytes + ctrl_bytes;

    // pad the tail to at least 16 bytes
    uint pad_bytes = 0;
    if (tail_bytes < 16)
        pad_bytes = 16 - tail_bytes;

    if (buffer_size < tail_bytes + pad_bytes)
        return -2;

    uint codes = buffer + buffer_size - codes_bytes;
    uint ctrl = codes - ctrl_bytes;

    // the padding guarantees at least 16 bytes after bound, so the decoders can over-read safely
    uint bound = ctrl - pad_bytes;

    uint cursor = decodeVertices(out_vertices, ctrl, buffer, bound, vertex_count);
    if (cursor == ~0u)
        return -2;

    // the extra triangle bytes start where the vertex deltas ended
    cursor = decodeTriangles(out_triangles, codes, cursor, bound, triangle_count);
    if (cursor == ~0u)
        return -2;

    // a well-formed stream ends exactly at bound
    if (cursor != bound)
        return -3;

    return 0;
}
| 162 | + |
/**
 * Compute entry point: each thread decodes the meshlet described by gMeshlets[thread id].
 */
[shader("compute")]
[numthreads(32, 1, 1)]
void decodeMeshlets(uint3 dispatch_thread_id: SV_DispatchThreadID)
{
    uint index = dispatch_thread_id.x;

    // threads past the end of the meshlet list have nothing to do
    if (index >= gMeshletCount)
        return;

    MeshletDesc m = gMeshlets[index];

    uint vertex_count = uint(m.vertex_count);
    uint triangle_count = uint(m.triangle_count);

    // vertices are written first, triangles directly after them
    uint vertices_at = m.output_offset;
    uint triangles_at = vertices_at + vertex_count;

    int status = decodeMeshlet(vertices_at, vertex_count, triangles_at, triangle_count, m.stream_offset, uint(m.encoded_size));

    // on failure, 0xff.. is written to the first word of this meshlet's output
    // this can be adjusted arbitrarily; for example, a separate buffer with a single status for the entire stream could be used
    // note that decoding fails only if the input data is corrupt; so this may not be required at all depending on the requirements
    if (status < 0)
        gOutput[m.output_offset] = ~0u;
}
| 185 | + |
| 186 | +/** |
| 187 | + * Copyright (c) 2016-2026 Arseny Kapoulkine |
| 188 | + * |
| 189 | + * Permission is hereby granted, free of charge, to any person |
| 190 | + * obtaining a copy of this software and associated documentation |
| 191 | + * files (the "Software"), to deal in the Software without |
| 192 | + * restriction, including without limitation the rights to use, |
| 193 | + * copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 194 | + * copies of the Software, and to permit persons to whom the |
| 195 | + * Software is furnished to do so, subject to the following |
| 196 | + * conditions: |
| 197 | + * |
| 198 | + * The above copyright notice and this permission notice shall be |
| 199 | + * included in all copies or substantial portions of the Software. |
| 200 | + * |
| 201 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 202 | + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
| 203 | + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 204 | + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
| 205 | + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 206 | + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 207 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| 208 | + * OTHER DEALINGS IN THE SOFTWARE. |
| 209 | + */ |
0 commit comments