|
158 | 158 | <span id="150">150</span>
|
159 | 159 | <span id="151">151</span>
|
160 | 160 | <span id="152">152</span>
|
| 161 | +<span id="153">153</span> |
| 162 | +<span id="154">154</span> |
| 163 | +<span id="155">155</span> |
| 164 | +<span id="156">156</span> |
| 165 | +<span id="157">157</span> |
| 166 | +<span id="158">158</span> |
| 167 | +<span id="159">159</span> |
| 168 | +<span id="160">160</span> |
| 169 | +<span id="161">161</span> |
| 170 | +<span id="162">162</span> |
| 171 | +<span id="163">163</span> |
| 172 | +<span id="164">164</span> |
| 173 | +<span id="165">165</span> |
| 174 | +<span id="166">166</span> |
| 175 | +<span id="167">167</span> |
| 176 | +<span id="168">168</span> |
| 177 | +<span id="169">169</span> |
| 178 | +<span id="170">170</span> |
| 179 | +<span id="171">171</span> |
| 180 | +<span id="172">172</span> |
| 181 | +<span id="173">173</span> |
161 | 182 | </pre><pre class="rust"><code><span class="kw">use</span> <span class="kw">crate</span>::{
|
162 | 183 | <span class="ident">util::align_to</span>, <span class="ident">Buffer</span>, <span class="ident">BufferAddress</span>, <span class="ident">BufferDescriptor</span>, <span class="ident">BufferSize</span>, <span class="ident">BufferUsages</span>,
|
163 | 184 | <span class="ident">BufferViewMut</span>, <span class="ident">CommandEncoder</span>, <span class="ident">Device</span>, <span class="ident">MapMode</span>,
|
|
171 | 192 | <span class="ident">offset</span>: <span class="ident">BufferAddress</span>,
|
172 | 193 | }
|
173 | 194 |
|
174 |
| -<span class="doccomment">/// Staging belt is a machine that uploads data.</span> |
| 195 | +<span class="doccomment">/// Efficiently performs many buffer writes by sharing and reusing temporary buffers.</span> |
175 | 196 | <span class="doccomment">///</span>
|
176 | 197 | <span class="doccomment">/// Internally it uses a ring-buffer of staging buffers that are sub-allocated.</span>
|
177 |
| -<span class="doccomment">/// It has an advantage over [`Queue::write_buffer`] in a way that it returns a mutable slice,</span> |
| 198 | +<span class="doccomment">/// It has an advantage over [`Queue::write_buffer()`] in a way that it returns a mutable slice,</span> |
178 | 199 | <span class="doccomment">/// which you can fill to avoid an extra data copy.</span>
|
179 | 200 | <span class="doccomment">///</span>
|
180 | 201 | <span class="doccomment">/// Using a staging belt is slightly complicated, and generally goes as follows:</span>
|
181 |
| -<span class="doccomment">/// - Write to buffers that need writing to using [`StagingBelt::write_buffer`].</span> |
182 |
| -<span class="doccomment">/// - Call `finish`.</span> |
183 |
| -<span class="doccomment">/// - Submit all command encoders used with `StagingBelt::write_buffer`.</span> |
184 |
| -<span class="doccomment">/// - Call `recall`</span> |
| 202 | +<span class="doccomment">/// 1. Write to buffers that need writing to using [`StagingBelt::write_buffer()`].</span> |
| 203 | +<span class="doccomment">/// 2. Call [`StagingBelt::finish()`].</span> |
| 204 | +<span class="doccomment">/// 3. Submit all command encoders that were used in step 1.</span> |
| 205 | +<span class="doccomment">/// 4. Call [`StagingBelt::recall()`].</span> |
185 | 206 | <span class="doccomment">///</span>
|
186 |
| -<span class="doccomment">/// [`Queue::write_buffer`]: crate::Queue::write_buffer</span> |
| 207 | +<span class="doccomment">/// [`Queue::write_buffer()`]: crate::Queue::write_buffer</span> |
187 | 208 | <span class="kw">pub</span> <span class="kw">struct</span> <span class="ident">StagingBelt</span> {
|
188 | 209 | <span class="ident">chunk_size</span>: <span class="ident">BufferAddress</span>,
|
189 |
| - <span class="doccomment">/// Chunks that we are actively using for pending transfers at this moment.</span> |
| 210 | + <span class="doccomment">/// Chunks into which we are accumulating data to be transferred.</span> |
190 | 211 | <span class="ident">active_chunks</span>: <span class="ident">Vec</span><span class="op"><</span><span class="ident">Chunk</span><span class="op">></span>,
|
191 |
| - <span class="doccomment">/// Chunks that have scheduled transfers already.</span> |
| 212 | + <span class="doccomment">/// Chunks that have scheduled transfers already; they are unmapped and some</span> |
| 213 | + <span class="doccomment">/// command encoder has one or more `copy_buffer_to_buffer` commands with them</span> |
| 214 | + <span class="doccomment">/// as source.</span> |
192 | 215 | <span class="ident">closed_chunks</span>: <span class="ident">Vec</span><span class="op"><</span><span class="ident">Chunk</span><span class="op">></span>,
|
193 |
| - <span class="doccomment">/// Chunks that are back from the GPU and ready to be used.</span> |
| 216 | + <span class="doccomment">/// Chunks that are back from the GPU and ready to be mapped for write and put</span> |
| 217 | + <span class="doccomment">/// into `active_chunks`.</span> |
194 | 218 | <span class="ident">free_chunks</span>: <span class="ident">Vec</span><span class="op"><</span><span class="ident">Chunk</span><span class="op">></span>,
|
| 219 | + <span class="doccomment">/// When closed chunks are mapped again, the map callback sends them here.</span> |
195 | 220 | <span class="ident">sender</span>: <span class="ident">mpsc::Sender</span><span class="op"><</span><span class="ident">Chunk</span><span class="op">></span>,
|
| 221 | + <span class="doccomment">/// Free chunks are received here to be put on `self.free_chunks`.</span> |
196 | 222 | <span class="ident">receiver</span>: <span class="ident">mpsc::Receiver</span><span class="op"><</span><span class="ident">Chunk</span><span class="op">></span>,
|
197 | 223 | }
|
198 | 224 |
|
199 | 225 | <span class="kw">impl</span> <span class="ident">StagingBelt</span> {
|
200 | 226 | <span class="doccomment">/// Create a new staging belt.</span>
|
201 | 227 | <span class="doccomment">///</span>
|
202 |
| - <span class="doccomment">/// The `chunk_size` is the unit of internal buffer allocation.</span> |
203 |
| - <span class="doccomment">/// It's better when it's big, but ideally still 1-4 times less than</span> |
204 |
| - <span class="doccomment">/// the total amount of data uploaded per submission.</span> |
| 228 | + <span class="doccomment">/// The `chunk_size` is the unit of internal buffer allocation; writes will be</span> |
| 229 | + <span class="doccomment">/// sub-allocated within each chunk. Therefore, for optimal use of memory, the</span> |
| 230 | + <span class="doccomment">/// chunk size should be:</span> |
| 231 | + <span class="doccomment">///</span> |
| 232 | + <span class="doccomment">/// * larger than the largest single [`StagingBelt::write_buffer()`] operation;</span> |
| 233 | + <span class="doccomment">/// * 1-4 times less than the total amount of data uploaded per submission</span> |
| 234 | + <span class="doccomment">/// (per [`StagingBelt::finish()`]); and</span> |
| 235 | + <span class="doccomment">/// * bigger is better, within these bounds.</span> |
205 | 236 | <span class="kw">pub</span> <span class="kw">fn</span> <span class="ident">new</span>(<span class="ident">chunk_size</span>: <span class="ident">BufferAddress</span>) -> <span class="self">Self</span> {
|
206 | 237 | <span class="kw">let</span> (<span class="ident">sender</span>, <span class="ident">receiver</span>) <span class="op">=</span> <span class="ident">mpsc::channel</span>();
|
207 | 238 | <span class="ident">StagingBelt</span> {
|
|
218 | 249 | <span class="doccomment">/// at the specified offset.</span>
|
219 | 250 | <span class="doccomment">///</span>
|
220 | 251 | <span class="doccomment">/// The upload will be placed into the provided command encoder. This encoder</span>
|
221 |
| - <span class="doccomment">/// must be submitted after `finish` is called and before `recall` is called.</span> |
| 252 | + <span class="doccomment">/// must be submitted after [`StagingBelt::finish()`] is called and before</span> |
| 253 | + <span class="doccomment">/// [`StagingBelt::recall()`] is called.</span> |
| 254 | + <span class="doccomment">///</span> |
| 255 | + <span class="doccomment">/// If the `size` is greater than the size of any free internal buffer, a new buffer</span> |
| 256 | + <span class="doccomment">/// will be allocated for it. Therefore, the `chunk_size` passed to [`StagingBelt::new()`]</span> |
| 257 | + <span class="doccomment">/// should ideally be larger than every such size.</span> |
222 | 258 | <span class="kw">pub</span> <span class="kw">fn</span> <span class="ident">write_buffer</span>(
|
223 | 259 | <span class="kw-2">&mut</span> <span class="self">self</span>,
|
224 | 260 | <span class="ident">encoder</span>: <span class="kw-2">&mut</span> <span class="ident">CommandEncoder</span>,
|
|
268 | 304 |
|
269 | 305 | <span class="doccomment">/// Prepare currently mapped buffers for use in a submission.</span>
|
270 | 306 | <span class="doccomment">///</span>
|
271 |
| - <span class="doccomment">/// At this point, all the partially used staging buffers are closed until</span> |
272 |
| - <span class="doccomment">/// the GPU is done copying the data from them.</span> |
| 307 | + <span class="doccomment">/// This must be called before the command encoder(s) provided to</span> |
| 308 | + <span class="doccomment">/// [`StagingBelt::write_buffer()`] are submitted.</span> |
| 309 | + <span class="doccomment">///</span> |
| 310 | + <span class="doccomment">/// At this point, all the partially used staging buffers are closed (cannot be used for</span> |
| 311 | + <span class="doccomment">/// further writes) until after [`StagingBelt::recall()`] is called *and* the GPU is done</span> |
| 312 | + <span class="doccomment">/// copying the data from them.</span> |
273 | 313 | <span class="kw">pub</span> <span class="kw">fn</span> <span class="ident">finish</span>(<span class="kw-2">&mut</span> <span class="self">self</span>) {
|
274 | 314 | <span class="kw">for</span> <span class="ident">chunk</span> <span class="kw">in</span> <span class="self">self</span>.<span class="ident">active_chunks</span>.<span class="ident">drain</span>(..) {
|
275 | 315 | <span class="ident">chunk</span>.<span class="ident">buffer</span>.<span class="ident">unmap</span>();
|
|
279 | 319 |
|
280 | 320 | <span class="doccomment">/// Recall all of the closed buffers back to be reused.</span>
|
281 | 321 | <span class="doccomment">///</span>
|
282 |
| - <span class="doccomment">/// This has to be called after the command encoders written to `write_buffer` are submitted!</span> |
| 322 | + <span class="doccomment">/// This must only be called after the command encoder(s) provided to</span> |
| 323 | + <span class="doccomment">/// [`StagingBelt::write_buffer()`] are submitted. Additional calls are harmless.</span> |
| 324 | + <span class="doccomment">/// Not calling this as soon as possible may result in increased buffer memory usage.</span> |
283 | 325 | <span class="kw">pub</span> <span class="kw">fn</span> <span class="ident">recall</span>(<span class="kw-2">&mut</span> <span class="self">self</span>) {
|
284 | 326 | <span class="kw">while</span> <span class="kw">let</span> <span class="prelude-val">Ok</span>(<span class="kw-2">mut</span> <span class="ident">chunk</span>) <span class="op">=</span> <span class="self">self</span>.<span class="ident">receiver</span>.<span class="ident">try_recv</span>() {
|
285 | 327 | <span class="ident">chunk</span>.<span class="ident">offset</span> <span class="op">=</span> <span class="number">0</span>;
|
|
0 commit comments