File tree Expand file tree Collapse file tree 2 files changed +9
-11
lines changed
vortex-cuda/src/dynamic_dispatch Expand file tree Collapse file tree 2 files changed +9
-11
lines changed Original file line number Diff line number Diff line change @@ -53,11 +53,11 @@ include!(concat!(env!("OUT_DIR"), "/dynamic_dispatch.rs"));
5353///
5454/// # Safety
5555///
56- /// The caller must ensure `T` is a `#[repr(C)]` type with no padding that
57- /// contains uninitialised bytes . All the types we serialise (`PlanHeader`,
58- /// `PackedStage`, `ScalarOp`) satisfy this because they are bindgen-generated
59- /// `#[repr(C)]` structs whose padding bytes are always written before
60- /// serialisation .
56+ /// The caller must ensure `T` is a `#[repr(C)]` type whose layout is
57+ /// compatible with the C ABI. All the types we serialise (`PlanHeader`,
58+ /// `PackedStage`, `ScalarOp`) are bindgen-generated `#[repr(C)]` structs.
59+ /// Padding bytes may be uninitialised on the Rust side, but the C reader
60+ /// never inspects them, so the values are irrelevant.
6161fn as_bytes < T : Sized > ( val : & T ) -> & [ u8 ] {
6262 unsafe { from_raw_parts ( std:: ptr:: addr_of!( * val) . cast ( ) , size_of :: < T > ( ) ) }
6363}
@@ -514,7 +514,7 @@ mod tests {
514514 #[ crate :: test]
515515 fn test_plan_structure ( ) {
516516 // Stage 0: input dict values (BP→FoR) into smem[0..256)
517- // Stage 1: output codes (BP→FoR→DICT) into smem[256..2304 ), gather from smem[0]
517+ // Stage 1: output codes (BP→FoR→DICT) into smem[256..1280), gather from smem[0]
518518 let plan = CudaDispatchPlan :: new ( [
519519 MaterializedStage :: new (
520520 0xAAAA ,
Original file line number Diff line number Diff line change @@ -118,7 +118,6 @@ pub enum DispatchPlan {
118118 /// Entire encoding tree is fusable into a single kernel launch.
119119 Fused ( FusedPlan ) ,
120120 /// Some subtrees need separate execution before the fused plan can run.
121- /// Shared memory has already been validated.
122121 PartiallyFused {
123122 /// The fused plan (with placeholder buffer slots for pending subtrees).
124123 plan : FusedPlan ,
@@ -165,11 +164,10 @@ pub enum DispatchPlan {
165164/// exceed `stage.len` by up to 1023 elements. This overflow is absorbed by
166165/// the scratch region (`SMEM_TILE_SIZE` ≥ `FL_CHUNK_SIZE`).
167166pub struct FusedPlan {
168- /// Stages in kernel execution order. All stages except the last decode
169- /// fully into persistent shared memory; the final stage produces the
170- /// output.
167+ /// Stages in kernel execution order; all but the last decode into
168+ /// shared memory, the last produces the output.
171169 stages : Vec < ( Stage , SmemOffset , StageLen ) > ,
172- /// Shared memory elements reserved by the preceding ( non-output) stages.
170+ /// Shared memory reserved by the non-output stages.
173171 smem_cursor : SmemOffset ,
174172 /// Source buffers. `None` entries are placeholder slots for pending subtrees,
175173 /// filled by [`materialize_with_subtrees`] before device copy.
You can’t perform that action at this time.
0 commit comments