Skip to content

Commit ed6a886

Browse files
committed
docs
Signed-off-by: Alexander Droste <alexander.droste@protonmail.com>
1 parent 2283116 commit ed6a886

File tree

2 files changed

+9
-11
lines changed

2 files changed

+9
-11
lines changed

vortex-cuda/src/dynamic_dispatch/mod.rs

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -53,11 +53,11 @@ include!(concat!(env!("OUT_DIR"), "/dynamic_dispatch.rs"));
5353
///
5454
/// # Safety
5555
///
56-
/// The caller must ensure `T` is a `#[repr(C)]` type with no padding that
57-
/// contains uninitialised bytes. All the types we serialise (`PlanHeader`,
58-
/// `PackedStage`, `ScalarOp`) satisfy this because they are bindgen-generated
59-
/// `#[repr(C)]` structs whose padding bytes are always written before
60-
/// serialisation.
56+
/// The caller must ensure `T` is a `#[repr(C)]` type whose layout is
57+
/// compatible with the C ABI. All the types we serialise (`PlanHeader`,
58+
/// `PackedStage`, `ScalarOp`) are bindgen-generated `#[repr(C)]` structs.
59+
/// Padding bytes may be uninitialised on the Rust side, but the C reader
60+
/// never inspects them, so the values are irrelevant.
6161
fn as_bytes<T: Sized>(val: &T) -> &[u8] {
6262
unsafe { from_raw_parts(std::ptr::addr_of!(*val).cast(), size_of::<T>()) }
6363
}
@@ -514,7 +514,7 @@ mod tests {
514514
#[crate::test]
515515
fn test_plan_structure() {
516516
// Stage 0: input dict values (BP→FoR) into smem[0..256)
517-
// Stage 1: output codes (BP→FoR→DICT) into smem[256..2304), gather from smem[0]
517+
// Stage 1: output codes (BP→FoR→DICT) into smem[256..1280), gather from smem[0]
518518
let plan = CudaDispatchPlan::new([
519519
MaterializedStage::new(
520520
0xAAAA,

vortex-cuda/src/dynamic_dispatch/plan_builder.rs

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,6 @@ pub enum DispatchPlan {
118118
/// Entire encoding tree is fusable into a single kernel launch.
119119
Fused(FusedPlan),
120120
/// Some subtrees need separate execution before the fused plan can run.
121-
/// Shared memory has already been validated.
122121
PartiallyFused {
123122
/// The fused plan (with placeholder buffer slots for pending subtrees).
124123
plan: FusedPlan,
@@ -165,11 +164,10 @@ pub enum DispatchPlan {
165164
/// exceed `stage.len` by up to 1023 elements. This overflow is absorbed by
166165
/// the scratch region (`SMEM_TILE_SIZE` ≥ `FL_CHUNK_SIZE`).
167166
pub struct FusedPlan {
168-
/// Stages in kernel execution order. All stages except the last decode
169-
/// fully into persistent shared memory; the final stage produces the
170-
/// output.
167+
/// Stages in kernel execution order; all but the last decode into
168+
/// shared memory, the last produces the output.
171169
stages: Vec<(Stage, SmemOffset, StageLen)>,
172-
/// Shared memory elements reserved by the preceding (non-output) stages.
170+
/// Shared memory reserved by the non-output stages.
173171
smem_cursor: SmemOffset,
174172
/// Source buffers. `None` entries are placeholder slots for pending subtrees,
175173
/// filled by [`materialize_with_subtrees`] before device copy.

0 commit comments

Comments
 (0)