Skip to content

Commit 15b8c9e

Browse files
committed
validate paths before we insert
Signed-off-by: Andrew Duffy <[email protected]>
1 parent 5640bae commit 15b8c9e

File tree

4 files changed

+106
-11
lines changed

4 files changed

+106
-11
lines changed

vortex-dtype/src/field.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,12 @@ impl FieldPath {
206206
// Indexing a struct type always allocates anyway.
207207
self.resolve(dtype).is_some()
208208
}
209+
210+
/// Returns true if this path overlaps with another path (i.e., one is a prefix of the other).
211+
pub fn overlap(&self, other: &FieldPath) -> bool {
212+
let min_len = self.0.len().min(other.0.len());
213+
self.0.iter().take(min_len).eq(other.0.iter().take(min_len))
214+
}
209215
}
210216

211217
impl FromIterator<Field> for FieldPath {
@@ -388,4 +394,28 @@ mod tests {
388394
assert!(path.resolve(dtype.clone()).is_none());
389395
assert!(!path.exists_in(dtype));
390396
}
397+
398+
#[test]
399+
fn test_overlap_positive() {
400+
let path1 = field_path!(a.b.c);
401+
let path2 = field_path!(a.b);
402+
assert!(path1.overlap(&path2));
403+
assert!(path2.overlap(&path1));
404+
405+
let path3 = field_path!(a);
406+
assert!(path1.overlap(&path3));
407+
assert!(path3.overlap(&path1));
408+
}
409+
410+
#[test]
411+
fn test_overlap_negative() {
412+
let path1 = field_path!(a.b.c);
413+
let path2 = field_path!(a.x.y);
414+
assert!(!path1.overlap(&path2));
415+
assert!(!path2.overlap(&path1));
416+
417+
let path3 = field_path!(x);
418+
assert!(!path1.overlap(&path3));
419+
assert!(!path3.overlap(&path1));
420+
}
391421
}

vortex-layout/src/layouts/path.rs

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ use vortex_utils::aliases::hash_set::HashSet;
3333
use crate::IntoLayout;
3434
use crate::LayoutRef;
3535
use crate::LayoutStrategy;
36+
use crate::layouts::flat::writer::FlatLayoutStrategy;
3637
use crate::layouts::struct_::StructLayout;
3738
use crate::segments::SegmentSinkRef;
3839
use crate::sequence::SendableSequentialStream;
@@ -52,6 +53,18 @@ pub struct PathStrategy {
5253
fallback: Arc<dyn LayoutStrategy>,
5354
}
5455

56+
impl Default for PathStrategy {
57+
fn default() -> Self {
58+
let flat = Arc::new(FlatLayoutStrategy::default());
59+
60+
Self {
61+
leaf_writers: HashMap::default(),
62+
validity: flat.clone(),
63+
fallback: flat,
64+
}
65+
}
66+
}
67+
5568
impl PathStrategy {
5669
/// Create a new writer with the specified write strategies for validity, and for all leaf
5770
/// fields, with no overrides.
@@ -113,7 +126,8 @@ impl PathStrategy {
113126
field_path: impl Into<FieldPath>,
114127
writer: Arc<dyn LayoutStrategy>,
115128
) -> Self {
116-
self.leaf_writers.insert(field_path.into(), writer);
129+
self.leaf_writers
130+
.insert(self.validate_path(field_path.into()), writer);
117131
self
118132
}
119133

@@ -124,12 +138,16 @@ impl PathStrategy {
124138
mut self,
125139
writers: impl IntoIterator<Item = (FieldPath, Arc<dyn LayoutStrategy>)>,
126140
) -> Self {
127-
self.leaf_writers.extend(writers);
141+
for (field_path, strategy) in writers {
142+
self.leaf_writers
143+
.insert(self.validate_path(field_path), strategy);
144+
}
128145
self
129146
}
130147
}
131148

132149
impl PathStrategy {
150+
// Descend into a subfield for the writer.
133151
fn descend(&self, field: &Field) -> Self {
134152
// Start with the existing set of overrides, then only retain the ones that contain
135153
// the current field
@@ -149,6 +167,19 @@ impl PathStrategy {
149167
fallback: self.fallback.clone(),
150168
}
151169
}
170+
171+
fn validate_path(&self, path: FieldPath) -> FieldPath {
172+
// Validate that the field path does not conflict with any overrides
173+
// that we've added by overlapping.
174+
for field_path in self.leaf_writers.keys() {
175+
assert!(
176+
!path.overlap(field_path),
177+
"Override for field_path {path} conflicts with existing override for {field_path}"
178+
);
179+
}
180+
181+
path
182+
}
152183
}
153184

154185
/// Specialized strategy for when we exactly know the input schema.
@@ -322,3 +353,27 @@ impl LayoutStrategy for PathStrategy {
322353
Ok(StructLayout::new(row_count, dtype, column_layouts).into_layout())
323354
}
324355
}
356+
357+
#[cfg(test)]
358+
mod tests {
359+
use std::sync::Arc;
360+
361+
use vortex_dtype::field_path;
362+
363+
use crate::layouts::flat::writer::FlatLayoutStrategy;
364+
use crate::layouts::path::PathStrategy;
365+
366+
#[test]
367+
#[should_panic(
368+
expected = "Override for field_path $a.$b conflicts with existing override for $a.$b.$c"
369+
)]
370+
fn test_overlapping_paths_fail() {
371+
let flat = Arc::new(FlatLayoutStrategy::default());
372+
373+
// Success
374+
let path = PathStrategy::default().with_field_writer(field_path!(a.b.c), flat.clone());
375+
376+
// Should panic right here.
377+
let _path = path.with_field_writer(field_path!(a.b), flat);
378+
}
379+
}

vortex-layout/src/layouts/struct_/reader.rs

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ mod tests {
417417
use crate::LayoutRef;
418418
use crate::LayoutStrategy;
419419
use crate::layouts::flat::writer::FlatLayoutStrategy;
420-
use crate::layouts::struct_::writer::StructStrategy;
420+
use crate::layouts::path::PathStrategy;
421421
use crate::segments::SegmentSource;
422422
use crate::segments::TestSegments;
423423
use crate::sequence::SequenceId;
@@ -430,8 +430,10 @@ mod tests {
430430

431431
let segments = Arc::new(TestSegments::default());
432432
let (ptr, eof) = SequenceId::root().split();
433-
let strategy =
434-
StructStrategy::new(FlatLayoutStrategy::default(), FlatLayoutStrategy::default());
433+
let strategy = PathStrategy::new(
434+
Arc::new(FlatLayoutStrategy::default()),
435+
Arc::new(FlatLayoutStrategy::default()),
436+
);
435437
let layout = block_on(|handle| {
436438
strategy.write_stream(
437439
ctx,
@@ -461,8 +463,10 @@ mod tests {
461463
let ctx = ArrayContext::empty();
462464
let segments = Arc::new(TestSegments::default());
463465
let (ptr, eof) = SequenceId::root().split();
464-
let strategy =
465-
StructStrategy::new(FlatLayoutStrategy::default(), FlatLayoutStrategy::default());
466+
let strategy = PathStrategy::new(
467+
Arc::new(FlatLayoutStrategy::default()),
468+
Arc::new(FlatLayoutStrategy::default()),
469+
);
466470
let layout = block_on(|handle| {
467471
strategy.write_stream(
468472
ctx,
@@ -495,8 +499,10 @@ mod tests {
495499

496500
let segments = Arc::new(TestSegments::default());
497501
let (ptr, eof) = SequenceId::root().split();
498-
let strategy =
499-
StructStrategy::new(FlatLayoutStrategy::default(), FlatLayoutStrategy::default());
502+
let strategy = PathStrategy::new(
503+
Arc::new(FlatLayoutStrategy::default()),
504+
Arc::new(FlatLayoutStrategy::default()),
505+
);
500506
let layout = block_on(|handle| {
501507
strategy.write_stream(
502508
ctx,
@@ -534,8 +540,10 @@ mod tests {
534540
let ctx = ArrayContext::empty();
535541
let segments = Arc::new(TestSegments::default());
536542
let (ptr, eof) = SequenceId::root().split();
537-
let strategy =
538-
StructStrategy::new(FlatLayoutStrategy::default(), FlatLayoutStrategy::default());
543+
let strategy = PathStrategy::new(
544+
Arc::new(FlatLayoutStrategy::default()),
545+
Arc::new(FlatLayoutStrategy::default()),
546+
);
539547
let layout = block_on(|handle| {
540548
strategy.write_stream(
541549
ctx,

vortex-layout/src/layouts/struct_/writer.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
#![allow(deprecated, reason = "This module is deprecated")]
5+
46
use std::sync::Arc;
57

68
use async_trait::async_trait;

0 commit comments

Comments
 (0)