Skip to content

Commit 0907599

Browse files
authored
Fix partition count assertion (#1597)
The actual asserted bound is a bit more complex because the last partial partition is merged into multiple existing ones instead of choosing to have one smaller partition.
1 parent 77a292d commit 0907599

File tree

1 file changed

+19
-9
lines changed

1 file changed

+19
-9
lines changed

vortex-datafusion/src/persistent/execution.rs

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -168,23 +168,29 @@ fn repartition_by_size(
168168
curr_partition_size += file.object_meta.size;
169169
curr_partition.push(file);
170170

171-
if curr_partition_size > target_partition_size {
171+
if curr_partition_size >= target_partition_size {
172172
curr_partition_size = 0;
173173
partitions.push(std::mem::take(&mut curr_partition));
174174
}
175175
}
176176

177-
// if there's anything left, we shove it into existing partitions
178-
for (idx, file) in curr_partition.into_iter().enumerate() {
179-
let part_idx = idx % partitions.len();
180-
partitions[part_idx].push(file);
177+
// If we we're still missing the last partition
178+
if !curr_partition.is_empty() && partitions.len() != desired_partitions {
179+
partitions.push(std::mem::take(&mut curr_partition));
180+
// If we already have enough partitions
181+
} else if !curr_partition.is_empty() {
182+
for (idx, file) in curr_partition.into_iter().enumerate() {
183+
let new_part_idx = idx % partitions.len();
184+
partitions[new_part_idx].push(file);
185+
}
181186
}
182187

188+
// Assert that we have the correct number of partitions and that the total number of files is right
183189
assert_eq!(
184190
partitions.len(),
185-
usize::min(total_file_count, desired_partitions),
186-
"The final number of partitions should be smallest between the total number of files and the desired partition count."
191+
usize::min(desired_partitions, total_file_count)
187192
);
193+
assert_eq!(total_file_count, partitions.iter().flatten().count());
188194

189195
partitions
190196
}
@@ -203,8 +209,12 @@ mod tests {
203209
PartitionedFile::new("e", 50),
204210
]];
205211

206-
let output = repartition_by_size(file_groups, 2);
212+
repartition_by_size(file_groups, 2);
213+
214+
let file_groups = vec![(0..100)
215+
.map(|idx| PartitionedFile::new(format!("{idx}"), idx))
216+
.collect()];
207217

208-
assert_eq!(output.len(), 2);
218+
repartition_by_size(file_groups, 16);
209219
}
210220
}

0 commit comments

Comments
 (0)