@@ -168,23 +168,29 @@ fn repartition_by_size(
168168 curr_partition_size += file. object_meta . size ;
169169 curr_partition. push ( file) ;
170170
171- if curr_partition_size > target_partition_size {
171+ if curr_partition_size >= target_partition_size {
172172 curr_partition_size = 0 ;
173173 partitions. push ( std:: mem:: take ( & mut curr_partition) ) ;
174174 }
175175 }
176176
177- // if there's anything left, we shove it into existing partitions
178- for ( idx, file) in curr_partition. into_iter ( ) . enumerate ( ) {
179- let part_idx = idx % partitions. len ( ) ;
180- partitions[ part_idx] . push ( file) ;
177+ // If we we're still missing the last partition
178+ if !curr_partition. is_empty ( ) && partitions. len ( ) != desired_partitions {
179+ partitions. push ( std:: mem:: take ( & mut curr_partition) ) ;
180+ // If we already have enough partitions
181+ } else if !curr_partition. is_empty ( ) {
182+ for ( idx, file) in curr_partition. into_iter ( ) . enumerate ( ) {
183+ let new_part_idx = idx % partitions. len ( ) ;
184+ partitions[ new_part_idx] . push ( file) ;
185+ }
181186 }
182187
188+ // Assert that we have the correct number of partitions and that the total number of files is right
183189 assert_eq ! (
184190 partitions. len( ) ,
185- usize :: min( total_file_count, desired_partitions) ,
186- "The final number of partitions should be smallest between the total number of files and the desired partition count."
191+ usize :: min( desired_partitions, total_file_count)
187192 ) ;
193+ assert_eq ! ( total_file_count, partitions. iter( ) . flatten( ) . count( ) ) ;
188194
189195 partitions
190196}
@@ -203,8 +209,12 @@ mod tests {
203209 PartitionedFile :: new( "e" , 50 ) ,
204210 ] ] ;
205211
206- let output = repartition_by_size ( file_groups, 2 ) ;
212+ repartition_by_size ( file_groups, 2 ) ;
213+
214+ let file_groups = vec ! [ ( 0 ..100 )
215+ . map( |idx| PartitionedFile :: new( format!( "{idx}" ) , idx) )
216+ . collect( ) ] ;
207217
208- assert_eq ! ( output . len ( ) , 2 ) ;
218+ repartition_by_size ( file_groups , 16 ) ;
209219 }
210220}
0 commit comments