@@ -2315,24 +2315,56 @@ fn max_loop(list, compare, max) {
2315
2315
/// ```
2316
2316
///
2317
2317
pub fn sample(from list: List(a), up_to n: Int) -> List(a) {
  // Fill the reservoir with (at most) the first `n` items; `rest` holds the
  // items that did not fit and still have to be considered by the algorithm.
  let #(reservoir, rest) = build_reservoir(from: list, sized: n)

  case rest, dict.is_empty(reservoir) {
    // If we've already taken all the items there were in the list there's no
    // need to do anything else, we return the entire reservoir.
    [], _ -> dict.values(reservoir)

    // If the reservoir is empty that means we were asking to sample 0 or
    // less items. That doesn't make much sense, so we just return an empty
    // list.
    _, True -> []

    // Otherwise we keep going over the items that are not in the reservoir
    // yet, randomly replacing reservoir entries with them.
    //
    // It is important to loop over `rest` and not over the whole `list`:
    // the first items of `list` are already in the reservoir, walking them
    // again could put the same item in the result more than once.
    _, False -> {
      let w = float.exponential(log_random() /. int.to_float(n))
      sample_loop(rest, reservoir, n, n, w) |> dict.values
    }
  }
}
2337
+
2338
/// Creates the starting reservoir for Algorithm L.
///
/// The reservoir is a dictionary holding the leading elements of `list`,
/// keyed by consecutive integers starting at `0`. It contains at most `n`
/// entries: fewer when `list` is shorter than `n`, none when `n` is zero or
/// negative.
///
/// The second element of the returned tuple is whatever is left of `list`
/// once those leading elements have been moved into the reservoir.
///
fn build_reservoir(from list: List(a), sized n: Int) -> #(Dict(Int, a), List(a)) {
  // Start from an empty dictionary and let the loop fill it up.
  dict.new() |> build_reservoir_loop(list, n, _)
}
2348
+
2349
// Moves elements from the front of `list` into `reservoir`, one per
// iteration, until the reservoir holds `size` entries or `list` runs out.
// Returns the reservoir together with the elements that were not consumed.
fn build_reservoir_loop(
  list: List(a),
  size: Int,
  reservoir: Dict(Int, a),
) -> #(Dict(Int, a), List(a)) {
  // The current number of entries doubles as the key of the next slot, so
  // keys end up being consecutive integers starting at 0.
  let next_index = dict.size(reservoir)

  case list {
    // There is still room and there is still something to take.
    [first, ..rest] if next_index < size ->
      build_reservoir_loop(
        rest,
        size,
        dict.insert(reservoir, next_index, first),
      )

    // Either the reservoir is full or the list is exhausted (in which case
    // `list` is `[]`): hand back what we have and what is left.
    _ -> #(reservoir, list)
  }
}
2338
2370
0 commit comments