Skip to content

Commit c98404b

Browse files
giacomocavalieri authored and lpil committed
make reservoir building more efficient
1 parent 65fd256 commit c98404b

File tree

1 file changed

+45
-13
lines changed

1 file changed

+45
-13
lines changed

src/gleam/list.gleam

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2315,24 +2315,56 @@ fn max_loop(list, compare, max) {
23152315
/// ```
23162316
///
23172317
pub fn sample(from list: List(a), up_to n: Int) -> List(a) {
  // Move the leading items of `list` into the reservoir; `rest` is whatever
  // was left over once the reservoir stopped growing.
  let #(reservoir, rest) = build_reservoir(from: list, sized: n)

  case rest {
    // If we've already taken all the items there were in the list there's no
    // need to do anything else, we return the entire reservoir.
    [] -> dict.values(reservoir)
    _ ->
      case dict.is_empty(reservoir) {
        // If the reservoir is empty that means we were asking to sample 0 or
        // fewer items. That doesn't make much sense, so we just return an
        // empty list.
        True -> []
        False -> {
          let w = float.exponential(log_random() /. int.to_float(n))
          // Only the items that are not in the reservoir yet must be handed
          // to the sampling loop. Passing the whole `list` here would offer
          // the first `n` items a second time, so they could end up in the
          // result more than once.
          sample_loop(rest, reservoir, n, n, w) |> dict.values
        }
      }
  }
}
2337+
2338+
/// Creates the starting reservoir for Algorithm L from the front of `list`.
///
/// The first item of the returned tuple is a dictionary holding at most `n`
/// leading elements of `list`, each stored under its position in the list
/// (`0`, `1`, ... up to `n - 1`).
///
/// The second item is the part of `list` that was not put in the reservoir.
///
fn build_reservoir(from list: List(a), sized n: Int) -> #(Dict(Int, a), List(a)) {
  // Start from an empty dictionary and let the loop fill it up.
  dict.new() |> build_reservoir_loop(list, n, _)
}
2348+
2349+
/// Moves elements from the front of `list` into `reservoir` one at a time,
/// storing each under the reservoir's current size, until the reservoir holds
/// `size` entries or `list` runs out.
///
/// Returns the filled reservoir together with the elements that were not
/// consumed.
///
fn build_reservoir_loop(
  list: List(a),
  size: Int,
  reservoir: Dict(Int, a),
) -> #(Dict(Int, a), List(a)) {
  // The number of entries stored so far is also the key of the next slot.
  let next_index = dict.size(reservoir)

  case list {
    // There is still room and there is still an element to take.
    [first, ..rest] if next_index < size ->
      reservoir
      |> dict.insert(next_index, first)
      |> build_reservoir_loop(rest, size, _)

    // Either the reservoir reached the wanted size or the list is exhausted:
    // in both cases what remains of `list` is returned untouched.
    _ -> #(reservoir, list)
  }
}
23382370

0 commit comments

Comments
 (0)