import random


class ReservoirSampler:
    '''Uniformly samples up to `capacity` items from a stream of unknown length.

    Items are offered one at a time through `push`. After n items have been
    pushed, every one of them has the same probability, min(1, capacity / n),
    of being present in the reservoir (Algorithm R).

    Attributes:
      capacity: the maximum number of items kept in the reservoir.
      reservoir: the list holding the currently sampled items.
      pushed: the total number of items offered to the sampler so far.
    '''

    def __init__(self, capacity):
        '''Creates a new `ReservoirSampler` instance.

        Args:
          capacity: the number of items to randomly sample from a stream of
            unknown size. Must not be negative; zero yields an always-empty
            reservoir.

        Raises:
          ValueError: if `capacity` is negative.
        '''

        if capacity < 0:
            raise ValueError('capacity must be non-negative')

        self.capacity = capacity
        self.reservoir = []
        self.pushed = 0

    def push(self, v):
        '''Offers an item from the stream to the reservoir.

        While the reservoir is not yet full the item is always kept. Once it
        is full, the item replaces a uniformly chosen existing entry with
        probability capacity / n, where n counts every item pushed so far
        including this one.

        Args:
          v: the item from the stream to add to the reservoir.
        '''

        # Count the incoming item first: the acceptance probability for the
        # n-th item must be capacity / n, not capacity / (n - 1).
        self.pushed += 1

        if len(self.reservoir) < self.capacity:
            self.reservoir.append(v)
            return

        # A single draw in [0, pushed) decides both whether the item is kept
        # (slot < capacity, probability capacity / pushed) and which entry it
        # evicts. `pushed` is at least 1 here, so this is safe for capacity 0.
        slot = random.randrange(self.pushed)

        if slot < self.capacity:
            self.reservoir[slot] = v

    def __len__(self):
        '''Returns the number of randomly sampled items.

        Returns:
          The number of randomly sampled items currently in the reservoir.
        '''

        return len(self.reservoir)

    def __getitem__(self, k):
        '''Returns a randomly sampled item in the reservoir.

        Args:
          k: the index for the kth item from the reservoir to return.

        Returns:
          The kth item in the reservoir of randomly sampled items.
        '''

        return self.reservoir[k]
You can’t perform that action at this time.
0 commit comments