Skip to content

Commit 2ef5615

Browse files
committed
Implements reservoir sampler randomly sampling stream of features, closes #7
1 parent e36412b commit 2ef5615

File tree

1 file changed

+61
-0
lines changed

1 file changed

+61
-0
lines changed

robosat/osm/sampler.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import random
2+
3+
4+
class ReservoirSampler:
    '''Uniformly samples up to k items from a stream of unknown length n.

    Implements reservoir sampling (Algorithm R): after `n` calls to `push`,
    every pushed item has the same probability `min(1, k / n)` of being in
    the reservoir, where `k` is the capacity.
    '''

    def __init__(self, capacity):
        '''Creates a new `ReservoirSampler` instance.

        Args:
          capacity: the number of items to randomly sample from a stream of unknown size.

        Raises:
          ValueError: if `capacity` is negative.
        '''

        if capacity < 0:
            raise ValueError('capacity must be non-negative, got {}'.format(capacity))

        self.capacity = capacity
        self.reservoir = []
        # Total number of items seen so far, whether or not they were kept.
        self.pushed = 0

    def push(self, v):
        '''Offers an item from the stream to the reservoir.

        While the reservoir is not yet full the item is always kept. Afterwards
        the item replaces a uniformly chosen existing entry with probability
        `capacity / n`, where `n` counts all items seen including this one.

        Args:
          v: the item from the stream to add to the reservoir.
        '''

        if len(self.reservoir) < self.capacity:
            self.reservoir.append(v)
        else:
            # `v` is item number `pushed + 1`. Drawing a slot uniformly from
            # [0, pushed] lands inside the reservoir with probability
            # capacity / (pushed + 1) and, when it does, is uniform over the
            # reservoir slots. Using `pushed` instead of `pushed + 1` would
            # over-weight new items and divide by zero for capacity == 0.
            slot = random.randrange(self.pushed + 1)

            if slot < self.capacity:
                self.reservoir[slot] = v

        self.pushed += 1

    def __len__(self):
        '''Returns the number of randomly sampled items.

        Returns:
          The number of randomly sampled items currently in the reservoir.
        '''

        return len(self.reservoir)

    def __getitem__(self, k):
        '''Returns a randomly sampled item in the reservoir.

        Args:
          k: the index for the kth item from the reservoir to return.

        Returns:
          The kth item in the reservoir of randomly sampled items.
        '''

        return self.reservoir[k]

0 commit comments

Comments
 (0)