Skip to content

Commit 1129ec1

Browse files
authored
Merge branch 'master' into bugfix/attribut_data_not_needed_anymore
2 parents 27ad052 + 8e41a3d commit 1129ec1

File tree

7 files changed

+339
-0
lines changed

7 files changed

+339
-0
lines changed

binary-search/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# How to Do a Binary Search in Python?
2+
3+
Code snippets supplementing the [How to Do a Binary Search in Python?](https://realpython.com/binary-search-python/) article on [Real Python](https://realpython.com/).

binary-search/benchmark.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
"""
2+
Benchmark the performance of a search algorithm.
3+
4+
Requirements:
5+
* Python 3.7+
6+
7+
Usage:
8+
$ python benchmark.py -a random -f names.txt 'Arnold Schwarzenegger'
9+
$ python benchmark.py -a linear -f names.txt 'Arnold Schwarzenegger'
10+
$ python benchmark.py -a binary -f sorted_names.txt 'Arnold Schwarzenegger'
11+
"""
12+
13+
import argparse
14+
import time
15+
from statistics import median
16+
from typing import List
17+
18+
from search.binary import find_index as binary_search
19+
from search.random import find_index as random_search
20+
from search.linear import find_index as linear_search
21+
22+
23+
def main(args: argparse.Namespace) -> None:
    """Run the benchmark selected on the command line.

    Looks up the search function named by ``args.algorithm``, loads the
    names from ``args.path`` and benchmarks a search for
    ``args.search_term``.
    """
    search_functions = dict(
        random=random_search,
        linear=linear_search,
        binary=binary_search,
    )

    # Resolve the algorithm before touching the file, so an unknown name
    # fails before any loading work is done.
    search = search_functions[args.algorithm]
    names = load_names(args.path)
    benchmark(search, names, args.search_term)
35+
36+
37+
def parse_args() -> argparse.Namespace:
    """Parse command line arguments.

    Returns a namespace with ``algorithm`` (one of "random", "linear" or
    "binary"), ``path`` (the names file) and ``search_term``.

    Both options are mandatory: without them the script would only fail
    later with an unhelpful ``KeyError``/``TypeError``, so argparse is
    asked to reject the invocation with a usage message instead.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-a",
        "--algorithm",
        choices=("random", "linear", "binary"),
        required=True,
        help="search algorithm to benchmark",
    )
    parser.add_argument(
        "-f",
        "--file",
        dest="path",
        required=True,
        help="text file with one name per line",
    )
    parser.add_argument("search_term", help="name to search for")
    return parser.parse_args()
46+
47+
48+
def load_names(path: str) -> List[str]:
    """Return a list of names from the given file.

    The file is read as UTF-8, one name per line, and the line endings
    are stripped. Progress is reported on standard output.
    """
    print("Loading names...", end="", flush=True)
    # Decode explicitly as UTF-8 instead of relying on the locale's
    # default encoding, which is not UTF-8 on every platform.
    with open(path, encoding="utf-8") as text_file:
        names = text_file.read().splitlines()
    print("ok")
    return names
55+
56+
57+
def convert(nano: int) -> str:
    """Format a duration in nanoseconds using the largest fitting unit.

    Values below one thousand are printed verbatim in ns; larger values
    are scaled to µs, ms or s and shown with two decimal places.
    """
    if nano < 1e3:
        return f"{nano} ns"

    # (exclusive upper bound, divisor, unit label) for the scaled units.
    scaled_units = (
        (1e6, 1e3, "µs"),
        (1e9, 1e6, "ms"),
    )
    for upper_bound, divisor, unit in scaled_units:
        if nano < upper_bound:
            return f"{nano / divisor:.2f} {unit}"

    return f"{nano / 1e9:.2f} s"
72+
73+
74+
def benchmark(
    algorithm, elements: List[str], value: str, repeat: int = 10
) -> None:
    """Time repeated searches for value and print the statistics.

    Calls ``algorithm(elements, value)`` ``repeat`` times, printing the
    outcome and duration of every run, followed by the best, worst,
    average and median durations.
    """
    times: List[int] = []

    for attempt in range(1, repeat + 1):
        print(f"[{attempt}/{repeat}] Searching...", end="", flush=True)

        started = time.perf_counter_ns()
        index = algorithm(elements, value)
        elapsed_time = time.perf_counter_ns() - started
        times.append(elapsed_time)

        # Back up over the "Searching..." text so the result overwrites it.
        print("\b" * len("Searching..."), end="")

        duration = convert(elapsed_time)
        outcome = (
            f"Not found ({duration})"
            if index is None
            else f"Found at index={index} ({duration})"
        )
        print(outcome)

    summary = (
        f"best={convert(min(times))}",
        f"worst={convert(max(times))}",
        f"avg={convert(int(sum(times) / len(times)))}",
        f"median={convert(int(median(times)))}",
    )
    print(", ".join(summary))
99+
100+
101+
if __name__ == "__main__":
    # Report Ctrl+C with a short message instead of a traceback.
    try:
        arguments = parse_args()
        main(arguments)
    except KeyboardInterrupt:
        print("Aborted")

binary-search/download_imdb.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#!/usr/bin/env python
2+
3+
"""
4+
Fetch and parse people's names from IMDb.
5+
6+
Usage:
7+
$ python download_imdb.py
8+
"""
9+
10+
import csv
11+
import gzip
12+
import shutil
13+
import tempfile
14+
import urllib.request
15+
16+
17+
def main():
    """Download the names and store them unsorted and sorted.

    Writes every name produced by ``names()`` to "names.txt", then
    writes the same lines in sorted order to "sorted_names.txt".
    """
    print("Fetching data from IMDb...")

    with open("names.txt", "w", encoding="utf-8") as unsorted_file:
        unsorted_file.writelines(names())

    with open("names.txt", encoding="utf-8") as unsorted_file, open(
        "sorted_names.txt", "w", encoding="utf-8"
    ) as sorted_file:
        lines = unsorted_file.readlines()
        lines.sort()
        sorted_file.writelines(lines)

    print('Created "names.txt" and "sorted_names.txt"')
31+
32+
33+
def names():
    """Return a generator of names with a trailing newline.

    Downloads the gzipped "name.basics" TSV into a temporary file and
    yields the second column of every data row, followed by a newline.
    """
    url = "https://datasets.imdbws.com/name.basics.tsv.gz"
    with urllib.request.urlopen(url) as response:
        with tempfile.NamedTemporaryFile(mode="w+b") as archive:
            shutil.copyfileobj(response, archive)
            archive.seek(0)  # Rewind so that gzip reads from the start
            with gzip.open(archive, mode="rt", encoding="utf-8") as tsv_file:
                # Treat quote characters as ordinary data: a name that
                # contains a double quote must neither lose it nor make
                # the reader merge the following fields or rows.
                tsv = csv.reader(
                    tsv_file, delimiter="\t", quoting=csv.QUOTE_NONE
                )
                next(tsv)  # Skip the header
                for record in tsv:
                    full_name = record[1]
                    yield f"{full_name}\n"
46+
47+
48+
if __name__ == "__main__":
    # Report Ctrl+C with a short message instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("Aborted")

binary-search/search/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from typing import Callable, TypeVar, Union
2+
3+
# Type variable for the elements of a searched sequence.
T = TypeVar("T")
4+
# Type variable for a value being searched for.
S = TypeVar("S")
5+
6+
# Signature of a key function: it receives an element and returns the
# value that stands for that element.
Key = Callable[[T], Union[T, S]]
7+
8+
9+
def identity(element: T) -> Union[T, S]:
    """Return the element unchanged.

    Serves as the default key provider.
    """
    return element

binary-search/search/binary.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""
2+
The binary search algorithm.
3+
"""
4+
5+
from typing import Optional, Set, Sequence
6+
7+
from search import T, S, Key, identity
8+
9+
10+
def find_index(
    elements: Sequence[T], value: S, key: Key = identity
) -> Optional[int]:
    """Return the index of value in elements or None.

    Performs a binary search, so elements must be sorted in ascending
    order of ``key(element)``. When several elements match, the index of
    any one of them may be returned.
    """
    left, right = 0, len(elements) - 1

    while left <= right:
        middle = (left + right) // 2
        middle_element = key(elements[middle])

        if middle_element == value:
            return middle

        if middle_element < value:
            left = middle + 1
        else:
            # A plain "else" guarantees the range shrinks on every pass,
            # even for values that are neither smaller nor greater
            # (e.g. NaN), which would otherwise loop forever.
            right = middle - 1

    return None
31+
32+
33+
def find_leftmost_index(
    elements: Sequence[T], value: S, key: Key = identity
) -> Optional[int]:
    """Return the index of the first matching element or None.

    Starts from the match reported by ``find_index`` and walks to the
    left for as long as the elements keep matching.
    """
    found = find_index(elements, value, key)
    if found is None:
        return None

    cursor = found
    while cursor >= 0 and key(elements[cursor]) == value:
        cursor -= 1

    # The loop stops one position before the leftmost match.
    return cursor + 1
46+
47+
48+
def find_rightmost_index(
    elements: Sequence[T], value: S, key: Key = identity
) -> Optional[int]:
    """Return the index of the last matching element or None.

    Starts from the match reported by ``find_index`` and walks to the
    right for as long as the elements keep matching.
    """
    found = find_index(elements, value, key)
    if found is None:
        return None

    cursor = found
    while cursor < len(elements) and key(elements[cursor]) == value:
        cursor += 1

    # The loop stops one position past the rightmost match.
    return cursor - 1
61+
62+
63+
def find_all_indices(
    elements: Sequence[T], value: S, key: Key = identity
) -> Set[int]:
    """Return a set of indices of elements with matching key.

    The set is empty when nothing matches; otherwise it contains every
    index from the leftmost to the rightmost match, inclusive.
    """
    left = find_leftmost_index(elements, value, key)
    right = find_rightmost_index(elements, value, key)

    # Compare with None explicitly: index 0 is a valid match but falsy,
    # so a truthiness test would discard matches at the very start.
    if left is None or right is None:
        return set()

    return set(range(left, right + 1))
75+
76+
77+
def find(elements: Sequence[T], value: S, key: Key = identity) -> Optional[T]:
    """Return an element whose key matches value, or None if there is none."""
    index = find_index(elements, value, key)
    if index is None:
        return None
    return elements[index]
80+
81+
82+
def find_leftmost(
    elements: Sequence[T], value: S, key: Key = identity
) -> Optional[T]:
    """Return the first element whose key matches value, or None."""
    index = find_leftmost_index(elements, value, key)
    if index is None:
        return None
    return elements[index]
87+
88+
89+
def find_rightmost(
    elements: Sequence[T], value: S, key: Key = identity
) -> Optional[T]:
    """Return the last element whose key matches value, or None."""
    index = find_rightmost_index(elements, value, key)
    if index is None:
        return None
    return elements[index]
94+
95+
96+
def find_all(elements: Sequence[T], value: S, key: Key = identity) -> Set[T]:
    """Return the set of all elements whose key matches value."""
    matches: Set[T] = set()
    for index in find_all_indices(elements, value, key):
        matches.add(elements[index])
    return matches
99+
100+
101+
def contains(elements: Sequence[T], value: S, key: Key = identity) -> bool:
    """Tell whether any element's key matches value."""
    index = find_index(elements, value, key)
    return index is not None
104+
105+
106+
def _get(elements: Sequence[T], index: Optional[int]) -> Optional[T]:
    """Look up elements[index], passing a None index through as None."""
    if index is None:
        return None
    return elements[index]

binary-search/search/linear.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
"""
2+
The linear search algorithm.
3+
"""
4+
5+
from typing import Optional, Sequence
6+
7+
from search import T, S, Key, identity
8+
9+
10+
def find_index(
    elements: Sequence[T], value: S, key: Key = identity
) -> Optional[int]:
    """Return the index of the first element matching value, or None.

    Elements are examined one by one from the start of the sequence.
    """
    matching_positions = (
        position
        for position, candidate in enumerate(elements)
        if key(candidate) == value
    )
    # The generator is lazy, so scanning stops at the first match.
    return next(matching_positions, None)
18+
19+
20+
def find(elements: Sequence[T], value: S, key: Key = identity) -> Optional[T]:
    """Return an element whose key matches value, or None if there is none."""
    position = find_index(elements, value, key)
    if position is None:
        return None
    return elements[position]
24+
25+
26+
def contains(elements: Sequence[T], value: S, key: Key = identity) -> bool:
    """Tell whether any element's key matches value."""
    position = find_index(elements, value, key)
    return position is not None

binary-search/search/random.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""
2+
The random search algorithm.
3+
"""
4+
5+
import random
6+
from typing import Optional, Set, Sequence
7+
8+
from search import T, S, Key, identity
9+
10+
11+
def find_index(
    elements: Sequence[T], value: S, key: Key = identity
) -> Optional[int]:
    """Return the index of an element matching value, or None.

    Keeps probing randomly chosen positions until one matches or until
    every position has been probed at least once.
    """
    size = len(elements)
    seen: Set[int] = set()

    while len(seen) < size:
        candidate = random.randint(0, size - 1)
        # Positions may repeat; the set only records which were tried.
        seen.add(candidate)
        if key(elements[candidate]) == value:
            return candidate

    return None
22+
23+
24+
def find(elements: Sequence[T], value: S, key: Key = identity) -> Optional[T]:
    """Return an element whose key matches value, or None if there is none."""
    position = find_index(elements, value, key)
    if position is None:
        return None
    return elements[position]
28+
29+
30+
def contains(elements: Sequence[T], value: S, key: Key = identity) -> bool:
    """Tell whether any element's key matches value."""
    position = find_index(elements, value, key)
    return position is not None

0 commit comments

Comments
 (0)