Skip to content

Commit 1134124

Browse files
authored
Merge pull request #14 from JostMigenda/examples_setup
Remove duplicate setup code in examples
2 parents 5f72645 + edfbf2c commit 1134124

File tree

3 files changed

+121
-137
lines changed

3 files changed

+121
-137
lines changed

episodes/optimisation-data-structures-algorithms.md

Lines changed: 24 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -238,41 +238,35 @@ If you reduce the value of `repeats` it will run faster, how does changing the n
238238
import random
239239
from timeit import timeit
240240

241-
def generateInputs(N = 25000):
242-
random.seed(12) # Ensure every list is the same
243-
return [random.randint(0,int(N/2)) for i in range(N)]
244-
241+
N = 25000 # Number of elements in the list
242+
data = [random.randint(0, int(N/2)) for i in range(N)]
243+
245244
def uniqueSet():
246-
ls_in = generateInputs()
247-
set_out = set(ls_in)
245+
set_out = set(data)
248246

249247
def uniqueSetAdd():
250-
ls_in = generateInputs()
251248
set_out = set()
252-
for i in ls_in:
249+
for i in data:
253250
set_out.add(i)
254251

255252
def uniqueList():
256-
ls_in = generateInputs()
257253
ls_out = []
258-
for i in ls_in:
254+
for i in data:
259255
if not i in ls_out:
260256
ls_out.append(i)
261257

262258
def uniqueListSort():
263-
ls_in = generateInputs()
264-
ls_in.sort()
259+
ls_in = sorted(data)
265260
ls_out = [ls_in[0]]
266261
for i in ls_in:
267262
if ls_out[-1] != i:
268263
ls_out.append(i)
269-
264+
270265
repeats = 1000
271-
gen_time = timeit(generateInputs, number=repeats)
272-
print(f"uniqueSet: {timeit(uniqueSet, number=repeats)-gen_time:.2f}ms")
273-
print(f"uniqueSetAdd: {timeit(uniqueSetAdd, number=repeats)-gen_time:.2f}ms")
274-
print(f"uniqueList: {timeit(uniqueList, number=repeats)-gen_time:.2f}ms")
275-
print(f"uniqueListSort: {timeit(uniqueListSort, number=repeats)-gen_time:.2f}ms")
266+
print(f"uniqueSet: {timeit(uniqueSet, number=repeats):.2f}ms")
267+
print(f"uniqueSetAdd: {timeit(uniqueSetAdd, number=repeats):.2f}ms")
268+
print(f"uniqueList: {timeit(uniqueList, number=repeats):.2f}ms")
269+
print(f"uniqueListSort: {timeit(uniqueListSort, number=repeats):.2f}ms")
276270
```
277271

278272
:::::::::::::::::::::::: hint
@@ -325,51 +319,43 @@ from bisect import bisect_left
325319
N = 25000 # Number of elements in list
326320
M = 2 # N*M == Range over which the elements span
327321

328-
def generateInputs():
329-
random.seed(12) # Ensure every list is the same
330-
st = set([random.randint(0, int(N*M)) for i in range(N)])
331-
ls = list(st)
332-
ls.sort() # Sort required for binary
333-
return st, ls # Return both set and list
322+
st = set([random.randint(0, int(N*M)) for i in range(N)])
323+
ls = list(st)
324+
ls.sort() # Sort required for binary search
334325

335326
def search_set():
336-
st, _ = generateInputs()
337327
j = 0
338328
for i in range(0, int(N*M), M):
339329
if i in st:
340330
j += 1
341331

342332
def linear_search_list():
343-
_, ls = generateInputs()
344333
j = 0
345334
for i in range(0, int(N*M), M):
346335
if i in ls:
347336
j += 1
348337

349338
def binary_search_list():
350-
_, ls = generateInputs()
351339
j = 0
352340
for i in range(0, int(N*M), M):
353341
k = bisect_left(ls, i)
354342
if k != len(ls) and ls[k] == i:
355343
j += 1
356344

357-
358345
repeats = 1000
359-
gen_time = timeit(generateInputs, number=repeats)
360-
print(f"search_set: {timeit(search_set, number=repeats)-gen_time:.2f}ms")
361-
print(f"linear_search_list: {timeit(linear_search_list, number=repeats)-gen_time:.2f}ms")
362-
print(f"binary_search_list: {timeit(binary_search_list, number=repeats)-gen_time:.2f}ms")
346+
print(f"search_set: {timeit(search_set, number=repeats):.2f}ms")
347+
print(f"linear_search_list: {timeit(linear_search_list, number=repeats):.2f}ms")
348+
print(f"binary_search_list: {timeit(binary_search_list, number=repeats):.2f}ms")
363349
```
364350

365-
Searching the set is fastest performing 25,000 searches in 0.04ms.
366-
This is followed by the binary search of the (sorted) list which is 145x slower, although the list has been filtered for duplicates. A list still containing duplicates would be longer, leading to a more expensive search.
367-
The linear search of the list is more than 56,600x slower than the fastest, it really shouldn't be used!
351+
Searching the set is fastest, performing 25,000 searches in 0.57ms.
352+
This is followed by the binary search of the (sorted) list which is 6x slower, although the list has been filtered for duplicates. A list still containing duplicates would be longer, leading to a more expensive search.
353+
The linear search of the list is about 2700x slower than the fastest; it really shouldn't be used!
368354

369355
```output
370-
search_set: 0.04ms
371-
linear_search_list: 2264.91ms
372-
binary_search_list: 5.79ms
356+
search_set: 0.57ms
357+
linear_search_list: 1531.61ms
358+
binary_search_list: 3.43ms
373359
```
374360

375361
These results are subject to change based on the number of items and the proportion of searched items that exist within the list. However, the pattern is likely to remain the same. Linear searches should be avoided!

episodes/optimisation-using-python.md

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -111,16 +111,13 @@ The function `manualSearch()` manually iterates through the list (`ls`) and chec
111111

112112
```python
113113
import random
114+
from timeit import timeit
114115

115116
N = 2500 # Number of elements in list
116117
M = 2 # N*M == Range over which the elements span
117-
118-
def generateInputs():
119-
random.seed(12) # Ensure every list is the same
120-
return [random.randint(0, int(N*M)) for i in range(N)]
118+
ls = [random.randint(0, int(N*M)) for i in range(N)]
121119

122120
def manualSearch():
123-
ls = generateInputs()
124121
ct = 0
125122
for i in range(0, int(N*M), M):
126123
for j in range(0, len(ls)):
@@ -129,16 +126,14 @@ def manualSearch():
129126
break
130127

131128
def operatorSearch():
132-
ls = generateInputs()
133129
ct = 0
134130
for i in range(0, int(N*M), M):
135131
if i in ls:
136132
ct += 1
137133

138134
repeats = 1000
139-
gen_time = timeit(generateInputs, number=repeats)
140-
print(f"manualSearch: {timeit(manualSearch, number=repeats)-gen_time:.2f}ms")
141-
print(f"operatorSearch: {timeit(operatorSearch, number=repeats)-gen_time:.2f}ms")
135+
print(f"manualSearch: {timeit(manualSearch, number=repeats):.2f}ms")
136+
print(f"operatorSearch: {timeit(operatorSearch, number=repeats):.2f}ms")
142137
```
143138

144139
This results in the manual Python implementation being 5x slower, despite doing the exact same operation!

learners/technical-appendix.md

Lines changed: 93 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,18 @@ The topics covered here exceed the level of knowledge required to benefit from t
1414

1515
You can use `dis` to view the bytecode generated by Python. The amount of bytecode more strongly correlates with how much code is being executed by the Python interpreter and hence how long it may take to execute. However, this is a crude proxy as it does not account for functions that are called and whether those functions are implemented using Python or C.
1616

17-
The pure Python search compiles to 82 lines of bytecode.
17+
The pure Python search compiles to 51 lines of bytecode. (Note that different versions of Python may produce slightly different bytecode than shown here.)
1818

1919
```python
2020
import dis
21+
import random
22+
23+
# Generate sample data
24+
N = 2500
25+
M = 2
26+
ls = [random.randint(0, int(N*M)) for i in range(N)]
2127

2228
def manualSearch():
23-
ls = generateInputs()
2429
ct = 0
2530
for i in range(0, int(N*M), M):
2631
for j in range(0, len(ls)):
@@ -31,65 +36,63 @@ def manualSearch():
3136
dis.dis(manualSearch)
3237
```
3338
```output
34-
11 0 LOAD_GLOBAL 0 (generateInputs)
35-
2 CALL_FUNCTION 0
36-
4 STORE_FAST 0 (ls)
37-
38-
12 6 LOAD_CONST 1 (0)
39-
8 STORE_FAST 1 (ct)
40-
41-
13 10 LOAD_GLOBAL 1 (range)
42-
12 LOAD_CONST 1 (0)
43-
14 LOAD_GLOBAL 2 (int)
44-
16 LOAD_GLOBAL 3 (N)
45-
18 LOAD_GLOBAL 4 (M)
46-
20 BINARY_MULTIPLY
47-
22 CALL_FUNCTION 1
48-
24 LOAD_GLOBAL 4 (M)
49-
26 CALL_FUNCTION 3
50-
28 GET_ITER
51-
>> 30 FOR_ITER 24 (to 80)
52-
32 STORE_FAST 2 (i)
53-
54-
14 34 LOAD_GLOBAL 1 (range)
55-
36 LOAD_CONST 1 (0)
56-
38 LOAD_GLOBAL 5 (len)
57-
40 LOAD_FAST 0 (ls)
58-
42 CALL_FUNCTION 1
59-
44 CALL_FUNCTION 2
60-
46 GET_ITER
61-
>> 48 FOR_ITER 14 (to 78)
62-
50 STORE_FAST 3 (j)
63-
64-
15 52 LOAD_FAST 0 (ls)
65-
54 LOAD_FAST 3 (j)
66-
56 BINARY_SUBSCR
67-
58 LOAD_FAST 2 (i)
68-
60 COMPARE_OP 2 (==)
69-
62 POP_JUMP_IF_FALSE 38 (to 76)
70-
71-
16 64 LOAD_FAST 1 (ct)
72-
66 LOAD_CONST 2 (1)
73-
68 INPLACE_ADD
74-
70 STORE_FAST 1 (ct)
75-
76-
17 72 POP_TOP
77-
74 JUMP_FORWARD 1 (to 78)
78-
79-
15 >> 76 JUMP_ABSOLUTE 24 (to 48)
80-
>> 78 JUMP_ABSOLUTE 15 (to 30)
81-
82-
13 >> 80 LOAD_CONST 0 (None)
83-
82 RETURN_VALUE
39+
9 RESUME 0
40+
41+
10 LOAD_CONST 1 (0)
42+
STORE_FAST 0 (ct)
43+
44+
11 LOAD_GLOBAL 1 (range + NULL)
45+
LOAD_CONST 1 (0)
46+
LOAD_GLOBAL 3 (int + NULL)
47+
LOAD_GLOBAL 4 (N)
48+
LOAD_GLOBAL 6 (M)
49+
BINARY_OP 5 (*)
50+
CALL 1
51+
LOAD_GLOBAL 6 (M)
52+
CALL 3
53+
GET_ITER
54+
L1: FOR_ITER 56 (to L5)
55+
STORE_FAST 1 (i)
56+
57+
12 LOAD_GLOBAL 1 (range + NULL)
58+
LOAD_CONST 1 (0)
59+
LOAD_GLOBAL 9 (len + NULL)
60+
LOAD_GLOBAL 10 (ls)
61+
CALL 1
62+
CALL 2
63+
GET_ITER
64+
L2: FOR_ITER 24 (to L4)
65+
STORE_FAST 2 (j)
66+
67+
13 LOAD_GLOBAL 10 (ls)
68+
LOAD_FAST 2 (j)
69+
BINARY_SUBSCR
70+
LOAD_FAST 1 (i)
71+
COMPARE_OP 88 (bool(==))
72+
POP_JUMP_IF_TRUE 2 (to L3)
73+
JUMP_BACKWARD 18 (to L2)
74+
75+
14 L3: LOAD_FAST 0 (ct)
76+
LOAD_CONST 2 (1)
77+
BINARY_OP 13 (+=)
78+
STORE_FAST 0 (ct)
79+
80+
15 POP_TOP
81+
JUMP_BACKWARD 54 (to L1)
82+
83+
12 L4: END_FOR
84+
POP_TOP
85+
JUMP_BACKWARD 58 (to L1)
86+
87+
11 L5: END_FOR
88+
POP_TOP
89+
RETURN_CONST 0 (None)
8490
```
8591

86-
Whereas the `in` variant only compiles to 54.
92+
Whereas the `in` variant only compiles to 33 lines.
8793

8894
```python
89-
import dis
90-
9195
def operatorSearch():
92-
ls = generateInputs()
9396
ct = 0
9497
for i in range(0, int(N*M), M):
9598
if i in ls:
@@ -98,43 +101,43 @@ def operatorSearch():
98101
dis.dis(operatorSearch)
99102
```
100103
```output
101-
4 0 LOAD_GLOBAL 0 (generateInputs)
102-
2 CALL_FUNCTION 0
103-
4 STORE_FAST 0 (ls)
104-
105-
5 6 LOAD_CONST 1 (0)
106-
8 STORE_FAST 1 (ct)
107-
108-
6 10 LOAD_GLOBAL 1 (range)
109-
12 LOAD_CONST 1 (0)
110-
14 LOAD_GLOBAL 2 (int)
111-
16 LOAD_GLOBAL 3 (N)
112-
18 LOAD_GLOBAL 4 (M)
113-
20 BINARY_MULTIPLY
114-
22 CALL_FUNCTION 1
115-
24 LOAD_GLOBAL 4 (M)
116-
26 CALL_FUNCTION 3
117-
28 GET_ITER
118-
>> 30 FOR_ITER 10 (to 52)
119-
32 STORE_FAST 2 (i)
120-
121-
7 34 LOAD_FAST 2 (i)
122-
36 LOAD_FAST 0 (ls)
123-
38 CONTAINS_OP 0
124-
40 POP_JUMP_IF_FALSE 25 (to 50)
125-
126-
8 42 LOAD_FAST 1 (ct)
127-
44 LOAD_CONST 2 (1)
128-
46 INPLACE_ADD
129-
48 STORE_FAST 1 (ct)
130-
>> 50 JUMP_ABSOLUTE 15 (to 30)
131-
132-
6 >> 52 LOAD_CONST 0 (None)
133-
54 RETURN_VALUE
104+
1 RESUME 0
105+
106+
2 LOAD_CONST 1 (0)
107+
STORE_FAST 0 (ct)
108+
109+
3 LOAD_GLOBAL 1 (range + NULL)
110+
LOAD_CONST 1 (0)
111+
LOAD_GLOBAL 3 (int + NULL)
112+
LOAD_GLOBAL 4 (N)
113+
LOAD_GLOBAL 6 (M)
114+
BINARY_OP 5 (*)
115+
CALL 1
116+
LOAD_GLOBAL 6 (M)
117+
CALL 3
118+
GET_ITER
119+
L1: FOR_ITER 20 (to L3)
120+
STORE_FAST 1 (i)
121+
122+
4 LOAD_FAST 1 (i)
123+
LOAD_GLOBAL 8 (ls)
124+
CONTAINS_OP 0
125+
POP_JUMP_IF_TRUE 2 (to L2)
126+
JUMP_BACKWARD 15 (to L1)
127+
128+
5 L2: LOAD_FAST 0 (ct)
129+
LOAD_CONST 2 (1)
130+
BINARY_OP 13 (+=)
131+
STORE_FAST 0 (ct)
132+
JUMP_BACKWARD 22 (to L1)
133+
134+
3 L3: END_FOR
135+
POP_TOP
136+
RETURN_CONST 0 (None)
134137
```
135138

136139
A naive assessment of how expensive two functions are can be carried out with this comparison.
137-
However this method of displaying bytecode only shows bytecode for the requested function, so it is not clear how expensive called function's will be or higher level changes to an algorithm which could reduce the number of iterations or similar.
140+
However, this method of displaying bytecode only shows bytecode for the requested function, so it does not reveal how expensive the called functions will be, nor does it account for higher-level changes to an algorithm which could reduce the number of iterations or similar.
138141

139142
## Hardware Level Memory Accesses
140143

0 commit comments

Comments
 (0)