Skip to content

Commit aa2360d

Browse files
authored
Can't use symlinks on Windows -- replace with real files (#159)
The regex_compile benchmark uses two symlinks, but those don't work reliably on Windows. Replace them with real files. See faster-cpython/ideas#333
1 parent 098ffc9 commit aa2360d

File tree

2 files changed

+1965
-2
lines changed

2 files changed

+1965
-2
lines changed

pyperformance/data-files/benchmarks/bm_regex_compile/bm_regex_effbot.py

Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
2+
"""Benchmarks for Python's regex engine.
3+
4+
These are some of the original benchmarks used to tune Python's regex engine
5+
in 2000 written by Fredrik Lundh. Retrieved from
6+
http://mail.python.org/pipermail/python-dev/2000-August/007797.html and
7+
integrated into Unladen Swallow's pyperf.py in 2009 by David Laing.
8+
9+
These benchmarks are of interest since they helped to guide the original
10+
optimization of the sre engine, and we shouldn't necessarily ignore them just
11+
because they're "old".
12+
"""
13+
14+
# Python imports
15+
import re
16+
17+
# Local imports
18+
import pyperf
19+
20+
# Selects bytes patterns instead of str patterns; the script entry point
# flips this to True when --force_bytes is given.
USE_BYTES = False


def re_compile(s):
    """Compile the pattern text ``s`` with ``re.compile``.

    When USE_BYTES is true the text is first encoded to latin-1 so that a
    bytes pattern is compiled; otherwise the str pattern is compiled as-is.
    """
    pattern = s.encode('latin1') if USE_BYTES else s
    return re.compile(pattern)
28+
29+
# These are the regular expressions to be tested. These sync up,
30+
# index-for-index with the list of strings generated by gen_string_table()
31+
# below.
32+
33+
34+
def gen_regex_table():
    """Return the list of compiled regular expressions under test.

    Each pattern is compiled through re_compile(). The order matters:
    entry i is meant to be searched against entry i of the list built by
    gen_string_table().
    """
    pattern_sources = (
        'Python|Perl',
        'Python|Perl',
        '(Python|Perl)',
        '(?:Python|Perl)',
        'Python',
        'Python',
        '.*Python',
        '.*Python.*',
        '.*(Python)',
        '.*(?:Python)',
        'Python|Perl|Tcl',
        'Python|Perl|Tcl',
        '(Python|Perl|Tcl)',
        '(?:Python|Perl|Tcl)',
        '(Python)\\1',
        '(Python)\\1',
        '([0a-z][a-z0-9]*,)+',
        '(?:[0a-z][a-z0-9]*,)+',
        '([a-z][a-z0-9]*,)+',
        '(?:[a-z][a-z0-9]*,)+',
        '.*P.*y.*t.*h.*o.*n.*',
    )
    return [re_compile(source) for source in pattern_sources]
57+
58+
59+
def gen_string_table(n):
    """Build the subject strings that the regexes are searched against.

    Every string is a fixed core surrounded by a one-character filler that
    is repeated n times on each side. When USE_BYTES is true each string is
    encoded to latin-1 bytes.
    """
    # (filler character, core text), in the same order as the regex table.
    layout = (
        ('-', 'Perl'),
        ('P', 'Perl'),
        ('-', 'Perl'),
        ('-', 'Perl'),
        ('-', 'Python'),
        ('P', 'Python'),
        ('-', 'Python'),
        ('-', 'Python'),
        ('-', 'Python'),
        ('-', 'Python'),
        ('-', 'Perl'),
        ('P', 'Perl'),
        ('-', 'Perl'),
        ('-', 'Perl'),
        ('-', 'PythonPython'),
        ('P', 'PythonPython'),
        ('-', 'a5,b7,c9,'),
        ('-', 'a5,b7,c9,'),
        ('-', 'a5,b7,c9,'),
        ('-', 'a5,b7,c9,'),
        ('-', 'Python'),
    )
    table = [filler * n + core + filler * n for filler, core in layout]
    if USE_BYTES:
        table = [text.encode('latin1') for text in table]
    return table
94+
95+
96+
def init_benchmarks(n_values=None):
    """Build the (regex, string) pairs that the benchmark iterates over.

    The strings used in the benchmark are prefixed and suffixed by
    strings that are repeated n times.

    Args:
        n_values: iterable with the values for n. If None, the values of
            n from the original benchmark are used.

    Returns:
        A list of (compiled regex, subject string) tuples. For each n, in
        the order given, the regexes from gen_regex_table() are paired
        positionally with the strings from gen_string_table(n).
    """
    if n_values is None:
        n_values = (0, 5, 50, 250, 1000, 5000, 10000)
    else:
        # n_values is traversed twice below; materialize it so a one-shot
        # iterable (e.g. a generator) is not exhausted by the first pass.
        n_values = tuple(n_values)

    string_tables = {n: gen_string_table(n) for n in n_values}
    regexs = gen_regex_table()

    data = []
    for n in n_values:
        # The two tables line up index-for-index.
        data.extend(zip(regexs, string_tables[n]))
    return data
126+
127+
128+
def bench_regex_effbot(loops):
    """Time ``loops`` passes over every (regex, string) pair.

    The pairs come from init_benchmarks() and are built only on the first
    call; afterwards they are reused from the ``data`` attribute of this
    function. Each pair is searched ten times per pass.

    Returns the difference between two pyperf.perf_counter() readings taken
    around the search loop.
    """
    cases = bench_regex_effbot.data
    if cases is None:
        cases = bench_regex_effbot.data = init_benchmarks()

    passes = range(loops)
    search = re.search
    started = pyperf.perf_counter()

    for _ in passes:
        # One pass over all pairs, i.e. all regexes for all values of n.
        for regex, string in cases:
            # Ten searches, deliberately written out rather than looped.
            search(regex, string)
            search(regex, string)
            search(regex, string)
            search(regex, string)
            search(regex, string)
            search(regex, string)
            search(regex, string)
            search(regex, string)
            search(regex, string)
            search(regex, string)

    elapsed = pyperf.perf_counter() - started
    return elapsed


# Benchmark data cache; filled in by the first call to bench_regex_effbot().
bench_regex_effbot.data = None
157+
158+
159+
def add_cmdline_args(cmd, args):
    """Append ``--force_bytes`` to ``cmd`` when ``args.force_bytes`` is set.

    ``cmd`` is modified in place; nothing is returned.
    """
    if not args.force_bytes:
        return
    cmd.append("--force_bytes")
162+
163+
164+
if __name__ == '__main__':
    # Script entry point: configure the pyperf runner, read the command
    # line, then register the timing function.
    runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
    runner.metadata['description'] = ("Test the performance of regexps "
                                      "using Fredrik Lundh's benchmarks.")
    runner.argparser.add_argument("-B", "--force_bytes", action="store_true",
                                  help="test bytes regexps")
    options = runner.parse_args()
    if options.force_bytes:
        # Module-level rebind: re_compile() and gen_string_table() read this
        # flag when the benchmark data is built.
        USE_BYTES = True

    # inner_loops=10 corresponds to the ten search() calls written out per
    # (regex, string) pair in bench_regex_effbot().
    runner.bench_time_func('regex_effbot', bench_regex_effbot,
                           inner_loops=10)

pyperformance/data-files/benchmarks/bm_regex_compile/bm_regex_v8.py

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)