Skip to content

Commit 6fab294

Browse files
committed
Add python/cython benchmark for the lark parser
1 parent d2a245e commit 6fab294

File tree

1 file changed

+199
-0
lines changed

1 file changed

+199
-0
lines changed
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
"""
2+
Based on:
3+
https://github.com/Erotemic/misc/blob/main/tests/python/bench_template.py
4+
5+
Requirements:
6+
pip install ubelt timerit pandas numpy seaborn matplotlib
7+
"""
8+
9+
10+
def random_lark_grammar(size):
    """
    Build the text of a synthetic lark grammar containing a chain of rules.

    Despite the name, the output is deterministic: ``simple_rule_0`` matches
    a bare ``CNAME`` and every following ``simple_rule_{i}`` wraps the
    previous rule in parentheses. A ``final`` rule references the last rule
    of the chain followed by a literal ``"."``, and ``start`` points at
    ``final``. The number of lines therefore grows linearly with ``size``.

    TODO: could likely be more sophisticated with how we *generate* random
    text. (Almost as if that's what CFGs do!).

    Args:
        size (int): number of ``simple_rule_*`` rules to emit. Any value of
            1 or less yields only ``simple_rule_0``.

    Returns:
        str: the grammar text, lines joined by newlines, without a trailing
            newline.

    Example:
        >>> print(random_lark_grammar(2))
        start: final
        simple_rule_0 : CNAME
        simple_rule_1 : "(" simple_rule_0 ")"
        final : simple_rule_1 "."
        %import common.CNAME
    """
    lines = [
        'start: final',
        'simple_rule_0 : CNAME',
    ]
    # Track the last emitted rule explicitly so the ``final`` rule is correct
    # even when the loop body never runs (size <= 1).
    last_idx = 0
    for idx in range(1, size):
        lines.append(f'simple_rule_{idx} : "(" simple_rule_{idx - 1} ")"')
        last_idx = idx

    lines.append(f'final : simple_rule_{last_idx} "."')
    lines.append('%import common.CNAME')
    return '\n'.join(lines)
27+
28+
29+
def _autompl_lite():
    """
    A minimal port of :func:`kwplot.autompl`

    Selects the ``Qt5Agg`` matplotlib backend when ``PyQt5`` can be located,
    and, when additionally running inside IPython, enables the ``pylab qt5``
    line magic so figures are shown without blocking.

    References:
        https://gitlab.kitware.com/computer-vision/kwplot/-/blob/main/kwplot/auto_backends.py#L98

    Returns:
        bool: True if the IPython ``pylab qt5`` integration was enabled (the
            caller then does not need to call ``plt.show()``), otherwise
            False.
    """
    import ubelt as ub
    import matplotlib as mpl
    interactive = False
    # NOTE(review): the nesting of the IPython check inside the PyQt5 branch
    # is reconstructed from the upstream template; confirm against the
    # original file.
    if ub.modname_to_modpath('PyQt5'):
        # Try to use PyQt Backend
        mpl.use('Qt5Agg')
        try:
            # This name only exists when running under IPython.
            __IPYTHON__
        except NameError:
            pass
        else:
            import IPython
            ipython = IPython.get_ipython()
            # ``InteractiveShell.magic`` is deprecated; ``run_line_magic``
            # is the supported equivalent (magic name, then its arguments).
            ipython.run_line_magic('pylab', 'qt5 --no-import-all')
            interactive = True
    return interactive
52+
53+
54+
def benchmark():
    """
    Time the pure-python and cython-accelerated lark LALR parsers.

    Two parsers are built from the ``lark.lark`` grammar file that ships
    with lark: one plain, one with the ``lark_cython`` plugins. Each is
    timed parsing the text produced by :func:`random_lark_grammar` over a
    range of sizes. The timings are collected into a long-form pandas
    DataFrame, printed, and plotted with seaborn.

    Requires ``ubelt``, ``timerit``, ``pandas``, ``numpy``, ``lark`` and
    ``lark_cython``; plotting additionally needs ``seaborn`` and
    ``matplotlib``.
    """
    import ubelt as ub
    import pandas as pd
    import timerit
    import numpy as np
    import lark
    import lark_cython

    grammar_fpath = ub.Path(lark.__file__).parent / 'grammars/lark.lark'
    grammar_text = grammar_fpath.read_text()

    cython_parser = lark.Lark(
        grammar_text, start='start', parser='lalr',
        _plugins=lark_cython.plugins)
    python_parser = lark.Lark(grammar_text, start='start', parser='lalr')

    def parse_cython(text):
        cython_parser.parse(text)

    def parse_python(text):
        python_parser.parse(text)

    # Explicit name -> callable table. Keys must match basis['method'] below.
    method_lut = {
        'parse_cython': parse_cython,
        'parse_python': parse_python,
    }

    # Change params here to modify number of trials
    ti = timerit.Timerit(300, bestof=10, verbose=1)

    # if True, record every trial run and show variance in seaborn
    # if False, use the standard timerit min/mean measures
    RECORD_ALL = True

    # These are the parameters that we benchmark over
    basis = {
        'method': [
            'parse_python',
            'parse_cython',
        ],
        'size': np.linspace(16, 512, 8).round().astype(int),
    }
    xlabel = 'size'
    # Set these to param labels that directly transfer to method kwargs
    kw_labels = []
    # Set these to empty lists if they are not used
    group_labels = {
        'style': [],
        'size': [],
    }
    # Every basis key that is neither the x-axis nor already assigned to a
    # plot group becomes part of the hue.
    group_labels['hue'] = list(
        (ub.oset(basis) - {xlabel}) - set.union(*map(set, group_labels.values())))
    grid_iter = list(ub.named_product(basis))

    # For each variation of your experiment, create a row.
    rows = []
    for params in grid_iter:
        group_keys = {}
        for gname, labels in group_labels.items():
            group_keys[gname + '_key'] = ub.repr2(
                ub.dict_isect(params, labels), compact=1, si=1)
        key = ub.repr2(params, compact=1, si=1)
        # Make any modifications you need to compute input kwargs for each
        # method here.
        kwargs = ub.dict_isect(params.copy(), kw_labels)
        kwargs['text'] = random_lark_grammar(params['size'])
        method = method_lut[params['method']]
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in ti.reset(key):
            # Put any setup logic you dont want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(**kwargs)
        if RECORD_ALL:
            # Seaborn will show the variance if this is enabled, otherwise
            # use the robust timerit mean / min times
            # The raw per-loop times are split into chunks of ``ti.bestof``
            # and the minimum of each chunk becomes one row.
            chunk_iter = ub.chunks(ti.times, ti.bestof)
            best_times = list(map(min, chunk_iter))
            for best_time in best_times:
                row = {
                    # 'mean': ti.mean(),
                    'time': best_time,
                    'key': key,
                    **group_keys,
                    **params,
                }
                rows.append(row)
        else:
            row = {
                'mean': ti.mean(),
                'min': ti.min(),
                'key': key,
                **group_keys,
                **params,
            }
            rows.append(row)

    time_key = 'time' if RECORD_ALL else 'min'

    # The rows define a long-form pandas data array.
    # Data in long-form makes it very easy to use seaborn.
    data = pd.DataFrame(rows)
    data = data.sort_values(time_key)
    print(data)

    if RECORD_ALL:
        # Show the min / mean if we record all
        min_times = data.groupby('key').min().rename({'time': 'min'}, axis=1)
        mean_times = data.groupby('key')[['time']].mean().rename({'time': 'mean'}, axis=1)
        stats_data = pd.concat([min_times, mean_times], axis=1)
        stats_data = stats_data.sort_values('min')
        print('Statistics:')
        print(stats_data)

    plot = True
    if plot:
        # import seaborn as sns
        # kwplot autosns works well for IPython and script execution.
        # not sure about notebooks.
        interactive = _autompl_lite()
        import seaborn as sns
        from matplotlib import pyplot as plt
        sns.set()

        # Only pass seaborn grouping kwargs (hue / style / size) for groups
        # that actually have labels assigned.
        plotkw = {}
        for gname, labels in group_labels.items():
            if labels:
                plotkw[gname] = gname + '_key'

        # Your variables may change
        fig = plt.figure()
        fig.clf()
        ax = fig.gca()
        sns.lineplot(data=data, x=xlabel, y=time_key, marker='o', ax=ax, **plotkw)
        ax.set_title('Benchmark Python Grammar')
        ax.set_xlabel('Input Size')
        ax.set_ylabel('Time (seconds)')
        # ax.set_xscale('log')
        # ax.set_yscale('log')
        if not interactive:
            # Outside of an interactive IPython session the figure must be
            # shown explicitly.
            plt.show()
192+
193+
194+
if __name__ == '__main__':
    """
    CommandLine:
        python benchmarks/benchmark_lark_parser.py
    """
    # Run the full benchmark (timing, printed statistics, and plot) when
    # this file is executed as a script.
    benchmark()

0 commit comments

Comments
 (0)