Skip to content

Commit 6f029b3

Browse files
committed
fixed several edge cases when loading lines from files, and added tests (#147)
also, by iterating over the file and only encode the parts that we need, loading lines from files is now about twice as fast closes #147
1 parent ff43877 commit 6f029b3

File tree

8 files changed

+110
-53
lines changed

8 files changed

+110
-53
lines changed

docs/configuration.asciidoc

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -293,31 +293,29 @@ With this setting, you can limit the length of lists in local variables.
293293
[[config-source-lines-error-app-frames]]
294294
==== `source_lines_error_app_frames`
295295
[float]
296-
[[config-source-lines-span-app-frames]]
297-
==== `source_lines_span_app_frames`
298-
[float]
299296
[[config-source-lines-error-library-frames]]
300297
==== `source_lines_error_library_frames`
301298
[float]
299+
[[config-source-lines-span-app-frames]]
300+
==== `source_lines_span_app_frames`
301+
[float]
302302
[[config-source-lines-span-library-frames]]
303303
==== `source_lines_span_library_frames`
304304

305305
|============
306306
| Environment | Django/Flask | Default
307307
| `ELASTIC_APM_SOURCE_LINES_ERROR_APP_FRAMES` | `SOURCE_LINES_ERROR_APP_FRAMES` | `5`
308-
| `ELASTIC_APM_SOURCE_LINES_SPAN_APP_FRAMES` | `SOURCE_LINES_SPAN_APP_FRAMES` | `5`
309308
| `ELASTIC_APM_SOURCE_LINES_ERROR_LIBRARY_FRAMES` | `SOURCE_LINES_ERROR_LIBRARY_FRAMES` | `5`
309+
| `ELASTIC_APM_SOURCE_LINES_SPAN_APP_FRAMES` | `SOURCE_LINES_SPAN_APP_FRAMES` | `0`
310310
| `ELASTIC_APM_SOURCE_LINES_SPAN_LIBRARY_FRAMES` | `SOURCE_LINES_SPAN_LIBRARY_FRAMES` | `0`
311311
|============
312312

313-
By default, the APM agent collects source code snippets for
314-
315-
* errors, both library frames and in-app frames
316-
* transaction spans, only for in-app frames.
317-
313+
By default, the APM agent collects source code snippets for errors.
318314
With the above settings, you can modify how many lines of source code is collected.
319315

320-
WARNING: Especially for transactions, collecting source code can have a large impact on storage use in your Elasticsearch cluster.
316+
We differ between errors and spans, as well as library frames and app frames.
317+
318+
WARNING: Especially for spans, collecting source code can have a large impact on storage use in your Elasticsearch cluster.
321319

322320
[float]
323321
[[config-flush-interval]]

elasticapm/base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,9 @@
2626
from elasticapm.conf import Config, constants
2727
from elasticapm.traces import TransactionsStore, get_transaction
2828
from elasticapm.transport.base import TransportException
29+
from elasticapm.utils import compat, is_master_process
2930
from elasticapm.utils import json_encoder as json
30-
from elasticapm.utils import compat, is_master_process, stacks, varmap
31+
from elasticapm.utils import stacks, varmap
3132
from elasticapm.utils.encoding import shorten, transform
3233
from elasticapm.utils.module_import import import_string
3334

elasticapm/conf/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,10 @@ class Config(_ConfigBase):
160160
transaction_max_spans = _ConfigValue('TRANSACTION_MAX_SPANS', type=int, default=500)
161161
max_queue_size = _ConfigValue('MAX_QUEUE_SIZE', type=int, default=500)
162162
collect_local_variables = _ConfigValue('COLLECT_LOCAL_VARIABLES', default='errors')
163+
source_lines_error_app_frames = _ConfigValue('SOURCE_LINES_ERROR_APP_FRAMES', type=int, default=5)
163164
source_lines_error_library_frames = _ConfigValue('SOURCE_LINES_ERROR_LIBRARY_FRAMES', type=int, default=5)
165+
source_lines_span_app_frames = _ConfigValue('SOURCE_LINES_SPAN_APP_FRAMES', type=int, default=0)
164166
source_lines_span_library_frames = _ConfigValue('SOURCE_LINES_SPAN_LIBRARY_FRAMES', type=int, default=0)
165-
source_lines_error_app_frames = _ConfigValue('SOURCE_LINES_ERROR_APP_FRAMES', type=int, default=5)
166-
source_lines_span_app_frames = _ConfigValue('SOURCE_LINES_SPAN_APP_FRAMES', type=int, default=5)
167167
local_var_max_length = _ConfigValue('LOCAL_VAR_MAX_LENGTH', type=int, default=200)
168168
local_var_list_max_length = _ConfigValue('LOCAL_VAR_LIST_MAX_LENGTH', type=int, default=10)
169169
async_mode = _BoolConfigValue('ASYNC_MODE', default=True)

elasticapm/contrib/django/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,4 @@
1010
"""
1111
from elasticapm.contrib.django.client import * # noqa E401
1212

13-
1413
default_app_config = 'elasticapm.contrib.django.apps.ElasticAPMConfig'

elasticapm/utils/stacks.py

Lines changed: 42 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
"""
1111
import fnmatch
1212
import inspect
13+
import itertools
1314
import os
1415
import re
1516
import sys
@@ -32,48 +33,56 @@ def get_lines_from_file(filename, lineno, context_lines, loader=None, module_nam
3233
Returns context_lines before and after lineno from file.
3334
Returns (pre_context_lineno, pre_context, context_line, post_context).
3435
"""
36+
lineno = lineno - 1
37+
lower_bound = max(0, lineno - context_lines)
38+
upper_bound = lineno + context_lines
39+
3540
source = None
3641
if loader is not None and hasattr(loader, "get_source"):
37-
try:
38-
source = loader.get_source(module_name)
39-
except ImportError:
40-
# ImportError: Loader for module cProfile cannot handle module __main__
41-
source = None
42-
if source is not None:
43-
source = source.splitlines()
42+
result = get_source_lines_from_loader(loader, module_name, lineno, lower_bound, upper_bound)
43+
if result is not None:
44+
return result
45+
4446
if source is None:
4547
try:
46-
f = open(filename, 'rb')
47-
try:
48-
source = f.readlines()
49-
finally:
50-
f.close()
51-
except (OSError, IOError):
48+
with open(filename, 'rb') as file_obj:
49+
encoding = 'utf8'
50+
# try to find encoding of source file by "coding" header
51+
# if none is found, utf8 is used as a fallback
52+
for line in itertools.islice(file_obj, 0, 2):
53+
match = _coding_re.search(line.decode('utf8'))
54+
if match:
55+
encoding = match.group(1)
56+
break
57+
file_obj.seek(0)
58+
lines = [compat.text_type(line, encoding, 'replace')
59+
for line in itertools.islice(file_obj, lower_bound, upper_bound + 1)]
60+
offset = lineno - lower_bound
61+
return (
62+
[l.strip('\r\n') for l in lines[0:offset]],
63+
lines[offset].strip('\r\n'),
64+
[l.strip('\r\n') for l in lines[offset + 1:]] if len(lines) > offset else []
65+
)
66+
except (OSError, IOError, IndexError):
5267
pass
68+
return None, None, None
5369

54-
if source is None:
55-
return None, None, None
56-
encoding = 'utf8'
57-
for line in source[:2]:
58-
# File coding may be specified. Match pattern from PEP-263
59-
# (http://www.python.org/dev/peps/pep-0263/)
60-
match = _coding_re.search(line.decode('utf8')) # let's assume utf8
61-
if match:
62-
encoding = match.group(1)
63-
break
64-
source = [compat.text_type(sline, encoding, 'replace') for sline in source]
65-
66-
lower_bound = max(0, lineno - context_lines)
67-
upper_bound = lineno + 1 + context_lines
6870

71+
def get_source_lines_from_loader(loader, module_name, lineno, lower_bound, upper_bound):
72+
try:
73+
source = loader.get_source(module_name)
74+
except ImportError:
75+
# ImportError: Loader for module cProfile cannot handle module __main__
76+
return None
77+
if source is not None:
78+
source = source.splitlines()
6979
try:
7080
pre_context = [line.strip('\r\n') for line in source[lower_bound:lineno]]
7181
context_line = source[lineno].strip('\r\n')
72-
post_context = [line.strip('\r\n') for line in source[(lineno + 1):upper_bound]]
82+
post_context = [line.strip('\r\n') for line in source[(lineno + 1):upper_bound + 1]]
7383
except IndexError:
7484
# the file may have changed since it was loaded into memory
7585
return None, None, None
76-
7786
return pre_context, context_line, post_context
7887

7988

@@ -181,9 +190,6 @@ def get_frame_info(frame, lineno, with_locals=True,
181190
abs_path = None
182191
function = None
183192

184-
if lineno:
185-
lineno -= 1
186-
187193
# Try to pull a relative file path
188194
# This changes /foo/site-packages/baz/bar.py into baz/bar.py
189195
try:
@@ -200,7 +206,7 @@ def get_frame_info(frame, lineno, with_locals=True,
200206
'filename': filename,
201207
'module': module_name,
202208
'function': function,
203-
'lineno': lineno + 1,
209+
'lineno': lineno,
204210
'library_frame': is_library_frame(abs_path, include_paths_re, exclude_paths_re)
205211
}
206212

@@ -212,11 +218,9 @@ def get_frame_info(frame, lineno, with_locals=True,
212218
else:
213219
pre_context, context_line, post_context = [], None, []
214220
if context_line:
215-
frame_result.update({
216-
'pre_context': pre_context,
217-
'context_line': context_line,
218-
'post_context': post_context,
219-
})
221+
frame_result['pre_context'] = pre_context
222+
frame_result['context_line'] = context_line
223+
frame_result['post_context'] = post_context
220224
if with_locals:
221225
if f_locals is not None and not isinstance(f_locals, dict):
222226
# XXX: Genshi (and maybe others) have broken implementations of

tests/utils/json/tests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
import datetime
55
import uuid
66

7-
from elasticapm.utils import json_encoder as json
87
from elasticapm.utils import compat
8+
from elasticapm.utils import json_encoder as json
99

1010

1111
def test_uuid():

tests/utils/stacks/linenos.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
1
2+
2
3+
3
4+
4
5+
5
6+
6
7+
7
8+
8
9+
9
10+
10
11+
11
12+
12
13+
13
14+
14
15+
15
16+
16
17+
17
18+
18
19+
19
20+
20

tests/utils/stacks/tests.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from __future__ import absolute_import
33

44
import os
5+
import pkgutil
56

67
import pytest
78
from mock import Mock
@@ -113,3 +114,37 @@ def test_get_frame_info():
113114
assert frame_info['lineno'] == 6
114115
assert frame_info['context_line'] == ' return inspect.currentframe()'
115116
assert frame_info['vars'] == {'a_local_var': 42}
117+
118+
119+
@pytest.mark.parametrize("lineno,context,expected", [
120+
(10, 5, (['5', '6', '7', '8', '9'], '10', ['11', '12', '13', '14', '15'])),
121+
(1, 5, ([], '1', ['2', '3', '4', '5', '6'])),
122+
(2, 5, (['1'], '2', ['3', '4', '5', '6', '7'])),
123+
(20, 5, (['15', '16', '17', '18', '19'], '20', [])),
124+
(19, 5, (['14', '15', '16', '17', '18'], '19', ['20'])),
125+
(1, 0, ([], '1', [])),
126+
(21, 0, (None, None, None)),
127+
])
128+
def test_get_lines_from_file(lineno, context, expected):
129+
stacks.get_lines_from_file.cache_clear()
130+
fname = os.path.join(os.path.dirname(__file__), 'linenos.py')
131+
result = stacks.get_lines_from_file(fname, lineno, context)
132+
assert result == expected
133+
134+
135+
@pytest.mark.parametrize("lineno,context,expected", [
136+
(10, 5, (['5', '6', '7', '8', '9'], '10', ['11', '12', '13', '14', '15'])),
137+
(1, 5, ([], '1', ['2', '3', '4', '5', '6'])),
138+
(2, 5, (['1'], '2', ['3', '4', '5', '6', '7'])),
139+
(20, 5, (['15', '16', '17', '18', '19'], '20', [])),
140+
(19, 5, (['14', '15', '16', '17', '18'], '19', ['20'])),
141+
(1, 0, ([], '1', [])),
142+
(21, 0, (None, None, None)),
143+
])
144+
def test_get_lines_from_loader(lineno, context, expected):
145+
stacks.get_lines_from_file.cache_clear()
146+
module = 'tests.utils.stacks.linenos'
147+
loader = pkgutil.get_loader(module)
148+
fname = os.path.join(os.path.dirname(__file__), 'linenos.py')
149+
result = stacks.get_lines_from_file(fname, lineno, context, loader=loader, module_name=module)
150+
assert result == expected

0 commit comments

Comments
 (0)