Skip to content

Commit 8a486bb

Browse files
committed
truncate strings that are stored as keywords in ES (#159)
this should avoid schema errors for values that are too long closes #156 closes #159
1 parent dac0629 commit 8a486bb

File tree

9 files changed

+121
-47
lines changed

9 files changed

+121
-47
lines changed

CHANGELOG.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
https://github.com/elastic/apm-agent-python/compare/v2.0.0\...master[Check the HEAD diff]
1414

1515
* fixed compatibility issue with aiohttp 3.0 ({pull}157[#157])
16+
* Added truncation for fields that have a `maxLength` in the JSON Schema ({pull}159[#159])
1617

1718
[[release-2.0.0]]
1819
[float]

elasticapm/base.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from elasticapm.utils import compat, is_master_process
3030
from elasticapm.utils import json_encoder as json
3131
from elasticapm.utils import stacks, varmap
32-
from elasticapm.utils.encoding import shorten, transform
32+
from elasticapm.utils.encoding import keyword_field, shorten, transform
3333
from elasticapm.utils.module_import import import_string
3434

3535
__all__ = ('Client',)
@@ -111,6 +111,7 @@ def __init__(self, config=None, **defaults):
111111
self.filter_exception_types_dict = {}
112112
self._send_timer = None
113113
self._transports = {}
114+
self._service_info = None
114115

115116
self.config = Config(config, default_dict=defaults)
116117
if self.config.errors:
@@ -327,45 +328,48 @@ def _send_remote(self, url, data, headers=None):
327328
self.handle_transport_success(url=url)
328329

329330
def get_service_info(self):
331+
if self._service_info:
332+
return self._service_info
330333
language_version = platform.python_version()
331334
if hasattr(sys, 'pypy_version_info'):
332335
runtime_version = '.'.join(map(str, sys.pypy_version_info[:3]))
333336
else:
334337
runtime_version = language_version
335338
result = {
336-
'name': self.config.service_name,
337-
'environment': self.config.environment,
338-
'version': self.config.service_version,
339+
'name': keyword_field(self.config.service_name),
340+
'environment': keyword_field(self.config.environment),
341+
'version': keyword_field(self.config.service_version),
339342
'agent': {
340343
'name': 'python',
341344
'version': elasticapm.VERSION,
342345
},
343346
'language': {
344347
'name': 'python',
345-
'version': platform.python_version(),
348+
'version': keyword_field(platform.python_version()),
346349
},
347350
'runtime': {
348-
'name': platform.python_implementation(),
349-
'version': runtime_version,
351+
'name': keyword_field(platform.python_implementation()),
352+
'version': keyword_field(runtime_version),
350353
}
351354
}
352355
if self.config.framework_name:
353356
result['framework'] = {
354-
'name': self.config.framework_name,
355-
'version': self.config.framework_version,
357+
'name': keyword_field(self.config.framework_name),
358+
'version': keyword_field(self.config.framework_version),
356359
}
360+
self._service_info = result
357361
return result
358362

359363
def get_process_info(self):
360364
return {
361365
'pid': os.getpid(),
362366
'argv': sys.argv,
363-
'title': None,
367+
'title': None, # Note: if we implement this, the value needs to be wrapped with keyword_field
364368
}
365369

366370
def get_system_info(self):
367371
return {
368-
'hostname': socket.gethostname(),
372+
'hostname': keyword_field(socket.gethostname()),
369373
'architecture': platform.machine(),
370374
'platform': platform.system().lower(),
371375
}

elasticapm/contrib/django/client.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
from elasticapm.base import Client
2323
from elasticapm.contrib.django.utils import iterate_with_template_sources
24-
from elasticapm.utils import compat, get_url_dict
24+
from elasticapm.utils import compat, encoding, get_url_dict
2525
from elasticapm.utils.module_import import import_string
2626
from elasticapm.utils.wsgi import get_environ, get_headers
2727

@@ -80,11 +80,11 @@ def get_user_info(self, request):
8080
else:
8181
user_info['is_authenticated'] = bool(user.is_authenticated)
8282
if hasattr(user, 'id'):
83-
user_info['id'] = user.id
83+
user_info['id'] = encoding.keyword_field(user.id)
8484
if hasattr(user, 'get_username'):
85-
user_info['username'] = user.get_username()
85+
user_info['username'] = encoding.keyword_field(user.get_username())
8686
elif hasattr(user, 'username'):
87-
user_info['username'] = user.username
87+
user_info['username'] = encoding.keyword_field(user.username)
8888

8989
if hasattr(user, 'email'):
9090
user_info['email'] = user.email

elasticapm/events.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import uuid
1515

1616
from elasticapm.utils import varmap
17-
from elasticapm.utils.encoding import shorten, to_unicode
17+
from elasticapm.utils.encoding import keyword_field, shorten, to_unicode
1818
from elasticapm.utils.stacks import (get_culprit, get_stack_info,
1919
iter_traceback_frames)
2020

@@ -112,8 +112,8 @@ def capture(client, exc_info=None, **kwargs):
112112
'culprit': culprit,
113113
'exception': {
114114
'message': message,
115-
'type': str(exc_type),
116-
'module': str(exc_module),
115+
'type': keyword_field(str(exc_type)),
116+
'module': keyword_field(str(exc_module)),
117117
'stacktrace': frames,
118118
}
119119
}
@@ -146,10 +146,10 @@ def capture(client, param_message=None, message=None, level=None, logger_name=No
146146
message_data = {
147147
'id': str(uuid.uuid4()),
148148
'log': {
149-
'level': level or 'error',
150-
'logger_name': logger_name or '__root__',
149+
'level': keyword_field(level or 'error'),
150+
'logger_name': keyword_field(logger_name or '__root__'),
151151
'message': message,
152-
'param_message': param_message['message'],
152+
'param_message': keyword_field(param_message['message']),
153153
}
154154
}
155155
if isinstance(data.get('stacktrace'), dict):

elasticapm/traces.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import uuid
99

1010
from elasticapm.conf import constants
11-
from elasticapm.utils import compat, get_name_from_func
11+
from elasticapm.utils import compat, encoding, get_name_from_func
1212

1313
__all__ = ('capture_span', 'tag', 'set_transaction_name', 'set_custom_context', 'set_user_context')
1414

@@ -112,10 +112,10 @@ def to_dict(self):
112112
self.context['tags'] = self.tags
113113
result = {
114114
'id': self.id,
115-
'name': self.name,
116-
'type': self.transaction_type,
115+
'name': encoding.keyword_field(self.name),
116+
'type': encoding.keyword_field(self.transaction_type),
117117
'duration': self.duration * 1000, # milliseconds
118-
'result': str(self.result),
118+
'result': encoding.keyword_field(str(self.result)),
119119
'timestamp': self.timestamp.strftime(constants.TIMESTAMP_FORMAT),
120120
'sampled': self.is_sampled,
121121
}
@@ -159,8 +159,8 @@ def fingerprint(self):
159159
def to_dict(self):
160160
return {
161161
'id': self.idx,
162-
'name': self.name,
163-
'type': self.type,
162+
'name': encoding.keyword_field(self.name),
163+
'type': encoding.keyword_field(self.type),
164164
'start': self.start_time * 1000, # milliseconds
165165
'duration': self.duration * 1000, # milliseconds
166166
'parent': self.parent,
@@ -278,7 +278,7 @@ def tag(**tags):
278278
error_logger.warning("Ignored tag %s. No transaction currently active.", name)
279279
return
280280
if TAG_RE.match(name):
281-
transaction.tags[compat.text_type(name)] = compat.text_type(value)
281+
transaction.tags[compat.text_type(name)] = encoding.keyword_field(compat.text_type(value))
282282
else:
283283
error_logger.warning("Ignored tag %s. Tag names can't contain stars, dots or double quotes.", name)
284284

@@ -308,9 +308,9 @@ def set_context(data, key='custom'):
308308
def set_user_context(username=None, email=None, user_id=None):
309309
data = {}
310310
if username is not None:
311-
data['username'] = username
311+
data['username'] = encoding.keyword_field(username)
312312
if email is not None:
313-
data['email'] = email
313+
data['email'] = encoding.keyword_field(email)
314314
if user_id is not None:
315-
data['id'] = user_id
315+
data['id'] = encoding.keyword_field(user_id)
316316
set_context(data, 'user')

elasticapm/utils/__init__.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,7 @@
1010
"""
1111
import os
1212

13-
from elasticapm.utils import compat
14-
15-
try:
16-
import urlparse
17-
except ImportError:
18-
import urllib.parse as urlparse
19-
13+
from elasticapm.utils import compat, encoding
2014

2115
default_ports = {
2216
"https": 433,
@@ -76,19 +70,19 @@ def is_master_process():
7670

7771

7872
def get_url_dict(url):
79-
scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
73+
scheme, netloc, path, params, query, fragment = compat.urlparse.urlparse(url)
8074
if ':' in netloc:
8175
hostname, port = netloc.split(':')
8276
else:
8377
hostname, port = (netloc, None)
8478
url_dict = {
85-
'full': url,
79+
'full': encoding.keyword_field(url),
8680
'protocol': scheme + ':',
87-
'hostname': hostname,
88-
'pathname': path,
81+
'hostname': encoding.keyword_field(hostname),
82+
'pathname': encoding.keyword_field(path),
8983
}
9084
if port:
9185
url_dict['port'] = port
9286
if query:
93-
url_dict['search'] = '?' + query
87+
url_dict['search'] = encoding.keyword_field('?' + query)
9488
return url_dict

elasticapm/utils/encoding.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# -*- coding: utf-8 -*-
2+
13
"""
24
elasticapm.utils.encoding
35
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -13,6 +15,7 @@
1315
import uuid
1416
from decimal import Decimal
1517

18+
from elasticapm.conf.constants import KEYWORD_MAX_LENGTH
1619
from elasticapm.utils import compat
1720

1821
PROTECTED_TYPES = compat.integer_types + (type(None), float, Decimal, datetime.datetime, datetime.date, datetime.time)
@@ -165,3 +168,9 @@ def shorten(var, list_length=50, string_length=200):
165168
# TODO: when we finish the above, we should also implement this for dicts
166169
var = list(var)[:list_length] + ['...', '(%d more elements)' % (len(var) - list_length,)]
167170
return var
171+
172+
173+
def keyword_field(string):
174+
if not isinstance(string, compat.string_types) or len(string) <= KEYWORD_MAX_LENGTH:
175+
return string
176+
return string[:KEYWORD_MAX_LENGTH - 1] + u'…'

tests/client/client_tests.py

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99

1010
import elasticapm
1111
from elasticapm.base import Client, ClientState
12-
from elasticapm.transport.base import Transport, TransportException
13-
from elasticapm.utils import compat
14-
from elasticapm.utils.compat import urlparse
12+
from elasticapm.conf.constants import KEYWORD_MAX_LENGTH
13+
from elasticapm.transport.base import Transport
14+
from elasticapm.utils import compat, encoding
1515

1616

1717
def test_client_state_should_try_online():
@@ -404,7 +404,7 @@ def test_client_uses_sync_mode_when_master_process(is_master_process):
404404
secret_token='secret',
405405
async_mode=True,
406406
)
407-
transport = client._get_transport(urlparse.urlparse('http://exampe.com'))
407+
transport = client._get_transport(compat.urlparse.urlparse('http://exampe.com'))
408408
assert transport.async_mode is False
409409

410410

@@ -702,3 +702,67 @@ def test_transaction_context_is_used_in_errors(elasticapm_client):
702702
assert message['context']['tags'] == {'foo': 'baz'}
703703
assert 'a' in transaction.context['custom']
704704
assert 'foo' not in transaction.context['custom']
705+
706+
707+
def test_transaction_keyword_truncation(sending_elasticapm_client):
708+
too_long = 'x' * (KEYWORD_MAX_LENGTH + 1)
709+
expected = encoding.keyword_field(too_long)
710+
assert too_long != expected
711+
assert len(expected) == KEYWORD_MAX_LENGTH
712+
assert expected[-1] != 'x'
713+
sending_elasticapm_client.begin_transaction(too_long)
714+
elasticapm.tag(val=too_long)
715+
elasticapm.set_user_context(username=too_long, email=too_long, user_id=too_long)
716+
with elasticapm.capture_span(name=too_long, span_type=too_long):
717+
pass
718+
sending_elasticapm_client.end_transaction(too_long, too_long)
719+
sending_elasticapm_client.close()
720+
assert sending_elasticapm_client.httpserver.responses[0]['code'] == 202
721+
transaction = sending_elasticapm_client.httpserver.payloads[0]['transactions'][0]
722+
span = transaction['spans'][0]
723+
724+
assert transaction['name'] == expected
725+
assert transaction['type'] == expected
726+
assert transaction['result'] == expected
727+
728+
assert transaction['context']['user']['id'] == expected
729+
assert transaction['context']['user']['username'] == expected
730+
assert transaction['context']['user']['email'] == expected
731+
732+
assert transaction['context']['tags']['val'] == expected
733+
734+
assert span['type'] == expected
735+
assert span['name'] == expected
736+
737+
738+
def test_error_keyword_truncation(sending_elasticapm_client):
739+
too_long = 'x' * (KEYWORD_MAX_LENGTH + 1)
740+
expected = encoding.keyword_field(too_long)
741+
742+
# let's create a way too long Exception type with a way too long module name
743+
WayTooLongException = type(too_long.upper(), (Exception,), {})
744+
WayTooLongException.__module__ = too_long
745+
try:
746+
raise WayTooLongException()
747+
except WayTooLongException:
748+
sending_elasticapm_client.capture_exception()
749+
error = sending_elasticapm_client.httpserver.payloads[0]['errors'][0]
750+
751+
assert error['exception']['type'] == expected.upper()
752+
assert error['exception']['module'] == expected
753+
754+
755+
def test_message_keyword_truncation(sending_elasticapm_client):
756+
too_long = 'x' * (KEYWORD_MAX_LENGTH + 1)
757+
expected = encoding.keyword_field(too_long)
758+
sending_elasticapm_client.capture_message(
759+
param_message={'message': too_long, 'params': []},
760+
logger_name=too_long,
761+
)
762+
763+
error = sending_elasticapm_client.httpserver.payloads[0]['errors'][0]
764+
765+
assert error['log']['param_message'] == expected
766+
assert error['log']['message'] == too_long # message is not truncated
767+
768+
assert error['log']['logger_name'] == expected

tests/fixtures.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class ValidatingWSGIApp(ContentServer):
3535
def __init__(self, **kwargs):
3636
super(ValidatingWSGIApp, self).__init__(**kwargs)
3737
self.payloads = []
38+
self.responses = []
3839
self.skip_validate = False
3940

4041
def __call__(self, environ, start_response):
@@ -61,6 +62,7 @@ def __call__(self, environ, start_response):
6162
response.headers.clear()
6263
response.headers.extend(self.headers)
6364
response.data = content
65+
self.responses.append({'code': code, 'content': content})
6466
return response(environ, start_response)
6567

6668

0 commit comments

Comments
 (0)