|
| 1 | +# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- |
| 2 | +# vi: set ft=python sts=4 ts=4 sw=4 et: |
| 3 | +"""Stripped out routines for Sentry""" |
| 4 | +import os |
| 5 | +from pathlib import Path |
| 6 | +import re |
| 7 | +from niworkflows.utils.misc import read_crashfile |
| 8 | +import sentry_sdk |
| 9 | + |
# Default chunk length (in characters) used by ``_chunks`` when a crashfile
# field is split across several Sentry extras.
CHUNK_SIZE = 16 * 1024

# Group common events under pre-specified fingerprints.
# Each key is a fingerprint; its value lists the text snippets that are
# searched for in a crash traceback.  Keys are tried in definition order and
# the first one with a matching snippet is used.
KNOWN_ERRORS = {
    "permission-denied": ["PermissionError: [Errno 13] Permission denied"],
    "memory-error": [
        "MemoryError",
        "Cannot allocate memory",
        "Return code: 134",
    ],
    "reconall-already-running": [
        "ERROR: it appears that recon-all is already running",
    ],
    "no-disk-space": [
        "[Errno 28] No space left on device",
        "[Errno 122] Disk quota exceeded",
    ],
    "segfault": [
        "Segmentation Fault",
        "Segfault",
        "Return code: 139",
    ],
    "potential-race-condition": [
        "[Errno 39] Directory not empty",
        "_unfinished.json",
    ],
    "keyboard-interrupt": ["KeyboardInterrupt"],
}
| 41 | + |
| 42 | + |
def start_ping(run_uuid, npart):
    """Tag the Sentry scope for this run and report that dMRIPrep started.

    Parameters
    ----------
    run_uuid
        Identifier of the run; only set as the ``run_uuid`` tag when truthy.
    npart
        Value stored under the ``npart`` tag (always set).
    """
    started = 'dMRIPrep started'

    # Collect tags first so the scope is only touched in one place; the
    # optional ``run_uuid`` tag is applied before ``npart``.
    tags = []
    if run_uuid:
        tags.append(('run_uuid', run_uuid))
    tags.append(('npart', npart))

    with sentry_sdk.configure_scope() as scope:
        for tag_name, tag_value in tags:
            scope.set_tag(tag_name, tag_value)

    # Leave a breadcrumb and send an info-level event with the same text.
    sentry_sdk.add_breadcrumb(message=started, level='info')
    sentry_sdk.capture_message(started, level='info')
| 50 | + |
| 51 | + |
def sentry_setup(opts, exec_env):
    """Initialize the Sentry SDK and tag the scope with run metadata.

    Parameters
    ----------
    opts
        Options object.  ``opts.bids_dir`` is joined with ``/`` and queried
        with ``exists()``/``read_bytes()``, and every attribute returned by
        ``vars(opts)`` is recorded as a Sentry tag.
    exec_env
        Name of the execution environment, stored under the ``exec_env`` tag.
        The value ``'dmriprep-docker'`` additionally records the wrapper's
        Docker version.
    """
    import psutil
    import hashlib
    from ..__about__ import __version__

    # No version at all, an explicit DMRIPREP_DEV flag, or a local version
    # marker ('+') all classify the run as a development environment.
    environment = "prod"
    release = __version__
    if not __version__:
        environment = "dev"
        release = "dev"
    elif int(os.getenv('DMRIPREP_DEV', '0')) or ('+' in __version__):
        environment = "dev"

    # NOTE(review): the DSN literal below contains "[email protected]", which
    # looks like a redacted key -- confirm the real DSN before relying on it.
    sentry_sdk.init("https://[email protected]/1137693",
                    release=release,
                    environment=environment,
                    before_send=before_send)
    with sentry_sdk.configure_scope() as scope:
        scope.set_tag('exec_env', exec_env)

        if exec_env == 'dmriprep-docker':
            scope.set_tag('docker_version', os.getenv('DOCKER_VERSION_8395080871'))

        # Only a hash of the dataset description is sent, never its content.
        dset_desc_path = opts.bids_dir / 'dataset_description.json'
        if dset_desc_path.exists():
            desc_content = dset_desc_path.read_bytes()
            scope.set_tag('dset_desc_sha256', hashlib.sha256(desc_content).hexdigest())

        # Free memory converted from bytes to GiB, one decimal place.
        free_mem_at_start = round(psutil.virtual_memory().free / 1024**3, 1)
        scope.set_tag('free_mem_at_start', free_mem_at_start)
        scope.set_tag('cpu_count', os.cpu_count())

        # Memory policy may have a large effect on types of errors experienced
        overcommit_memory = Path('/proc/sys/vm/overcommit_memory')
        if overcommit_memory.exists():
            policy = {'0': 'heuristic',
                      '1': 'always',
                      '2': 'never'}.get(overcommit_memory.read_text().strip(), 'unknown')
            scope.set_tag('overcommit_memory', policy)
            if policy == 'never':
                # The commit limit is given either as an absolute amount
                # (overcommit_kbytes, when non-zero) or as a percentage
                # (overcommit_ratio).  Read the kbytes file here -- re-reading
                # overcommit_memory would always yield '2' under this policy.
                overcommit_kbytes = Path('/proc/sys/vm/overcommit_kbytes')
                kb = overcommit_kbytes.read_text().strip()
                if kb != '0':
                    limit = '{}kB'.format(kb)
                else:
                    overcommit_ratio = Path('/proc/sys/vm/overcommit_ratio')
                    limit = '{}%'.format(overcommit_ratio.read_text().strip())
                scope.set_tag('overcommit_limit', limit)
            else:
                scope.set_tag('overcommit_limit', 'n/a')
        else:
            scope.set_tag('overcommit_memory', 'n/a')
            scope.set_tag('overcommit_limit', 'n/a')

        # Record every command-line option as a tag.
        for k, v in vars(opts).items():
            scope.set_tag(k, v)
| 109 | + |
| 110 | + |
def process_crashfile(crashfile):
    """Parse the contents of a crashfile and submit sentry messages.

    The crashfile is read with ``read_crashfile``; its ``node`` and
    ``traceback`` entries are required, ``inputs`` is optional, and any
    remaining entries are attached as extras (split into numbered chunks
    when their text is longer than one chunk).  The event is grouped under a
    ``KNOWN_ERRORS`` fingerprint when one of its snippets occurs in the
    traceback, otherwise under a fingerprint derived from the message with
    file paths and number-bearing words removed.

    Parameters
    ----------
    crashfile
        Path to the crashfile; converted with ``str()`` before reading.
    """
    max_message_len = 8192

    crash_info = read_crashfile(str(crashfile))
    with sentry_sdk.push_scope() as scope:
        scope.level = 'fatal'

        # Extract node name (last component of the dotted node path)
        node_name = crash_info.pop('node').split('.')[-1]
        scope.set_tag("node_name", node_name)

        # Massage the traceback, extract the gist
        traceback = crash_info.pop('traceback')
        tb_lines = traceback.splitlines()
        # last line is probably most informative summary
        gist = tb_lines[-1] if tb_lines else ''

        # Skip the first line and the indented stack frames that follow it;
        # the exception text starts at the first non-blank, non-indented
        # line.  Blank lines are skipped instead of raising IndexError.
        exception_text_start = 1
        for line in tb_lines[1:]:
            if line and not line[0].isspace():
                break
            exception_text_start += 1

        exception_text = '\n'.join(tb_lines[exception_text_start:])

        # Extract inputs, if present
        inputs = crash_info.pop('inputs', None)
        if inputs:
            scope.set_extra('inputs', dict(inputs))

        # Extract any other possible metadata in the crash file
        for k, v in crash_info.items():
            strv = list(_chunks(str(v)))
            if len(strv) == 1:
                scope.set_extra(k, strv[0])
            else:
                for i, chunk in enumerate(strv):
                    scope.set_extra('%s_%02d' % (k, i), chunk)

        # First known error (in definition order) with a matching snippet.
        fingerprint = next(
            (name for name, snippets in KNOWN_ERRORS.items()
             if any(snippet in traceback for snippet in snippets)),
            '')
        issue_title = fingerprint or '{}: {}'.format(node_name, gist)

        message = issue_title + '\n\n'
        # Keep only the tail of the exception text that fits the budget.  A
        # non-positive budget must append nothing: a ``[-0:]`` slice would
        # append the entire text.
        budget = max_message_len - len(message)
        if budget > 0:
            message += exception_text[-budget:]

        if fingerprint:
            sentry_sdk.add_breadcrumb(message=fingerprint, level='fatal')
        else:
            # remove file paths
            fingerprint = re.sub(r"(/[^/ ]*)+/?", '', message)
            # remove words containing numbers
            fingerprint = re.sub(r"([a-zA-Z]*[0-9]+[a-zA-Z]*)+", '', fingerprint)
            # adding the return code if it exists
            for line in message.splitlines():
                if line.startswith("Return code"):
                    fingerprint += line
                    break

        scope.fingerprint = [fingerprint]
        sentry_sdk.capture_message(message, 'fatal')
| 176 | + |
| 177 | + |
def before_send(event, hints):
    """Filter and regroup an event before it is sent to Sentry.

    Returns ``None`` (dropping the event) for log messages about crashed
    nodes; otherwise returns ``event``, after copying the first breadcrumb
    message that names a propagated fingerprint into ``event['fingerprint']``.
    ``hints`` is accepted but not used.
    """
    # Filtering log messages about crashed nodes
    crashed_node_prefixes = ("could not run node:", "Saving crash info to ")
    if 'logentry' in event and 'message' in event['logentry']:
        log_text = event['logentry']['message']
        if log_text.startswith(crashed_node_prefixes):
            return None
        if re.match("Node .+ failed to run on host .+", log_text):
            return None

    # Only a plain list of breadcrumbs is inspected.
    crumbs = event.get('breadcrumbs')
    if isinstance(crumbs, list):
        propagated = ['no-disk-space', 'memory-error', 'permission-denied',
                      'keyboard-interrupt']
        not_found = object()
        crumb_messages = (crumb.get('message', 'empty-msg') for crumb in crumbs)
        first_hit = next((text for text in crumb_messages if text in propagated),
                         not_found)
        if first_hit is not not_found:
            event['fingerprint'] = [first_hit]

    return event
| 199 | + |
| 200 | + |
def _chunks(string, length=CHUNK_SIZE):
    """
    Lazily yield consecutive slices of ``string``, each at most ``length`` long.

    The last slice may be shorter; an empty ``string`` yields nothing.

    >>> list(_chunks('some longer string.', length=3))
    ['som', 'e l', 'ong', 'er ', 'str', 'ing', '.']
    """
    # The offsets are computed eagerly, so a bad ``length`` fails at call time.
    offsets = range(0, len(string), length)
    return (string[start:start + length] for start in offsets)
0 commit comments