Skip to content

Commit bc68c92

Browse files
committed
Merge remote-tracking branch 'upstream/main' into introduce-sto-worker-simcore-service
2 parents 1fd8605 + dabcf72 commit bc68c92

File tree

7 files changed

+194
-414
lines changed

7 files changed

+194
-414
lines changed

scripts/common.Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ $(REPO_BASE_DIR)/.venv/bin/activate:
274274
$(REPO_BASE_DIR)/.venv/bin/pip3 install --upgrade pip wheel setuptools
275275
$(REPO_BASE_DIR)/.venv/bin/pip3 install jinja2 j2cli[yaml] typer
276276
@echo "To activate the venv, execute 'source $(REPO_BASE_DIR)/.venv/bin/activate'"
277-
PHONY: .venv
277+
.PHONY: .venv
278278
.venv: $(REPO_BASE_DIR)/.venv/bin/activate ## Creates a python virtual environment with dev tools (pip, pylint, ...)
279279
.PHONY: venv
280280
venv: $(REPO_BASE_DIR)/.venv/bin/activate ## Creates a python virtual environment with dev tools (pip, pylint, ...)

services/admin-panels/docker-compose.yml.j2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ configs:
77
file: ./data/{{ item.name }}{% endfor %}
88
services:
99
adminpanels:
10-
image: jupyter/base-notebook:18b10e7f732d
10+
image: jupyter/base-notebook:notebook-7.0.6
1111
user: root
1212
networks:
1313
- public

services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"v": "1",
33
"id": "dfaeea11-bde6-4203-9cfe-6ca2a23ca22e",
4-
"rev": 42,
4+
"rev": 43,
55
"name": "osparc-custom-content-pack-v2",
66
"summary": "osparc-custom-content-pack-v2",
77
"description": "",

services/graylog/scripts/alerts.template.yaml

Lines changed: 5 additions & 310 deletions
Original file line numberDiff line numberDiff line change
@@ -1,237 +1,3 @@
1-
- title: "${MACHINE_FQDN}: Interactive Study Data too large for AWS S3 and hanging"
2-
description: "${MACHINE_FQDN}: Study Hanging"
3-
priority: 2
4-
config:
5-
query: >
6-
"EntityTooLarge" AND NOT container_name:/.*graylog_graylog.*/
7-
query_parameters: []
8-
search_within_ms: 600000
9-
execute_every_ms: 600000
10-
group_by: []
11-
series: []
12-
conditions: {}
13-
type: aggregation-v1
14-
field_spec:
15-
source:
16-
data_type: string
17-
providers:
18-
- type: template-v1
19-
template: "${source.source}"
20-
require_values: false
21-
container_name:
22-
data_type: string
23-
providers:
24-
- type: template-v1
25-
template: "${source.container_name}"
26-
require_values: false
27-
full_message:
28-
data_type: string
29-
providers:
30-
- type: template-v1
31-
template: "${source.full_message}"
32-
key_spec:
33-
- source
34-
- container_name
35-
- full_message
36-
notification_settings:
37-
grace_period_ms: 0
38-
backlog_size: 99
39-
alert: true
40-
- title: "${MACHINE_FQDN}: Writer Is None Error in Webserver"
41-
description: "${MACHINE_FQDN}: Alert if \"writer is None\" pops up. Communication with rabbitMQ is disrupted and this will make simcore go crazy"
42-
priority: 2
43-
config:
44-
query: >
45-
"writer is None" AND NOT container_name:/.*graylog_graylog.*/
46-
query_parameters: []
47-
search_within_ms: 600000
48-
execute_every_ms: 600000
49-
group_by: []
50-
series: []
51-
conditions: {}
52-
type: aggregation-v1
53-
field_spec:
54-
source:
55-
data_type: string
56-
providers:
57-
- type: template-v1
58-
template: "${source.source}"
59-
require_values: false
60-
container_name:
61-
data_type: string
62-
providers:
63-
- type: template-v1
64-
template: "${source.container_name}"
65-
require_values: false
66-
full_message:
67-
data_type: string
68-
providers:
69-
- type: template-v1
70-
template: "${source.full_message}"
71-
key_spec:
72-
- source
73-
- container_name
74-
- full_message
75-
notification_settings:
76-
grace_period_ms: 0
77-
backlog_size: 99
78-
alert: true
79-
- title: "${MACHINE_FQDN}: Dynamic Sidecar failed to save with S3TransferError"
80-
description: "${MACHINE_FQDN}: Alert if Dynamic Sidecar failed to save with S3TransferError"
81-
priority: 2
82-
config:
83-
query: >
84-
"simcore_sdk.node_ports_common.exceptions.S3TransferError: Could not upload file" AND NOT container_name:/.*graylog_graylog.*/
85-
query_parameters: []
86-
search_within_ms: 600000
87-
execute_every_ms: 600000
88-
group_by: []
89-
series: []
90-
conditions: {}
91-
type: aggregation-v1
92-
field_spec:
93-
source:
94-
data_type: string
95-
providers:
96-
- type: template-v1
97-
template: "${source.source}"
98-
require_values: false
99-
container_name:
100-
data_type: string
101-
providers:
102-
- type: template-v1
103-
template: "${source.container_name}"
104-
require_values: false
105-
full_message:
106-
data_type: string
107-
providers:
108-
- type: template-v1
109-
template: "${source.full_message}"
110-
key_spec:
111-
- source
112-
- container_name
113-
- full_message
114-
notification_settings:
115-
grace_period_ms: 0
116-
backlog_size: 99
117-
alert: true
118-
- title: "${MACHINE_FQDN}: Dynamic Sidecar failed to save - 2"
119-
description: "${MACHINE_FQDN}: Alert if Dynamic Sidecar failed to save - 2"
120-
priority: 2
121-
config:
122-
query: >
123-
"Could not contact dynamic-sidecar to save service" AND NOT container_name:/.*graylog_graylog.*/
124-
query_parameters: []
125-
search_within_ms: 60000
126-
execute_every_ms: 60000
127-
group_by: []
128-
series: []
129-
conditions: {}
130-
type: aggregation-v1
131-
field_spec:
132-
source:
133-
data_type: string
134-
providers:
135-
- type: template-v1
136-
template: "${source.source}"
137-
require_values: false
138-
container_name:
139-
data_type: string
140-
providers:
141-
- type: template-v1
142-
template: "${source.container_name}"
143-
require_values: false
144-
full_message:
145-
data_type: string
146-
providers:
147-
- type: template-v1
148-
template: "${source.full_message}"
149-
key_spec:
150-
- source
151-
- container_name
152-
- full_message
153-
notification_settings:
154-
grace_period_ms: 0
155-
backlog_size: 99
156-
alert: true
157-
- title: "${MACHINE_FQDN}: simcore-agent failed pushing docker volume data to backup S3 bucket"
158-
description: "${MACHINE_FQDN}: simcore-agent failed pushing docker volume data to backup S3 bucket"
159-
priority: 2
160-
config:
161-
query: >
162-
container_name: /.*agent.*/ AND "Shell subprocesses yielded nonzero error code" AND NOT container_name:/.*graylog_graylog.*/
163-
query_parameters: []
164-
search_within_ms: 600000
165-
execute_every_ms: 600000
166-
group_by: []
167-
series: []
168-
conditions: {}
169-
type: aggregation-v1
170-
field_spec:
171-
source:
172-
data_type: string
173-
providers:
174-
- type: template-v1
175-
template: "${source.source}"
176-
require_values: false
177-
container_name:
178-
data_type: string
179-
providers:
180-
- type: template-v1
181-
template: "${source.container_name}"
182-
require_values: false
183-
full_message:
184-
data_type: string
185-
providers:
186-
- type: template-v1
187-
template: "${source.full_message}"
188-
key_spec:
189-
- source
190-
- container_name
191-
- full_message
192-
notification_settings:
193-
grace_period_ms: 0
194-
backlog_size: 99
195-
alert: true
196-
- title: "${MACHINE_FQDN}: faulty env-var setup"
197-
description: "${MACHINE_FQDN}: Look e.g. here https://git.speag.com/oSparc/osparc-ops-environments/-/issues/564"
198-
priority: 2
199-
config:
200-
query: >
201-
"unresolved, defaulting to None" AND NOT container_name:/.*graylog_graylog.*/
202-
query_parameters: []
203-
search_within_ms: 600000
204-
execute_every_ms: 600000
205-
group_by: []
206-
series: []
207-
conditions: {}
208-
type: aggregation-v1
209-
field_spec:
210-
source:
211-
data_type: string
212-
providers:
213-
- type: template-v1
214-
template: "${source.source}"
215-
require_values: false
216-
container_name:
217-
data_type: string
218-
providers:
219-
- type: template-v1
220-
template: "${source.container_name}"
221-
require_values: false
222-
full_message:
223-
data_type: string
224-
providers:
225-
- type: template-v1
226-
template: "${source.full_message}"
227-
key_spec:
228-
- source
229-
- container_name
230-
- full_message
231-
notification_settings:
232-
grace_period_ms: 0
233-
backlog_size: 99
234-
alert: true
2351
- title: "${MACHINE_FQDN}: DOCKER IP POOL EXHAUSTED, no service can start"
2362
description: "${MACHINE_FQDN}: DOCKER IP POOL EXHAUSTED, no service can start. See: https://github.com/moby/moby/issues/30820"
2373
priority: 3
@@ -240,6 +6,7 @@
2406
container_name: /.*director-v2.*/ AND "could not find an available, non-overlapping IPv4 address pool among the defaults to assign to the network" AND NOT container_name:/.*graylog_graylog.*/
2417
query_parameters: []
2428
search_within_ms: 600000
9+
event_limit: 1
24310
execute_every_ms: 600000
24411
group_by: []
24512
series: []
@@ -271,82 +38,6 @@
27138
grace_period_ms: 0
27239
backlog_size: 99
27340
alert: true
274-
- title: "${MACHINE_FQDN}: Potential hanging dy-sidecar service detected."
275-
description: "${MACHINE_FQDN}: Potential hanging dy-sidecar service detected. Human intervention required. Please investigate."
276-
priority: 3
277-
config:
278-
query: >
279-
"waiting for manual intervention" AND container_name:/.*director-v2.*/
280-
query_parameters: []
281-
search_within_ms: 3600000
282-
execute_every_ms: 3600000
283-
group_by: []
284-
series: []
285-
conditions: {}
286-
type: aggregation-v1
287-
field_spec:
288-
source:
289-
data_type: string
290-
providers:
291-
- type: template-v1
292-
template: "${source.source}"
293-
require_values: false
294-
container_name:
295-
data_type: string
296-
providers:
297-
- type: template-v1
298-
template: "${source.container_name}"
299-
require_values: false
300-
full_message:
301-
data_type: string
302-
providers:
303-
- type: template-v1
304-
template: "${source.full_message}"
305-
key_spec:
306-
- source
307-
- container_name
308-
notification_settings:
309-
grace_period_ms: 0
310-
backlog_size: 99
311-
alert: true
312-
- title: "${MACHINE_FQDN}: Syslogs indicate OOM-Event"
313-
description: "${MACHINE_FQDN}: Likely the oom-killer has reaped a container Please investigate and adjust service limitations."
314-
priority: 2
315-
config:
316-
query: >
317-
"Memory cgroup out of memory:"
318-
query_parameters: []
319-
search_within_ms: 3600000
320-
execute_every_ms: 3600000
321-
group_by: []
322-
series: []
323-
conditions: {}
324-
type: aggregation-v1
325-
field_spec:
326-
source:
327-
data_type: string
328-
providers:
329-
- type: template-v1
330-
template: "${source.source}"
331-
require_values: false
332-
container_name:
333-
data_type: string
334-
providers:
335-
- type: template-v1
336-
template: "${source.container_name}"
337-
require_values: false
338-
full_message:
339-
data_type: string
340-
providers:
341-
- type: template-v1
342-
template: "${source.full_message}"
343-
key_spec:
344-
- source
345-
- container_name
346-
notification_settings:
347-
grace_period_ms: 0
348-
backlog_size: 99
349-
alert: true
35041
- title: "${MACHINE_FQDN}: Unexpected error with redis lock detected"
35142
description: "${MACHINE_FQDN}: This error should only occur in unit tests due to very low timings, maybe something happene here"
35243
priority: 2
@@ -355,6 +46,7 @@
35546
"lock is no longer owned. This is unexpected and requires investigation" AND NOT container_name:/.*graylog_graylog.*/
35647
query_parameters: []
35748
search_within_ms: 3600000
49+
event_limit: 1
35850
execute_every_ms: 3600000
35951
group_by: []
36052
series: []
@@ -393,6 +85,7 @@
39385
"LockNotOwnedError" AND NOT container_name:/.*graylog_graylog.*/
39486
query_parameters: []
39587
search_within_ms: 3600000
88+
event_limit: 1
39689
execute_every_ms: 3600000
39790
group_by: []
39891
series: []
@@ -432,6 +125,7 @@
432125
query_parameters: []
433126
search_within_ms: 86400000
434127
execute_every_ms: 86400000
128+
event_limit: 1
435129
group_by: []
436130
series: []
437131
conditions: {}
@@ -463,6 +157,7 @@
463157
query: log_service:/.+payments/ AND (log_level:ERROR OR log_level:WARNING)
464158
query_parameters: []
465159
search_within_ms: 600000
160+
event_limit: 1
466161
execute_every_ms: 600000
467162
group_by: []
468163
series: []

0 commit comments

Comments
 (0)