|
46 | 46 | Nova will continue to place new VMs, but the placement will be less desirable. |
47 | 47 | Thus, no immediate action is needed. |
48 | 48 |
|
49 | | - - alert: CortexSyncNotSuccessful |
50 | | - expr: cortex_sync_request_processed_total{component=~"{{ $componentPrefix }}-.*", namespace="{{ $namespaceSelector }}" } - cortex_sync_request_duration_seconds_count{component=~"{{ $componentPrefix }}-.*", namespace="{{ $namespaceSelector }}" } > 0 |
51 | | - for: 5m |
52 | | - labels: |
53 | | - context: syncstatus |
54 | | - dashboard: cortex/cortex |
55 | | - service: cortex |
56 | | - severity: warning |
57 | | - support_group: workload-management |
58 | | - annotations: |
59 | | - summary: "`{{`{{$labels.component}}`}}` Sync not successful" |
60 | | - description: > |
61 | | - `{{`{{$labels.component}}`}}` experienced an issue syncing data from a datasource. This may |
62 | | - happen when the datasource (OpenStack, Prometheus, etc.) is down or |
63 | | - the sync module is misconfigured. No immediate action is needed, since |
64 | | - the sync module will retry the sync operation and the currently synced |
65 | | - data will be kept. However, when this problem persists for a longer |
66 | | - time the service will have a less recent view of the datacenter. |
67 | | -
|
68 | | - - alert: CortexSyncObjectsDroppedToZero |
69 | | - expr: cortex_sync_objects{component=~"{{ $componentPrefix }}-.*", datasource!="openstack_migrations", namespace="{{ $namespaceSelector }}" } == 0 |
70 | | - for: 60m |
71 | | - labels: |
72 | | - context: syncobjects |
73 | | - dashboard: cortex/cortex |
74 | | - service: cortex |
75 | | - severity: warning |
76 | | - support_group: workload-management |
77 | | - annotations: |
78 | | - summary: "`{{`{{$labels.component}}`}}` is not syncing any new data from `{{`{{$labels.datasource}}`}}`" |
79 | | - description: > |
80 | | - `{{`{{$labels.component}}`}}` is not syncing any objects from a datasource. This may happen |
81 | | - when the datasource (OpenStack, Prometheus, etc.) is down or the sync |
82 | | - module is misconfigured. No immediate action is needed, since the sync |
83 | | - module will retry the sync operation and the currently synced data will |
84 | | - be kept. However, when this problem persists for a longer time the |
85 | | - service will have a less recent view of the datacenter. |
86 | | -
|
87 | | - - alert: CortexSyncObjectsTooHigh |
88 | | - expr: cortex_sync_objects{component=~"{{ $componentPrefix }}-.*", namespace="{{ $namespaceSelector }}" } > 10000000 |
89 | | - for: 5m |
90 | | - labels: |
91 | | - context: syncobjects |
92 | | - dashboard: cortex/cortex |
93 | | - service: cortex |
94 | | - severity: warning |
95 | | - support_group: workload-management |
96 | | - annotations: |
97 | | - summary: "`{{`{{$labels.component}}`}}` is syncing unexpectedly many objects from `{{`{{$labels.datasource}}`}}`" |
98 | | - description: > |
99 | | - `{{`{{$labels.component}}`}}` is syncing more than 10 million objects from a datasource. This |
100 | | - may happen when the datasource (OpenStack, Prometheus, etc.) returns |
101 | | - unexpectedly many objects, or when the database cannot drop old objects. |
102 | | - No immediate action is needed, but should this condition persist for a |
103 | | - longer time, the database may fill up and crash. |
104 | | -
|
105 | 49 | - alert: CortexHighMemoryUsage |
106 | 50 | expr: process_resident_memory_bytes{component=~"{{ $componentPrefix }}-.*", namespace="{{ $namespaceSelector }}" } > 6000 * 1024 * 1024 |
107 | 51 | for: 5m |
|
0 commit comments