Skip to content

Commit 1ee2c14

Browse files
committed
Add SkyPredictor project to E2E Testing alarm with baseline
1 parent 028757f commit 1ee2c14

File tree

15 files changed

+284
-64
lines changed

15 files changed

+284
-64
lines changed

oap-server/ai-pipeline/src/main/proto/baseline.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
*/
1818

1919
syntax = "proto3";
20-
package skywalking.baseline.v3;
20+
package skywalking.v3;
2121

2222
import "google/protobuf/empty.proto";
2323

test/e2e-v2/cases/alarm/alarm-cases.yaml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,22 @@
1717
# service list
1818
- query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql service ls
1919
expected: expected/service.yml
20+
- query: |
21+
curl -s -XPOST http://${sender_host}:${sender_9093}/sendMetrics4Predict/7 > /dev/null;
22+
sleep 10;
23+
swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=service_resp_time --service-name=e2e-test-dest-service
24+
expected: expected/metrics-has-value.yml
2025
# before silence alarm list WARNING,receivers=lisi
2126
- query: |
2227
sleep 5;
2328
swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql alarm ls --tags level=WARNING,receivers=lisi
2429
expected: expected/silence-before-graphql-warn.yml
2530
# before silence alarm list level=CRITICAL,receivers=zhangsan
26-
- query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql alarm ls --tags level=CRITICAL,receivers=zhangsan
31+
- query: |
32+
sleep 10;
33+
curl -s -XPOST http://${sender_host}:${sender_9093}/sendMetrics4Predict/1 > /dev/null; # keep sending service mesh traffic so the service still has traffic
34+
sleep 10;
35+
swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql alarm ls --tags level=CRITICAL,receivers=zhangsan
2736
expected: expected/silence-before-graphql-critical.yml
2837
# query auto complete tag key and value
2938
- query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql alarm autocomplete-keys

test/e2e-v2/cases/alarm/alarm-settings.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ rules:
4444
level: CRITICAL
4545
receivers: zhangsan
4646
baseline_rule:
47-
expression: sum(service_resp_time > baseline(service_resp_time,upper)) >= 1
47+
expression: sum(service_resp_time > baseline(service_resp_time,lower)) >= 1
4848
period: 10
4949
message: Response time of service {name} is more than baseline in 1 minutes of last 10 minutes.
5050
tags:

test/e2e-v2/cases/alarm/banyandb/docker-compose.yml

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,12 @@ services:
3030
environment:
3131
SW_STORAGE: banyandb
3232
SW_SEARCHABLE_ALARM_TAG_KEYS: level,receivers
33-
SW_API_PIPELINE_BASELINE_SERVICE_HOST: baseline-server
33+
SW_API_PIPELINE_BASELINE_SERVICE_HOST: baseline-predictor
3434
ports:
3535
- 12800
3636
depends_on:
3737
banyandb:
3838
condition: service_healthy
39-
baseline-server:
40-
condition: service_healthy
4139
volumes:
4240
- ../alarm-settings.yml:/skywalking/config/alarm-settings.yml
4341

@@ -51,20 +49,48 @@ services:
5149
oap:
5250
condition: service_healthy
5351

54-
baseline-server:
55-
image: "eclipse-temurin:11-jre"
56-
volumes:
57-
- ../../../java-test-service/e2e-mock-baseline-server/target/e2e-mock-baseline-server-2.0.0.jar:/e2e-mock-baseline-server-2.0.0.jar
58-
command: [ "java", "-jar", "/e2e-mock-baseline-server-2.0.0.jar" ]
52+
baseline-predictor:
53+
extends:
54+
file: ../../../script/docker-compose/base-compose.yml
55+
service: baseline-predictor
5956
networks:
6057
- e2e
6158
ports:
6259
- 18080
60+
environment:
61+
BASELINE_FETCH_METRICS: service_resp_time,service_percentile
62+
BASELINE_FETCH_CRON: "* * * * *"
63+
BASELINE_FETCH_SERVER_ENDPOINT: http://oap:12800
64+
BASELINE_FETCH_SERVER_LAYERS: MESH
6365
healthcheck:
6466
test: ["CMD", "bash", "-c", "cat < /dev/null > /dev/tcp/127.0.0.1/18080"]
6567
interval: 5s
6668
timeout: 60s
6769
retries: 120
70+
depends_on:
71+
oap:
72+
condition: service_healthy
73+
74+
sender:
75+
image: "eclipse-temurin:8-jre"
76+
volumes:
77+
- ./../../../java-test-service/e2e-mock-sender/target/e2e-mock-sender-2.0.0.jar:/e2e-mock-sender-2.0.0.jar
78+
command: [ "java", "-jar", "/e2e-mock-sender-2.0.0.jar" ]
79+
environment:
80+
OAP_HOST: oap
81+
OAP_GRPC_PORT: 11800
82+
networks:
83+
- e2e
84+
ports:
85+
- 9093
86+
healthcheck:
87+
test: [ "CMD", "sh", "-c", "nc -nz 127.0.0.1 9093" ]
88+
interval: 5s
89+
timeout: 60s
90+
retries: 120
91+
depends_on:
92+
oap:
93+
condition: service_healthy
6894

6995
networks:
7096
e2e:

test/e2e-v2/cases/alarm/es/docker-compose.yml

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,12 @@ services:
3737
environment:
3838
SW_STORAGE: elasticsearch
3939
SW_SEARCHABLE_ALARM_TAG_KEYS: level,receivers
40-
SW_API_PIPELINE_BASELINE_SERVICE_HOST: baseline-server
40+
SW_API_PIPELINE_BASELINE_SERVICE_HOST: baseline-predictor
4141
ports:
4242
- 12800
4343
depends_on:
4444
es:
4545
condition: service_healthy
46-
baseline-server:
47-
condition: service_healthy
4846
volumes:
4947
- ../alarm-settings.yml:/skywalking/config/alarm-settings.yml
5048

@@ -58,20 +56,47 @@ services:
5856
oap:
5957
condition: service_healthy
6058

61-
baseline-server:
62-
image: "eclipse-temurin:11-jre"
63-
volumes:
64-
- ../../../java-test-service/e2e-mock-baseline-server/target/e2e-mock-baseline-server-2.0.0.jar:/e2e-mock-baseline-server-2.0.0.jar
65-
command: [ "java", "-jar", "/e2e-mock-baseline-server-2.0.0.jar" ]
59+
baseline-predictor:
60+
extends:
61+
file: ../../../script/docker-compose/base-compose.yml
62+
service: baseline-predictor
6663
networks:
6764
- e2e
6865
ports:
6966
- 18080
67+
environment:
68+
BASELINE_FETCH_METRICS: service_resp_time,service_percentile
69+
BASELINE_FETCH_CRON: "* * * * *"
70+
BASELINE_FETCH_SERVER_ENDPOINT: http://oap:12800
71+
BASELINE_FETCH_SERVER_LAYERS: MESH
7072
healthcheck:
71-
test: ["CMD", "bash", "-c", "cat < /dev/null > /dev/tcp/127.0.0.1/18080"]
73+
test: [ "CMD", "bash", "-c", "cat < /dev/null > /dev/tcp/127.0.0.1/18080" ]
7274
interval: 5s
7375
timeout: 60s
7476
retries: 120
77+
depends_on:
78+
oap:
79+
condition: service_healthy
7580

81+
sender:
82+
image: "eclipse-temurin:8-jre"
83+
volumes:
84+
- ./../../../java-test-service/e2e-mock-sender/target/e2e-mock-sender-2.0.0.jar:/e2e-mock-sender-2.0.0.jar
85+
command: [ "java", "-jar", "/e2e-mock-sender-2.0.0.jar" ]
86+
environment:
87+
OAP_HOST: oap
88+
OAP_GRPC_PORT: 11800
89+
networks:
90+
- e2e
91+
ports:
92+
- 9093
93+
healthcheck:
94+
test: [ "CMD", "sh", "-c", "nc -nz 127.0.0.1 9093" ]
95+
interval: 5s
96+
timeout: 60s
97+
retries: 120
98+
depends_on:
99+
oap:
100+
condition: service_healthy
76101
networks:
77102
e2e:

test/e2e-v2/cases/alarm/es/es-sharding/docker-compose.yml

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,12 @@ services:
3838
SW_STORAGE: elasticsearch
3939
SW_SEARCHABLE_ALARM_TAG_KEYS: level,receivers
4040
SW_STORAGE_ES_LOGIC_SHARDING: "true"
41-
SW_API_PIPELINE_BASELINE_SERVICE_HOST: baseline-server
41+
SW_API_PIPELINE_BASELINE_SERVICE_HOST: baseline-predictor
4242
ports:
4343
- 12800
4444
depends_on:
4545
es:
4646
condition: service_healthy
47-
baseline-server:
48-
condition: service_healthy
4947
volumes:
5048
- ../../alarm-settings.yml:/skywalking/config/alarm-settings.yml
5149

@@ -59,20 +57,48 @@ services:
5957
oap:
6058
condition: service_healthy
6159

62-
baseline-server:
63-
image: "eclipse-temurin:11-jre"
64-
volumes:
65-
- ../../../../java-test-service/e2e-mock-baseline-server/target/e2e-mock-baseline-server-2.0.0.jar:/e2e-mock-baseline-server-2.0.0.jar
66-
command: [ "java", "-jar", "/e2e-mock-baseline-server-2.0.0.jar" ]
60+
baseline-predictor:
61+
extends:
62+
file: ../../../../script/docker-compose/base-compose.yml
63+
service: baseline-predictor
6764
networks:
6865
- e2e
6966
ports:
7067
- 18080
68+
environment:
69+
BASELINE_FETCH_METRICS: service_resp_time,service_percentile
70+
BASELINE_FETCH_CRON: "* * * * *"
71+
BASELINE_FETCH_SERVER_ENDPOINT: http://oap:12800
72+
BASELINE_FETCH_SERVER_LAYERS: MESH
7173
healthcheck:
72-
test: ["CMD", "bash", "-c", "cat < /dev/null > /dev/tcp/127.0.0.1/18080"]
74+
test: [ "CMD", "bash", "-c", "cat < /dev/null > /dev/tcp/127.0.0.1/18080" ]
7375
interval: 5s
7476
timeout: 60s
7577
retries: 120
78+
depends_on:
79+
oap:
80+
condition: service_healthy
81+
82+
sender:
83+
image: "eclipse-temurin:8-jre"
84+
volumes:
85+
- ./../../../../java-test-service/e2e-mock-sender/target/e2e-mock-sender-2.0.0.jar:/e2e-mock-sender-2.0.0.jar
86+
command: [ "java", "-jar", "/e2e-mock-sender-2.0.0.jar" ]
87+
environment:
88+
OAP_HOST: oap
89+
OAP_GRPC_PORT: 11800
90+
networks:
91+
- e2e
92+
ports:
93+
- 9093
94+
healthcheck:
95+
test: [ "CMD", "sh", "-c", "nc -nz 127.0.0.1 9093" ]
96+
interval: 5s
97+
timeout: 60s
98+
retries: 120
99+
depends_on:
100+
oap:
101+
condition: service_healthy
76102

77103
networks:
78104
e2e:
test/e2e-v2/cases/alarm/expected/metrics-has-value.yml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one or more
2+
# contributor license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright ownership.
4+
# The ASF licenses this file to You under the Apache License, Version 2.0
5+
# (the "License"); you may not use this file except in compliance with
6+
# the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
debuggingtrace: null
17+
type: TIME_SERIES_VALUES
18+
results:
19+
{{- contains .results }}
20+
- metric:
21+
labels: []
22+
values:
23+
{{- contains .values }}
24+
- id: {{ notEmpty .id }}
25+
value: {{ notEmpty .value }}
26+
owner: null
27+
traceid: null
28+
{{- end}}
29+
{{- end}}
30+
error: null

test/e2e-v2/cases/alarm/expected/silence-after-graphql-critical.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,9 @@ msgs:
6060
{{- end }}
6161
- starttime: {{ gt .starttime 0 }}
6262
scope: Service
63-
id: ZTJlLXNlcnZpY2UtcHJvdmlkZXI=.1
64-
name: e2e-service-provider
65-
message: Response time of service e2e-service-provider is more than baseline in 1 minutes of last 10 minutes.
63+
id: ZTJlLXRlc3QtZGVzdC1zZXJ2aWNl.1
64+
name: e2e-test-dest-service
65+
message: Response time of service e2e-test-dest-service is more than baseline in 1 minutes of last 10 minutes.
6666
tags:
6767
- key: level
6868
value: CRITICAL
@@ -72,7 +72,7 @@ msgs:
7272
{{- contains .events }}
7373
- uuid: {{ notEmpty .uuid }}
7474
source:
75-
service: e2e-service-provider
75+
service: e2e-test-dest-service
7676
serviceinstance: ""
7777
endpoint: ""
7878
name: Alarm
@@ -81,10 +81,10 @@ msgs:
8181
parameters: []
8282
starttime: {{ gt .starttime 0 }}
8383
endtime: {{ gt .endtime 0 }}
84-
layer: GENERAL
84+
layer: MESH
8585
{{- end }}
8686
snapshot:
87-
expression: sum(service_resp_time > baseline(service_resp_time,upper)) >= 1
87+
expression: sum(service_resp_time > baseline(service_resp_time,lower)) >= 1
8888
metrics:
8989
{{- contains .snapshot.metrics }}
9090
- name: service_resp_time
@@ -100,7 +100,7 @@ msgs:
100100
traceid: null
101101
{{- end }}
102102
{{- end }}
103-
- name: baseline(service_resp_time,upper)
103+
- name: baseline(service_resp_time,lower)
104104
results:
105105
{{- contains .results }}
106106
- metric:
@@ -109,7 +109,7 @@ msgs:
109109
{{- contains .values }}
110110
- id: {{ notEmpty .id }}
111111
owner: null
112-
value: 10
112+
value: 1999
113113
traceid: null
114114
{{- end }}
115115
{{- end }}

test/e2e-v2/cases/alarm/expected/silence-before-graphql-critical.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,9 @@ msgs:
6060
{{- end }}
6161
- starttime: {{ gt .starttime 0 }}
6262
scope: Service
63-
id: ZTJlLXNlcnZpY2UtcHJvdmlkZXI=.1
64-
name: e2e-service-provider
65-
message: Response time of service e2e-service-provider is more than baseline in 1 minutes of last 10 minutes.
63+
id: ZTJlLXRlc3QtZGVzdC1zZXJ2aWNl.1
64+
name: e2e-test-dest-service
65+
message: Response time of service e2e-test-dest-service is more than baseline in 1 minutes of last 10 minutes.
6666
tags:
6767
- key: level
6868
value: CRITICAL
@@ -72,7 +72,7 @@ msgs:
7272
{{- contains .events }}
7373
- uuid: {{ notEmpty .uuid }}
7474
source:
75-
service: e2e-service-provider
75+
service: e2e-test-dest-service
7676
serviceinstance: ""
7777
endpoint: ""
7878
name: Alarm
@@ -81,10 +81,10 @@ msgs:
8181
parameters: []
8282
starttime: {{ gt .starttime 0 }}
8383
endtime: {{ gt .endtime 0 }}
84-
layer: GENERAL
84+
layer: MESH
8585
{{- end }}
8686
snapshot:
87-
expression: sum(service_resp_time > baseline(service_resp_time,upper)) >= 1
87+
expression: sum(service_resp_time > baseline(service_resp_time,lower)) >= 1
8888
metrics:
8989
{{- contains .snapshot.metrics }}
9090
- name: service_resp_time
@@ -100,7 +100,7 @@ msgs:
100100
traceid: null
101101
{{- end }}
102102
{{- end }}
103-
- name: baseline(service_resp_time,upper)
103+
- name: baseline(service_resp_time,lower)
104104
results:
105105
{{- contains .results }}
106106
- metric:
@@ -109,7 +109,7 @@ msgs:
109109
{{- contains .values }}
110110
- id: {{ notEmpty .id }}
111111
owner: null
112-
value: 10
112+
value: "1999"
113113
traceid: null
114114
{{- end }}
115115
{{- end }}

0 commit comments

Comments
 (0)