Skip to content

Commit e96583d

Browse files
authored
OAP Self Observability: make Trace analysis metrics separate by label protocol, add Zipkin span dropped metrics. (#13531)
1 parent 020bd48 commit e96583d

File tree

8 files changed

+85
-20
lines changed

8 files changed

+85
-20
lines changed

docs/en/changes/changes.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@
104104
* Aggregate TopN Slow SQL by service dimension.
105105
* BanyanDB: support add group prefix (namespace) for BanyanDB groups.
106106
* BanyanDB: fix when setting `@BanyanDB.TimestampColumn`, the column should not be indexed.
107+
* OAP Self Observability: make Trace analysis metrics separate by label `protocol`, add Zipkin span dropped metrics.
107108

108109
#### UI
109110

oap-server/server-starter/src/main/resources/otel-rules/oap.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,13 @@ metricsRules:
5151
.tag({tags -> if (tags['gc'] == 'PS Scavenge' || tags['gc'] == 'Copy' || tags['gc'] == 'ParNew' || tags['gc'] == 'G1 Young Generation') {tags.gc = 'young_gc_time'} })
5252
.tag({tags -> if (tags['gc'] == 'PS MarkSweep' || tags['gc'] == 'MarkSweepCompact' || tags['gc'] == 'ConcurrentMarkSweep' || tags['gc'] == 'G1 Old Generation') {tags.gc = 'old_gc_time'} })
5353
- name: instance_trace_count
54-
exp: trace_in_latency_count.sum(['service', 'host_name']).increase('PT1M')
54+
exp: trace_in_latency_count.sum(['service', 'host_name', 'protocol']).increase('PT1M')
5555
- name: instance_trace_latency_percentile
56-
exp: trace_in_latency.sum(['le', 'service', 'host_name']).increase('PT1M').histogram().histogram_percentile([50,70,90,99])
56+
exp: trace_in_latency.sum(['le', 'service', 'host_name', 'protocol']).increase('PT1M').histogram().histogram_percentile([50,70,90,99])
5757
- name: instance_trace_analysis_error_count
58-
exp: trace_analysis_error_count.sum(['service', 'host_name']).increase('PT1M')
58+
exp: trace_analysis_error_count.sum(['service', 'host_name', 'protocol']).increase('PT1M')
59+
- name: instance_spans_dropped_count
60+
exp: spans_dropped_count.sum(['service', 'host_name', 'protocol']).increase('PT1M')
5961
- name: instance_mesh_count
6062
exp: mesh_analysis_latency_count.sum(['service', 'host_name']).increase('PT1M')
6163
- name: instance_mesh_latency_percentile

oap-server/server-starter/src/main/resources/ui-initialized-templates/general/general-root.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@
158158
"x": 0,
159159
"y": 0,
160160
"w": 24,
161-
"h": 49,
161+
"h": 48,
162162
"i": "0",
163163
"type": "Trace"
164164
}

oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_oap/so11y-instance.json

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,7 @@
649649
]
650650
},
651651
{
652-
"x": 12,
652+
"x": 18,
653653
"y": 0,
654654
"w": 6,
655655
"h": 13,
@@ -676,8 +676,8 @@
676676
]
677677
},
678678
{
679-
"x": 18,
680-
"y": 0,
679+
"x": 0,
680+
"y": 13,
681681
"w": 6,
682682
"h": 13,
683683
"i": "12",
@@ -698,7 +698,7 @@
698698
]
699699
},
700700
{
701-
"x": 0,
701+
"x": 6,
702702
"y": 13,
703703
"w": 6,
704704
"h": 13,
@@ -725,7 +725,7 @@
725725
]
726726
},
727727
{
728-
"x": 12,
728+
"x": 18,
729729
"y": 13,
730730
"w": 6,
731731
"h": 13,
@@ -747,7 +747,7 @@
747747
]
748748
},
749749
{
750-
"x": 6,
750+
"x": 0,
751751
"y": 26,
752752
"w": 6,
753753
"h": 13,
@@ -774,8 +774,8 @@
774774
]
775775
},
776776
{
777-
"x": 0,
778-
"y": 39,
777+
"x": 12,
778+
"y": 26,
779779
"w": 6,
780780
"h": 13,
781781
"i": "20",
@@ -796,8 +796,8 @@
796796
}
797797
},
798798
{
799-
"x": 12,
800-
"y": 26,
799+
"x": 0,
800+
"y": 39,
801801
"w": 6,
802802
"h": 13,
803803
"i": "21",
@@ -823,7 +823,7 @@
823823
]
824824
},
825825
{
826-
"x": 0,
826+
"x": 6,
827827
"y": 26,
828828
"w": 6,
829829
"h": 13,
@@ -867,8 +867,8 @@
867867
}
868868
},
869869
{
870-
"x": 18,
871-
"y": 13,
870+
"x": 6,
871+
"y": 39,
872872
"w": 6,
873873
"h": 13,
874874
"i": "24",
@@ -889,7 +889,7 @@
889889
}
890890
},
891891
{
892-
"x": 6,
892+
"x": 12,
893893
"y": 13,
894894
"w": 6,
895895
"h": 13,
@@ -906,6 +906,28 @@
906906
"meter_oap_instance_k8s_als_streams",
907907
"meter_oap_instance_k8s_als_error_streams"
908908
]
909+
},
910+
{
911+
"x": 12,
912+
"y": 0,
913+
"w": 6,
914+
"h": 13,
915+
"i": "26",
916+
"type": "Widget",
917+
"widget": {
918+
"title": "Zipkin Span Dropped Count (Per Minute)"
919+
},
920+
"graph": {
921+
"type": "Line",
922+
"step": false,
923+
"smooth": false,
924+
"showSymbol": true,
925+
"showXAxis": true,
926+
"showYAxis": true
927+
},
928+
"expressions": [
929+
"meter_oap_instance_spans_dropped_count"
930+
]
909931
}
910932
]
911933
}
test/e2e-v2/cases/so11y/expected/metrics-has-value-label-trace.yml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one or more
2+
# contributor license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright ownership.
4+
# The ASF licenses this file to You under the Apache License, Version 2.0
5+
# (the "License"); you may not use this file except in compliance with
6+
# the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
debuggingtrace: null
17+
type: TIME_SERIES_VALUES
18+
results:
19+
{{- contains .results }}
20+
- metric:
21+
labels:
22+
- key: protocol
23+
value: grpc
24+
values:
25+
{{- contains .values }}
26+
- id: {{ notEmpty .id }}
27+
value: {{ .value }}
28+
owner: null
29+
traceid: null
30+
- id: {{ notEmpty .id }}
31+
value: null
32+
owner: null
33+
traceid: null
34+
{{- end}}
35+
{{- end}}
36+
error: null

test/e2e-v2/cases/so11y/expected/metrics-has-value-percentile.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ results:
2121
labels:
2222
- key: p
2323
value: "90"
24+
- key: protocol
25+
value: grpc
2426
values:
2527
{{- contains .values }}
2628
- id: {{ notEmpty .id }}
@@ -36,6 +38,8 @@ results:
3638
labels:
3739
- key: p
3840
value: "99"
41+
- key: protocol
42+
value: grpc
3943
values:
4044
{{- contains .values }}
4145
- id: {{ notEmpty .id }}

test/e2e-v2/cases/so11y/so11y-cases.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
- query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=meter_oap_instance_jvm_memory_bytes_used --instance-name=http://localhost:1234 --service-name=oap-server
3030
expected: expected/metrics-has-memory-value-label.yml
3131
- query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=meter_oap_instance_trace_count --instance-name=http://localhost:1234 --service-name=oap-server
32-
expected: expected/metrics-has-value.yml
32+
expected: expected/metrics-has-value-label-trace.yml
3333
- query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression="meter_oap_instance_metrics_aggregation{level='L1 aggregation'}" --instance-name=http://localhost:1234 --service-name=oap-server
3434
expected: expected/metrics-has-value-label.yml
3535
- query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=meter_oap_instance_persistence_prepare_count --instance-name=http://localhost:1234 --service-name=oap-server

test/e2e-v2/script/env

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ SW_AGENT_CLIENT_JS_COMMIT=af0565a67d382b683c1dbd94c379b7080db61449
2323
SW_AGENT_CLIENT_JS_TEST_COMMIT=4f1eb1dcdbde3ec4a38534bf01dded4ab5d2f016
2424
SW_KUBERNETES_COMMIT_SHA=6fe5e6f0d3b7686c6be0457733e825ee68cb9b35
2525
SW_ROVER_COMMIT=79292fe07f17f98f486e0c4471213e1961fb2d1d
26-
SW_BANYANDB_COMMIT=ac67a23cdbf9c36cd679699c1d7957724c83cce0
26+
SW_BANYANDB_COMMIT=a3fc5bc16e8c9c3385beb41dcef8b988314ff58d
2727
SW_AGENT_PHP_COMMIT=d1114e7be5d89881eec76e5b56e69ff844691e35
2828
SW_PREDICTOR_COMMIT=54a0197654a3781a6f73ce35146c712af297c994
2929

0 commit comments

Comments
 (0)