Skip to content

Commit 3e67db7

Browse files
authored
[Break Change]BanyanDB: Setup new Group policy. (#13267)
1 parent fd82275 commit 3e67db7

File tree

33 files changed

+920
-298
lines changed

33 files changed

+920
-298
lines changed

.github/workflows/skywalking.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,12 +1019,12 @@ jobs:
10191019
found=false
10201020
for i in {1..60}; do
10211021
# check if segment files exist
1022-
if docker exec $CONTAINER_ID sh -c '[ -n "$(ls /tmp/measure-data/measure/data/day/seg* 2>/dev/null)" ]'; then
1022+
if docker exec $CONTAINER_ID sh -c '[ -n "$(ls /tmp/measure-data/measure/data/metricsDay/seg* 2>/dev/null)" ]'; then
10231023
echo "✅ found segment files"
10241024
sleep 180
10251025
# create and copy files
10261026
docker cp $CONTAINER_ID:/tmp ${BANYANDB_DATA_GENERATE_ROOT}
1027-
docker cp $CONTAINER_ID:/tmp/measure-data/measure/data/index ${BANYANDB_DATA_GENERATE_ROOT}
1027+
docker cp $CONTAINER_ID:/tmp/measure-data/measure/data/metadata ${BANYANDB_DATA_GENERATE_ROOT}
10281028
found=true
10291029
break
10301030
else

docs/en/changes/changes.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
* Adapt the mesh metrics if the ambient mesh is detected in the eBPF access log receiver.
2323
* Add JSON format support for the `/debugging/config/dump` status API.
2424
* Enhance status APIs to support multiple `accept` header values, e.g. `Accept: application/json; charset=utf-8`.
25+
* Storage: separate `SpanAttachedEventRecord` for SkyWalking trace and Zipkin trace.
26+
* [Breaking Change] BanyanDB: Set up new Group policy.
2527

2628
#### UI
2729

docs/en/setup/backend/configuration-vocabulary.md

Lines changed: 136 additions & 88 deletions
Large diffs are not rendered by default.

docs/en/setup/backend/storages/banyandb.md

Lines changed: 77 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -32,21 +32,6 @@ storage:
3232
Since 10.2.0, the banyandb configuration is separated to an independent configuration file: `bydb.yaml`:
3333

3434
```yaml
35-
# Licensed to the Apache Software Foundation (ASF) under one or more
36-
# contributor license agreements. See the NOTICE file distributed with
37-
# this work for additional information regarding copyright ownership.
38-
# The ASF licenses this file to You under the Apache License, Version 2.0
39-
# (the "License"); you may not use this file except in compliance with
40-
# the License. You may obtain a copy of the License at
41-
#
42-
# http://www.apache.org/licenses/LICENSE-2.0
43-
#
44-
# Unless required by applicable law or agreed to in writing, software
45-
# distributed under the License is distributed on an "AS IS" BASIS,
46-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
47-
# See the License for the specific language governing permissions and
48-
# limitations under the License.
49-
5035
global:
5136
# Targets is the list of BanyanDB servers, separated by commas.
5237
# Each target is a BanyanDB server in the format of `host:port`.
@@ -82,9 +67,15 @@ global:
8267

8368
groups:
8469
# The group settings of record.
70+
# - "ShardNum": Number of shards in the group. Shards are the basic units of data storage in BanyanDB. Data is distributed across shards based on the hash value of the series ID.
71+
# Refer to the [BanyanDB Shard](https://skywalking.apache.org/docs/skywalking-banyandb/latest/concept/clustering/#52-data-sharding) documentation for more details.
72+
# - "SIDays": Interval in days for creating a new segment. Segments are time-based, allowing efficient data retention and querying. `SI` stands for Segment Interval.
73+
# - "TTLDays": Time-to-live for the data in the group, in days. Data exceeding the TTL will be deleted.
8574
#
86-
# The "normal" section defines settings for datasets not specified in "super".
87-
# Each dataset will be grouped under a single group named "normal".
75+
# For more details on setting `segmentInterval` and `ttl` (both in days), refer to the [BanyanDB TTL](https://skywalking.apache.org/docs/main/latest/en/banyandb/ttl) documentation.
76+
77+
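# The "recordsNormal" section defines settings for record datasets that do not have a dedicated group (i.e. not "recordsTrace", "recordsZipkinTrace", "recordsLog", or "recordsBrowserErrorLog").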
78+
# Each dataset will be grouped under a single group named "recordsNormal".
8879
recordsNormal:
8980
# The settings for the default "hot" stage.
9081
shardNum: ${SW_STORAGE_BANYANDB_GR_NORMAL_SHARD_NUM:1}
@@ -108,24 +99,72 @@ groups:
10899
segmentInterval: ${SW_STORAGE_BANYANDB_GR_NORMAL_COLD_SI_DAYS:3}
109100
ttl: ${SW_STORAGE_BANYANDB_GR_NORMAL_COLD_TTL_DAYS:30}
110101
nodeSelector: ${SW_STORAGE_BANYANDB_GR_NORMAL_COLD_NODE_SELECTOR:"type=cold"}
111-
# "super" is a special dataset designed to store trace or log data that is too large for normal datasets.
112-
# Each super dataset will be a separate group in BanyanDB, following the settings defined in the "super" section.
113-
recordsSuper:
114-
shardNum: ${SW_STORAGE_BANYANDB_GR_SUPER_SHARD_NUM:2}
115-
segmentInterval: ${SW_STORAGE_BANYANDB_GR_SUPER_SI_DAYS:1}
116-
ttl: ${SW_STORAGE_BANYANDB_GR_SUPER_TTL_DAYS:3}
117-
enableWarmStage: ${SW_STORAGE_BANYANDB_GR_SUPER_ENABLE_WARM_STAGE:false}
118-
enableColdStage: ${SW_STORAGE_BANYANDB_GR_SUPER_ENABLE_COLD_STAGE:false}
102+
# The group settings of super datasets.
103+
# Super datasets are used to store trace or log data that is too large for normal datasets.
104+
recordsTrace:
105+
shardNum: ${SW_STORAGE_BANYANDB_GR_TRACE_SHARD_NUM:2}
106+
segmentInterval: ${SW_STORAGE_BANYANDB_GR_TRACE_SI_DAYS:1}
107+
ttl: ${SW_STORAGE_BANYANDB_GR_TRACE_TTL_DAYS:3}
108+
enableWarmStage: ${SW_STORAGE_BANYANDB_GR_TRACE_ENABLE_WARM_STAGE:false}
109+
enableColdStage: ${SW_STORAGE_BANYANDB_GR_TRACE_ENABLE_COLD_STAGE:false}
110+
warm:
111+
shardNum: ${SW_STORAGE_BANYANDB_GR_TRACE_WARM_SHARD_NUM:2}
112+
segmentInterval: ${SW_STORAGE_BANYANDB_GR_TRACE_WARM_SI_DAYS:1}
113+
ttl: ${SW_STORAGE_BANYANDB_GR_TRACE_WARM_TTL_DAYS:7}
114+
nodeSelector: ${SW_STORAGE_BANYANDB_GR_TRACE_WARM_NODE_SELECTOR:"type=warm"}
115+
cold:
116+
shardNum: ${SW_STORAGE_BANYANDB_GR_TRACE_COLD_SHARD_NUM:2}
117+
segmentInterval: ${SW_STORAGE_BANYANDB_GR_TRACE_COLD_SI_DAYS:1}
118+
ttl: ${SW_STORAGE_BANYANDB_GR_TRACE_COLD_TTL_DAYS:30}
119+
nodeSelector: ${SW_STORAGE_BANYANDB_GR_TRACE_COLD_NODE_SELECTOR:"type=cold"}
120+
recordsZipkinTrace:
121+
shardNum: ${SW_STORAGE_BANYANDB_GR_ZIPKIN_TRACE_SHARD_NUM:2}
122+
segmentInterval: ${SW_STORAGE_BANYANDB_GR_ZIPKIN_TRACE_SI_DAYS:1}
123+
ttl: ${SW_STORAGE_BANYANDB_GR_ZIPKIN_TRACE_TTL_DAYS:3}
124+
enableWarmStage: ${SW_STORAGE_BANYANDB_GR_ZIPKIN_TRACE_ENABLE_WARM_STAGE:false}
125+
enableColdStage: ${SW_STORAGE_BANYANDB_GR_ZIPKIN_TRACE_ENABLE_COLD_STAGE:false}
126+
warm:
127+
shardNum: ${SW_STORAGE_BANYANDB_GR_ZIPKIN_TRACE_WARM_SHARD_NUM:2}
128+
segmentInterval: ${SW_STORAGE_BANYANDB_GR_ZIPKIN_TRACE_WARM_SI_DAYS:1}
129+
ttl: ${SW_STORAGE_BANYANDB_GR_ZIPKIN_TRACE_WARM_TTL_DAYS:7}
130+
nodeSelector: ${SW_STORAGE_BANYANDB_GR_ZIPKIN_TRACE_WARM_NODE_SELECTOR:"type=warm"}
131+
cold:
132+
shardNum: ${SW_STORAGE_BANYANDB_GR_ZIPKIN_TRACE_COLD_SHARD_NUM:2}
133+
segmentInterval: ${SW_STORAGE_BANYANDB_GR_ZIPKIN_TRACE_COLD_SI_DAYS:1}
134+
ttl: ${SW_STORAGE_BANYANDB_GR_ZIPKIN_TRACE_COLD_TTL_DAYS:30}
135+
nodeSelector: ${SW_STORAGE_BANYANDB_GR_ZIPKIN_TRACE_COLD_NODE_SELECTOR:"type=cold"}
136+
recordsLog:
137+
shardNum: ${SW_STORAGE_BANYANDB_GR_LOG_SHARD_NUM:2}
138+
segmentInterval: ${SW_STORAGE_BANYANDB_GR_LOG_SI_DAYS:1}
139+
ttl: ${SW_STORAGE_BANYANDB_GR_LOG_TTL_DAYS:3}
140+
enableWarmStage: ${SW_STORAGE_BANYANDB_GR_LOG_ENABLE_WARM_STAGE:false}
141+
enableColdStage: ${SW_STORAGE_BANYANDB_GR_LOG_ENABLE_COLD_STAGE:false}
142+
warm:
143+
shardNum: ${SW_STORAGE_BANYANDB_GR_LOG_WARM_SHARD_NUM:2}
144+
segmentInterval: ${SW_STORAGE_BANYANDB_GR_LOG_WARM_SI_DAYS:1}
145+
ttl: ${SW_STORAGE_BANYANDB_GR_LOG_WARM_TTL_DAYS:7}
146+
nodeSelector: ${SW_STORAGE_BANYANDB_GR_LOG_WARM_NODE_SELECTOR:"type=warm"}
147+
cold:
148+
shardNum: ${SW_STORAGE_BANYANDB_GR_LOG_COLD_SHARD_NUM:2}
149+
segmentInterval: ${SW_STORAGE_BANYANDB_GR_LOG_COLD_SI_DAYS:1}
150+
ttl: ${SW_STORAGE_BANYANDB_GR_LOG_COLD_TTL_DAYS:30}
151+
nodeSelector: ${SW_STORAGE_BANYANDB_GR_LOG_COLD_NODE_SELECTOR:"type=cold"}
152+
recordsBrowserErrorLog:
153+
shardNum: ${SW_STORAGE_BANYANDB_GR_BROWSER_ERROR_LOG_SHARD_NUM:2}
154+
segmentInterval: ${SW_STORAGE_BANYANDB_GR_BROWSER_ERROR_LOG_SI_DAYS:1}
155+
ttl: ${SW_STORAGE_BANYANDB_GR_BROWSER_ERROR_LOG_TTL_DAYS:3}
156+
enableWarmStage: ${SW_STORAGE_BANYANDB_GR_BROWSER_ERROR_LOG_ENABLE_WARM_STAGE:false}
157+
enableColdStage: ${SW_STORAGE_BANYANDB_GR_BROWSER_ERROR_LOG_ENABLE_COLD_STAGE:false}
119158
warm:
120-
shardNum: ${SW_STORAGE_BANYANDB_GR_SUPER_WARM_SHARD_NUM:2}
121-
segmentInterval: ${SW_STORAGE_BANYANDB_GR_SUPER_WARM_SI_DAYS:1}
122-
ttl: ${SW_STORAGE_BANYANDB_GR_SUPER_WARM_TTL_DAYS:7}
123-
nodeSelector: ${SW_STORAGE_BANYANDB_GR_SUPER_WARM_NODE_SELECTOR:"type=warm"}
159+
shardNum: ${SW_STORAGE_BANYANDB_GR_BROWSER_ERROR_LOG_WARM_SHARD_NUM:2}
160+
segmentInterval: ${SW_STORAGE_BANYANDB_GR_BROWSER_ERROR_LOG_WARM_SI_DAYS:1}
161+
ttl: ${SW_STORAGE_BANYANDB_GR_BROWSER_ERROR_LOG_WARM_TTL_DAYS:7}
162+
nodeSelector: ${SW_STORAGE_BANYANDB_GR_BROWSER_ERROR_LOG_WARM_NODE_SELECTOR:"type=warm"}
124163
cold:
125-
shardNum: ${SW_STORAGE_BANYANDB_GR_SUPER_COLD_SHARD_NUM:2}
126-
segmentInterval: ${SW_STORAGE_BANYANDB_GR_SUPER_COLD_SI_DAYS:1}
127-
ttl: ${SW_STORAGE_BANYANDB_GR_SUPER_COLD_TTL_DAYS:30}
128-
nodeSelector: ${SW_STORAGE_BANYANDB_GR_SUPER_COLD_NODE_SELECTOR:"type=cold"}
164+
shardNum: ${SW_STORAGE_BANYANDB_GR_BROWSER_ERROR_LOG_COLD_SHARD_NUM:2}
165+
segmentInterval: ${SW_STORAGE_BANYANDB_GR_BROWSER_ERROR_LOG_COLD_SI_DAYS:1}
166+
ttl: ${SW_STORAGE_BANYANDB_GR_BROWSER_ERROR_LOG_COLD_TTL_DAYS:30}
167+
nodeSelector: ${SW_STORAGE_BANYANDB_GR_BROWSER_ERROR_LOG_COLD_NODE_SELECTOR:"type=cold"}
129168
# The group settings of metrics.
130169
#
131170
# OAP stores metrics based on their granularity.
@@ -180,14 +219,14 @@ groups:
180219
segmentInterval: ${SW_STORAGE_BANYANDB_GM_DAY_COLD_SI_DAYS:15}
181220
ttl: ${SW_STORAGE_BANYANDB_GM_DAY_COLD_TTL_DAYS:120}
182221
nodeSelector: ${SW_STORAGE_BANYANDB_GM_DAY_COLD_NODE_SELECTOR:"type=cold"}
183-
# If the metrics is marked as "index_mode", the metrics will be stored in the "index" group.
184-
# The "index" group is designed to store metrics that are used for indexing without value columns.
222+
# If a metric is marked as "index_mode", it will be stored in the "metadata" group.
223+
# The "metadata" group is designed to store metrics that are used for indexing without value columns.
185224
# Such as `service_traffic`, `network_address_alias`, etc.
186225
# "index_mode" requires BanyanDB *0.8.0* or later.
187226
metadata:
188-
shardNum: ${SW_STORAGE_BANYANDB_GM_INDEX_SHARD_NUM:2}
189-
segmentInterval: ${SW_STORAGE_BANYANDB_GM_INDEX_SI_DAYS:15}
190-
ttl: ${SW_STORAGE_BANYANDB_GM_INDEX_TTL_DAYS:15}
227+
shardNum: ${SW_STORAGE_BANYANDB_GM_METADATA_SHARD_NUM:2}
228+
segmentInterval: ${SW_STORAGE_BANYANDB_GM_METADATA_SI_DAYS:15}
229+
ttl: ${SW_STORAGE_BANYANDB_GM_METADATA_TTL_DAYS:15}
191230

192231
# The group settings of property, such as UI and profiling.
193232
property:
@@ -232,30 +271,4 @@ docker run -d \
232271
- **Cluster Mode**: Suitable for large-scale deployments.
233272
- **Configuration**: `targets` is the IP address/hostname and port of the `liaison` nodes, separated by commas. `Liaison` nodes are the entry points of the BanyanDB cluster.
234273

235-
### Group Settings
236-
237-
BanyanDB supports **group settings** to configure storage groups, shards, segment intervals, and TTL (Time-To-Live). The group settings file is a YAML file required when using BanyanDB as the storage.
238-
239-
#### Basic Group Settings
240-
241-
- `ShardNum`: Number of shards in the group. Shards are the basic units of data storage in BanyanDB. Data is distributed across shards based on the hash value of the series ID. Refer to the [BanyanDB Shard](https://skywalking.apache.org/docs/skywalking-banyandb/latest/concept/clustering/#52-data-sharding) documentation for more details.
242-
- `SIDays`: Interval in days for creating a new segment. Segments are time-based, allowing efficient data retention and querying. `SI` stands for Segment Interval.
243-
- `TTLDays`: Time-to-live for the data in the group, in days. Data exceeding the TTL will be deleted.
244-
245-
For more details on setting `segmentIntervalDays` and `ttlDays`, refer to the [BanyanDB TTL](../../../banyandb/ttl.md) documentation.
246-
247-
#### Record Group Settings
248-
249-
The `gr` prefix is used for record group settings. The `normal` and `super` sections are used to define settings for normal and super datasets, respectively.
250-
251-
Super datasets are used to store trace or log data that is too large for normal datasets. Each super dataset is stored in a separate group in BanyanDB. The settings defined in the `super` section are applied to all super datasets.
252-
253-
Normal datasets are stored in a single group named `normal`. The settings defined in the `normal` section are applied to all normal datasets.
254-
255-
#### Metrics Group Settings
256-
257-
The `gm` prefix is used for metrics group settings. The `minute`, `hour`, and `day` sections are used to define settings for metrics stored based on granularity.
258-
259-
The `index` group is designed to store metrics used for indexing without value columns. For example, `service_traffic`, `network_address_alias`, etc.
260-
261274
For more details, refer to the documentation of [BanyanDB](https://skywalking.apache.org/docs/skywalking-banyandb/latest/readme/) and the [BanyanDB Java Client](https://github.com/apache/skywalking-banyandb-java-client) subprojects.

docs/en/status/query_ttl_setup.md

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ This API is used to get the unified and effective TTL configurations.
1313
```shell
1414
> curl -X GET "http://127.0.0.1:12800/status/config/ttl"
1515
# Metrics TTL includes the definition of the TTL of the metrics-ish data in the storage,
16+
# Metrics TTL includes the definition of the TTL of the metrics-ish data in the storage,
1617
# e.g.
1718
# 1. The metadata of the service, instance, endpoint, topology map, etc.
1819
# 2. Generated metrics data from OAL and MAL engines.
@@ -34,12 +35,17 @@ metrics.day.cold=-1
3435
# Super datasets of records are traces and logs, whose volume is expected to be much larger.
3536
#
3637
# Cover hot and warm data for BanyanDB.
37-
records.default=3
38-
records.superDataset=3
38+
records.normal=3
39+
records.trace=10
40+
records.zipkinTrace=3
41+
records.log=3
42+
records.browserErrorLog=3
3943
# Cold data, '-1' represents no cold stage data.
40-
records.default.cold=-1
41-
records.superDataset.cold=-1
42-
44+
records.normal.cold=-1
45+
records.trace.cold=30
46+
records.zipkinTrace.cold=-1
47+
records.log.cold=-1
48+
records.browserErrorLog.cold=-1
4349
```
4450

4551
This API also provides the response in JSON format, which is more friendly for programmatic usage.
@@ -49,19 +55,25 @@ This API also provides the response in JSON format, which is more friendly for p
4955
-H "Accept: application/json"
5056

5157
{
52-
"metrics": {
53-
"minute": 7,
54-
"hour": 15,
55-
"day": 15,
56-
"coldMinute": -1,
57-
"coldHour": -1,
58-
"coldDay": -1
59-
},
60-
"records": {
61-
"default": 3,
62-
"superDataset": 3,
63-
"coldValue": -1,
64-
"coldSuperDataset": -1
65-
}
58+
"metrics": {
59+
"minute": 7,
60+
"hour": 15,
61+
"day": 15,
62+
"coldMinute": -1,
63+
"coldHour": -1,
64+
"coldDay": -1
65+
},
66+
"records": {
67+
"normal": 3,
68+
"trace": 10,
69+
"zipkinTrace": 3,
70+
"log": 3,
71+
"browserErrorLog": 3,
72+
"coldNormal": -1,
73+
"coldTrace": 30,
74+
"coldZipkinTrace": -1,
75+
"coldLog": -1,
76+
"coldBrowserErrorLog": -1
77+
}
6678
}
6779
```

oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/manual/log/LogRecord.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
@Stream(name = LogRecord.INDEX_NAME, scopeId = DefaultScopeDefine.LOG, builder = LogRecord.Builder.class, processor = RecordStreamProcessor.class)
3737
@SQLDatabase.ExtraColumn4AdditionalEntity(additionalTable = AbstractLogRecord.ADDITIONAL_TAG_TABLE, parentColumn = TIME_BUCKET)
3838
@BanyanDB.TimestampColumn(AbstractLogRecord.TIMESTAMP)
39+
@BanyanDB.Group(streamGroup = BanyanDB.StreamGroup.RECORDS_LOG)
3940
public class LogRecord extends AbstractLogRecord {
4041

4142
public static final String INDEX_NAME = "log";

oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/manual/segment/SegmentRecord.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
@Stream(name = SegmentRecord.INDEX_NAME, scopeId = DefaultScopeDefine.SEGMENT, builder = SegmentRecord.Builder.class, processor = RecordStreamProcessor.class)
4444
@SQLDatabase.ExtraColumn4AdditionalEntity(additionalTable = SegmentRecord.ADDITIONAL_TAG_TABLE, parentColumn = TIME_BUCKET)
4545
@BanyanDB.TimestampColumn(SegmentRecord.START_TIME)
46+
@BanyanDB.Group(streamGroup = BanyanDB.StreamGroup.RECORDS_TRACE)
4647
public class SegmentRecord extends Record {
4748

4849
public static final String INDEX_NAME = "segment";

0 commit comments

Comments
 (0)