Skip to content

Commit d98f74d

Browse files
authored
Add more clear logs when oap-cluster-internal data format is inconsistent (#13059)
1 parent 0f2ef5a commit d98f74d

File tree

2 files changed

+25
-15
lines changed

2 files changed

+25
-15
lines changed

docs/en/changes/changes.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
* Add `Get Alarm Runtime Status` API.
7474
* Add `lock` when querying the Alarm metrics window values.
7575
* Add a fail-safe mechanism to prevent traffic metrics from becoming inconsistent between the in-memory state and the database server.
76+
* Add clearer logs when the oap-cluster-internal data (metrics/traffic) format is inconsistent.
7677

7778
#### UI
7879

oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/remote/RemoteServiceHandler.java

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -117,24 +117,33 @@ public void onNext(RemoteMessage message) {
117117
String nextWorkerName = message.getNextWorkerName();
118118
RemoteData remoteData = message.getRemoteData();
119119

120-
try {
121-
RemoteHandleWorker handleWorker = workerInstanceGetter.get(nextWorkerName);
122-
if (handleWorker != null) {
123-
AbstractWorker nextWorker = handleWorker.getWorker();
124-
StreamData streamData = handleWorker.getStreamDataClass().newInstance();
120+
RemoteHandleWorker handleWorker = workerInstanceGetter.get(nextWorkerName);
121+
if (handleWorker != null) {
122+
AbstractWorker nextWorker = handleWorker.getWorker();
123+
StreamData streamData;
124+
try {
125+
streamData = handleWorker.getStreamDataClass().newInstance();
126+
} catch (Throwable t) {
127+
remoteInErrorCounter.inc();
128+
LOGGER.error(t.getMessage(), t);
129+
return;
130+
}
131+
try {
125132
streamData.deserialize(remoteData);
126-
nextWorker.in(streamData);
127-
} else {
128-
remoteInTargetNotFoundCounter.inc();
129-
LOGGER.warn(
130-
"Work name [{}] not found. Check OAL script, make sure they are same in the whole cluster.",
131-
nextWorkerName
132-
);
133+
} catch (Throwable t) {
134+
remoteInErrorCounter.inc();
135+
LOGGER.error("Can't deserialize data {}, this data is discarded.", message, t);
136+
return;
133137
}
134-
} catch (Throwable t) {
135-
remoteInErrorCounter.inc();
136-
LOGGER.error(t.getMessage(), t);
138+
nextWorker.in(streamData);
139+
} else {
140+
remoteInTargetNotFoundCounter.inc();
141+
LOGGER.warn(
142+
"Data is discarded due to worker not found. Check OAL/MAL script, make sure they are aligned in the whole cluster. The data is {}",
143+
message
144+
);
137145
}
146+
138147
} finally {
139148
timer.finish();
140149
}

0 commit comments

Comments
 (0)