Skip to content

Commit bf665a0

Browse files
committed
standardize error logging for alerts
1 parent 98b386b commit bf665a0

12 files changed

+106
-82
lines changed

src/main/java/com/uid2/optout/delta/DeltaProductionOrchestrator.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ private boolean processWindow(SqsWindowReader.WindowReadResult windowResult,
185185
OptOutTrafficCalculator.TrafficStatus trafficStatus = this.trafficCalculator.calculateStatus(deltaMessages, queueAttributes, droppedMessages.size(), filteredAsTooRecentCount);
186186

187187
if (trafficStatus == OptOutTrafficCalculator.TrafficStatus.DELAYED_PROCESSING) {
188-
LOGGER.error("optout delta production has hit DELAYED_PROCESSING status, stopping production and setting manual override");
188+
LOGGER.error("circuit_breaker_triggered: traffic spike detected, stopping production and setting manual override");
189189
manualOverrideService.setDelayedProcessing();
190190
return true;
191191
}
@@ -262,11 +262,11 @@ private boolean isJobTimedOut(long jobStartTime) {
262262
long elapsedTime = OptOutUtils.nowEpochSeconds() - jobStartTime;
263263

264264
if (elapsedTime > 3600) { // 1 hour - log warning
265-
LOGGER.error("delta production job has been running for {} seconds", elapsedTime);
265+
LOGGER.error("delta_job_timeout: job has been running for {} seconds", elapsedTime);
266266
}
267267

268268
if (elapsedTime > this.jobTimeoutSeconds) {
269-
LOGGER.error("delta production job has been running for {} seconds (exceeds timeout of {}s)",
269+
LOGGER.error("delta_job_timeout: job exceeded timeout, running for {} seconds (timeout: {}s)",
270270
elapsedTime, this.jobTimeoutSeconds);
271271
return true;
272272
}

src/main/java/com/uid2/optout/delta/ManualOverrideService.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ public boolean setDelayedProcessing() {
6262
LOGGER.info("set manual override to DELAYED_PROCESSING: {}", overrideS3Path);
6363
return true;
6464
} catch (Exception e) {
65-
LOGGER.error("error setting manual override: {}", overrideS3Path, e);
65+
LOGGER.error("manual_override_error: failed to set override at {}", overrideS3Path, e);
6666
return false;
6767
}
6868
}
@@ -76,7 +76,7 @@ private String getOverrideValue() {
7676
JsonObject configJson = Utils.toJsonObject(inputStream);
7777
return configJson.getString(OVERRIDE_KEY, "");
7878
} catch (Exception e) {
79-
LOGGER.error("no manual override found: {}", overrideS3Path);
79+
LOGGER.error("manual_override_error: no manual override file found at {}", overrideS3Path);
8080
return "";
8181
}
8282
}

src/main/java/com/uid2/optout/delta/S3UploadService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ public void uploadAndDeleteMessages(byte[] data, String s3Path, List<Message> me
7272
onSuccess.onSuccess(messages.size());
7373
}
7474
} catch (Exception e) {
75-
LOGGER.error("failed to upload to s3: path={}", s3Path, e);
75+
LOGGER.error("s3_error: failed to upload delta or dropped requests to path={}", s3Path, e);
7676
throw new IOException("s3 upload failed: " + s3Path, e);
7777
}
7878

src/main/java/com/uid2/optout/sqs/SqsBatchProcessor.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,14 @@ public BatchProcessingResult processBatch(List<Message> messageBatch, int batchN
8282
if (parsedBatch.size() < messageBatch.size()) {
8383
List<Message> invalidMessages = identifyInvalidMessages(messageBatch, parsedBatch);
8484
if (!invalidMessages.isEmpty()) {
85-
LOGGER.error("found {} invalid messages in batch {}, deleting", invalidMessages.size(), batchNumber);
85+
LOGGER.error("sqs_error: found {} invalid messages in batch {}, deleting", invalidMessages.size(), batchNumber);
8686
SqsMessageOperations.deleteMessagesFromSqs(this.sqsClient, this.queueUrl, invalidMessages);
8787
}
8888
}
8989

9090
// No valid messages after deleting corrupt ones, continue reading
9191
if (parsedBatch.isEmpty()) {
92-
LOGGER.error("no valid messages in batch {}", batchNumber);
92+
LOGGER.info("no valid messages in batch {} after removing invalid messages", batchNumber);
9393
return BatchProcessingResult.corruptMessagesDeleted();
9494
}
9595

src/main/java/com/uid2/optout/sqs/SqsMessageOperations.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ public static List<Message> receiveMessagesFromSqs(
136136
return response.messages();
137137

138138
} catch (Exception e) {
139-
LOGGER.error("error receiving messages", e);
139+
LOGGER.error("sqs_error: failed to receive messages", e);
140140
return new ArrayList<>();
141141
}
142142
}
@@ -174,7 +174,7 @@ public static void deleteMessagesFromSqs(SqsClient sqsClient, String queueUrl, L
174174
DeleteMessageBatchResponse deleteResponse = sqsClient.deleteMessageBatch(deleteRequest);
175175

176176
if (!deleteResponse.failed().isEmpty()) {
177-
LOGGER.error("failed to delete {} messages", deleteResponse.failed().size());
177+
LOGGER.error("sqs_error: failed to delete {} messages", deleteResponse.failed().size());
178178
} else {
179179
totalDeleted += entries.size();
180180
}
@@ -186,7 +186,7 @@ public static void deleteMessagesFromSqs(SqsClient sqsClient, String queueUrl, L
186186
LOGGER.info("deleted {} messages", totalDeleted);
187187

188188
} catch (Exception e) {
189-
LOGGER.error("error deleting messages", e);
189+
LOGGER.error("sqs_error: exception during message deletion", e);
190190
}
191191
}
192192
}

src/main/java/com/uid2/optout/sqs/SqsMessageParser.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,21 +40,21 @@ public static List<SqsParsedMessage> parseAndSortMessages(List<Message> messages
4040
String phone = body.getString("phone");
4141

4242
if (identityHash == null || advertisingId == null) {
43-
LOGGER.error("invalid message format: {}", message.body());
43+
LOGGER.error("sqs_error: invalid message format: {}", message.body());
4444
continue;
4545
}
4646

4747
byte[] hashBytes = OptOutUtils.base64StringTobyteArray(identityHash);
4848
byte[] idBytes = OptOutUtils.base64StringTobyteArray(advertisingId);
4949

5050
if (hashBytes == null || idBytes == null) {
51-
LOGGER.error("invalid base64 encoding");
51+
LOGGER.error("sqs_error: invalid base64 encoding");
5252
continue;
5353
}
5454

5555
parsedMessages.add(new SqsParsedMessage(message, hashBytes, idBytes, timestampSeconds, email, phone, clientIp, traceId));
5656
} catch (Exception e) {
57-
LOGGER.error("error parsing message", e);
57+
LOGGER.error("sqs_error: error parsing message", e);
5858
}
5959
}
6060

@@ -73,7 +73,7 @@ public static List<SqsParsedMessage> parseAndSortMessages(List<Message> messages
7373
private static long extractTimestamp(Message message) {
7474
String sentTimestampStr = message.attributes().get(MessageSystemAttributeName.SENT_TIMESTAMP);
7575
if (sentTimestampStr == null) {
76-
LOGGER.warn("message missing SentTimestamp, using current time");
76+
LOGGER.info("message missing SentTimestamp, using current time");
7777
return OptOutUtils.nowEpochSeconds();
7878
}
7979
return Long.parseLong(sentTimestampStr) / 1000; // ms to seconds

src/main/java/com/uid2/optout/sqs/SqsWindowReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public WindowReadResult readWindow() {
9696

9797
while (true) {
9898
if (windowMessages.size() >= maxMessagesPerWindow) {
99-
LOGGER.warn("message limit exceeded: {} messages >= limit {}", windowMessages.size(), maxMessagesPerWindow);
99+
LOGGER.warn("high_message_volume: message limit exceeded while reading window, {} messages >= limit {}", windowMessages.size(), maxMessagesPerWindow);
100100
return WindowReadResult.messageLimitExceeded(windowMessages, currentWindowStart, rawMessagesRead);
101101
}
102102

src/main/java/com/uid2/optout/traffic/OptOutTrafficCalculator.java

Lines changed: 38 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -139,10 +139,10 @@ public void reloadTrafficCalcConfig() throws MalformedTrafficCalcConfigException
139139
this.evaluationWindowSeconds, this.baselineTraffic, this.thresholdMultiplier, ranges.size());
140140

141141
} catch (MalformedTrafficCalcConfigException e) {
142-
LOGGER.warn("failed to load traffic calc config, config is malformed: {}", trafficCalcConfigPath, e);
142+
LOGGER.error("circuit_breaker_config_error: config is malformed, configPath={}", trafficCalcConfigPath, e);
143143
throw e;
144144
} catch (Exception e) {
145-
LOGGER.warn("failed to load traffic calc config, config is malformed or missing: {}", trafficCalcConfigPath, e);
145+
LOGGER.error("circuit_breaker_config_error: config is malformed or missing, configPath={}", trafficCalcConfigPath, e);
146146
throw new MalformedTrafficCalcConfigException("failed to load traffic calc config: " + e.getMessage());
147147
}
148148
}
@@ -163,12 +163,12 @@ List<List<Long>> parseAllowlistRanges(JsonObject config) throws MalformedTraffic
163163
long end = rangeArray.getLong(1);
164164

165165
if(start >= end) {
166-
LOGGER.error("invalid allowlist range: start must be less than end: [{}, {}]", start, end);
166+
LOGGER.error("circuit_breaker_config_error: allowlist range start must be less than end, range=[{}, {}]", start, end);
167167
throw new MalformedTrafficCalcConfigException("invalid allowlist range at index " + i + ": start must be less than end");
168168
}
169169

170170
if (end - start > 86400) {
171-
LOGGER.error("invalid allowlist range: range must be less than 24 hours: [{}, {}]", start, end);
171+
LOGGER.error("circuit_breaker_config_error: allowlist range must be less than 24 hours, range=[{}, {}]", start, end);
172172
throw new MalformedTrafficCalcConfigException("invalid allowlist range at index " + i + ": range must be less than 24 hours");
173173
}
174174

@@ -186,7 +186,7 @@ List<List<Long>> parseAllowlistRanges(JsonObject config) throws MalformedTraffic
186186
long currentEnd = ranges.get(i).get(1);
187187
long nextStart = ranges.get(i + 1).get(0);
188188
if (currentEnd >= nextStart) {
189-
LOGGER.error("overlapping allowlist ranges detected: [{}, {}] overlaps with [{}, {}]",
189+
LOGGER.error("circuit_breaker_config_error: overlapping allowlist ranges, range=[{}, {}] overlaps with range=[{}, {}]",
190190
ranges.get(i).get(0), currentEnd, nextStart, ranges.get(i + 1).get(1));
191191
throw new MalformedTrafficCalcConfigException(
192192
"overlapping allowlist ranges detected at indices " + i + " and " + (i + 1));
@@ -196,7 +196,7 @@ List<List<Long>> parseAllowlistRanges(JsonObject config) throws MalformedTraffic
196196
} catch (MalformedTrafficCalcConfigException e) {
197197
throw e;
198198
} catch (Exception e) {
199-
LOGGER.error("failed to parse allowlist ranges", e);
199+
LOGGER.error("circuit_breaker_config_error: failed to parse allowlist ranges", e);
200200
throw new MalformedTrafficCalcConfigException("failed to parse allowlist ranges: " + e.getMessage());
201201
}
202202

@@ -227,8 +227,8 @@ public TrafficStatus calculateStatus(List<Message> sqsMessages, SqsMessageOperat
227227
List<String> deltaS3Paths = listDeltaFiles();
228228

229229
if (deltaS3Paths.isEmpty()) {
230-
LOGGER.warn("no delta files found in s3 with prefix: {}", s3DeltaPrefix);
231-
return TrafficStatus.DEFAULT;
230+
LOGGER.error("s3_error: no delta files found in s3 at prefix={}", s3DeltaPrefix);
231+
throw new RuntimeException("no delta files found in s3 at prefix=" + s3DeltaPrefix);
232232
}
233233

234234
// Find newest delta file timestamp for delta traffic window
@@ -326,8 +326,8 @@ public TrafficStatus calculateStatus(List<Message> sqsMessages, SqsMessageOperat
326326
return status;
327327

328328
} catch (Exception e) {
329-
LOGGER.error("error calculating traffic status", e);
330-
return TrafficStatus.DEFAULT;
329+
LOGGER.error("delta_job_failed: error calculating traffic status", e);
330+
throw new RuntimeException("error calculating traffic status", e);
331331
}
332332
}
333333

@@ -345,7 +345,7 @@ private long findNewestDeltaTimestamp(List<String> deltaS3Paths) throws IOExcept
345345
List<Long> timestamps = getTimestampsFromFile(newestDeltaPath);
346346

347347
if (timestamps.isEmpty()) {
348-
LOGGER.warn("newest delta file has no timestamps: {}", newestDeltaPath);
348+
LOGGER.error("s3_error: newest delta file has no timestamps, path={}", newestDeltaPath);
349349
return System.currentTimeMillis() / 1000;
350350
}
351351

@@ -372,7 +372,7 @@ private List<String> listDeltaFiles() {
372372
return deltaFiles;
373373

374374
} catch (Exception e) {
375-
LOGGER.error("failed to list delta files from s3 with prefix: {}", s3DeltaPrefix, e);
375+
LOGGER.error("s3_error: failed to list delta files at prefix={}", s3DeltaPrefix, e);
376376
return Collections.emptyList();
377377
}
378378
}
@@ -429,7 +429,7 @@ private List<Long> readTimestampsFromS3(String s3Path) throws IOException {
429429

430430
return timestamps;
431431
} catch (Exception e) {
432-
LOGGER.error("failed to read delta file from s3: {}", s3Path, e);
432+
LOGGER.error("s3_error: failed to read delta file at path={}", s3Path, e);
433433
throw new IOException("failed to read delta file from s3: " + s3Path, e);
434434
}
435435
}
@@ -512,7 +512,7 @@ private Long extractTimestampFromMessage(Message msg) {
512512
try {
513513
return Long.parseLong(sentTimestamp) / 1000; // Convert ms to seconds
514514
} catch (NumberFormatException e) {
515-
LOGGER.warn("invalid sentTimestamp: {}", sentTimestamp);
515+
LOGGER.error("sqs_error: invalid sentTimestamp, messageId={}, sentTimestamp={}", msg.messageId(), sentTimestamp);
516516
}
517517
}
518518

@@ -590,23 +590,38 @@ private void evictOldCacheEntries(long cutoffTimestamp) {
590590
}
591591

592592
/**
593-
* Determine traffic status based on current vs past counts
593+
* Determine traffic status based on current vs baseline traffic.
594+
* Logs warnings at 50%, 75%, and 90% of the circuit breaker threshold.
594595
*/
595596
TrafficStatus determineStatus(int sumCurrent, int baselineTraffic) {
596597
if (baselineTraffic == 0 || thresholdMultiplier == 0) {
597-
// Avoid division by zero - if no baseline traffic, return DEFAULT status
598-
LOGGER.warn("baselineTraffic is 0 or thresholdMultiplier is 0, returning default status");
599-
return TrafficStatus.DEFAULT;
598+
LOGGER.error("circuit_breaker_config_error: baselineTraffic is 0 or thresholdMultiplier is 0");
599+
throw new RuntimeException("invalid circuit breaker config: baselineTraffic=" + baselineTraffic + ", thresholdMultiplier=" + thresholdMultiplier);
600600
}
601601

602-
if (sumCurrent >= thresholdMultiplier * baselineTraffic) {
603-
LOGGER.error("delayed_processing threshold breached: sumCurrent={}, thresholdMultiplier={}, baselineTraffic={}",
604-
sumCurrent, thresholdMultiplier, baselineTraffic);
602+
int threshold = thresholdMultiplier * baselineTraffic;
603+
double thresholdPercent = (double) sumCurrent / threshold * 100;
604+
605+
// Log warnings at increasing thresholds before circuit breaker triggers
606+
if (thresholdPercent >= 90.0) {
607+
LOGGER.warn("high_message_volume: 90% of threshold reached, sumCurrent={}, threshold={} ({}x{}), thresholdPercent={}%",
608+
sumCurrent, threshold, thresholdMultiplier, baselineTraffic, String.format("%.1f", thresholdPercent));
609+
} else if (thresholdPercent >= 75.0) {
610+
LOGGER.warn("high_message_volume: 75% of threshold reached, sumCurrent={}, threshold={} ({}x{}), thresholdPercent={}%",
611+
sumCurrent, threshold, thresholdMultiplier, baselineTraffic, String.format("%.1f", thresholdPercent));
612+
} else if (thresholdPercent >= 50.0) {
613+
LOGGER.warn("high_message_volume: 50% of threshold reached, sumCurrent={}, threshold={} ({}x{}), thresholdPercent={}%",
614+
sumCurrent, threshold, thresholdMultiplier, baselineTraffic, String.format("%.1f", thresholdPercent));
615+
}
616+
617+
if (sumCurrent >= threshold) {
618+
LOGGER.error("circuit_breaker_triggered: traffic threshold breached, sumCurrent={}, threshold={} ({}x{})",
619+
sumCurrent, threshold, thresholdMultiplier, baselineTraffic);
605620
return TrafficStatus.DELAYED_PROCESSING;
606621
}
607622

608-
LOGGER.info("traffic within normal range: sumCurrent={}, thresholdMultiplier={}, baselineTraffic={}",
609-
sumCurrent, thresholdMultiplier, baselineTraffic);
623+
LOGGER.info("traffic within normal range: sumCurrent={}, threshold={} ({}x{}), thresholdPercent={}%",
624+
sumCurrent, threshold, thresholdMultiplier, baselineTraffic, String.format("%.1f", thresholdPercent));
610625
return TrafficStatus.DEFAULT;
611626
}
612627

0 commit comments

Comments
 (0)