diff --git a/parser/src/main/java/com/microsoft/gctoolkit/parser/GCLogParser.java b/parser/src/main/java/com/microsoft/gctoolkit/parser/GCLogParser.java index 6dcd1078..8c864a0d 100644 --- a/parser/src/main/java/com/microsoft/gctoolkit/parser/GCLogParser.java +++ b/parser/src/main/java/com/microsoft/gctoolkit/parser/GCLogParser.java @@ -30,11 +30,6 @@ public abstract class GCLogParser implements DataSourceParser, SharedPatterns { */ public static final String END_OF_DATA_SENTINEL = GCLogFile.END_OF_DATA_SENTINEL; - // TODO: GCID_COUNTER should be in SharedPatterns, not here. - /** - * Rule for parsing the GCID counter. - */ - public static final GCParseRule GCID_COUNTER = new GCParseRule("GCID_COUNTER", " GC\\((\\d+)\\) "); private JVMEventChannel consumer; protected Diary diary; private DateTimeStamp clock = new DateTimeStamp(DateTimeStamp.EPOC, 0.0d); @@ -246,9 +241,54 @@ MemoryPoolSummary extractPermGenRecord(GCLogTrace trace) { * @param line the line to parse. * @return the extracted GCID, or -1 if not found. */ - int extractGCID(String line) { - GCLogTrace trace = GCID_COUNTER.parse(line); - return (trace != null) ? trace.getIntegerGroup(1) : -1; + static int extractGCID(String line) { + long packed = extractGCCycleIdAndTextualLength(line); + if (packed == -1) { + return -1; + } + return extractGCCycleId(packed); + } + + /** + * Returns a packed long containing two ints: + * - the GC cycle id in the high bytes + * - the length of the text containing the GC cycle id,e.g. 'GC(10)' + * See {@link #extractGCCycleId(long)} and {@link #extractGCCycleIdTextualLength(long)} + */ + protected static long extractGCCycleIdAndTextualLength(String line) { + if (!line.contains("GC(")) { + return -1; + } + // [2025-10-21T16:44:29.311+0200][3645.640s] GC(35) Pause Young (Allocation Failure) + // we search for the value between parenthesis + int start = line.indexOf('('); + int end = line.indexOf(')', start); + if (start == -1 || end == -1) { + return -1; + } + try { + int gcId = Integer.parseInt(line, start + 1, end, 10); + // add the closing parenthesis to the length + int endToReturn = end + 1; + // pack the two ints in a long + return (((long) gcId) << 32) | (endToReturn & 0xFFFFFFFFL); + } catch (Exception e) { + throw new RuntimeException("Failed to extract gc cycle id from " + line, e); + } + } + + protected static int extractGCCycleId(long packedGcCycleIdAndEnd) { + if (packedGcCycleIdAndEnd == -1) { + return -1; + } + return (int) (packedGcCycleIdAndEnd >> 32); + } + + protected static int extractGCCycleIdTextualLength(long packedGcCycleIdAndEnd) { + if (packedGcCycleIdAndEnd == -1) { + return 0; + } + return (int) packedGcCycleIdAndEnd; } /** @@ -281,4 +321,4 @@ public void publishTo(JVMEventChannel channel) { public ChannelName channel() { return ChannelName.DATA_SOURCE; } -} \ No newline at end of file +} diff --git a/parser/src/main/java/com/microsoft/gctoolkit/parser/UnifiedG1GCParser.java b/parser/src/main/java/com/microsoft/gctoolkit/parser/UnifiedG1GCParser.java index 015d2f89..e9ea1829 100644 --- a/parser/src/main/java/com/microsoft/gctoolkit/parser/UnifiedG1GCParser.java +++ b/parser/src/main/java/com/microsoft/gctoolkit/parser/UnifiedG1GCParser.java @@ -4,11 +4,7 @@ import com.microsoft.gctoolkit.GCToolKit; import com.microsoft.gctoolkit.aggregator.EventSource; -import com.microsoft.gctoolkit.event.CPUSummary; -import com.microsoft.gctoolkit.event.GarbageCollectionTypes; -import com.microsoft.gctoolkit.event.MalformedEvent; -import com.microsoft.gctoolkit.event.MemoryPoolSummary; -import com.microsoft.gctoolkit.event.RegionSummary; +import com.microsoft.gctoolkit.event.*; import com.microsoft.gctoolkit.event.g1gc.G1ConcurrentUndoCycle; import com.microsoft.gctoolkit.event.g1gc.G1GCConcurrentEvent; import com.microsoft.gctoolkit.event.g1gc.G1GCEvent; @@ -24,7 +20,6 @@ import com.microsoft.gctoolkit.parser.unified.UnifiedG1GCPatterns; import com.microsoft.gctoolkit.time.DateTimeStamp; -import java.util.AbstractMap; import java.util.LinkedList; import java.util.Map; import java.util.Queue; @@ -33,8 +28,6 @@ import java.util.function.BiConsumer; import java.util.logging.Level; import java.util.logging.Logger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import static com.microsoft.gctoolkit.event.GarbageCollectionTypes.fromLabel; @@ -177,36 +170,38 @@ protected void process(String line) { parse(line); } - private static final Pattern gcIdPattern = GCLogParser.GCID_COUNTER.pattern(); - private void parse(String line) { // Minor optimization. The parse rule only applies to what comes after the GC ID. final int end; final int gcid; - final Matcher gcIdMatcher = gcIdPattern.matcher(line); - if (gcIdMatcher.find()) { - gcid = Integer.parseInt(gcIdMatcher.group(1)); - end = gcIdMatcher.end(); + long packedGcCycleIdAndEnd = extractGCCycleIdAndTextualLength(line); + if (packedGcCycleIdAndEnd != -1) { + gcid = extractGCCycleId(packedGcCycleIdAndEnd); + end = extractGCCycleIdTextualLength(packedGcCycleIdAndEnd); } else { gcid = -1; end = 0; } final String lineAfterGcId = line.substring(end); - parseRules.stream() - .map(Map.Entry::getKey) - .map(rule -> new AbstractMap.SimpleEntry<>(rule, rule.parse(lineAfterGcId))) - .filter(tuple -> tuple.getValue() != null) - .findAny() - .ifPresentOrElse( - tuple -> { - // Typically, "end" will be greater than zero, but not always. - setForwardReference(gcid, end > 0 ? line.substring(0, end) : line); - applyRule(tuple.getKey(), tuple.getValue(), line); - }, - () -> log(line) - ); + GCParseRule matchedParseRule = null; + GCLogTrace parsed = null; + for (Map.Entry> entry : parseRules.entrySet()) { + GCParseRule rule = entry.getKey(); + parsed = rule.parse(lineAfterGcId); + if (parsed != null) { + matchedParseRule = rule; + break; + } + } + if (parsed != null) { + // Typically, "end" will be greater than zero, but not always. + setForwardReference(gcid, end > 0 ? line.substring(0, end) : line); + applyRule(matchedParseRule, parsed, line); + } else { + log(line); + } } diff --git a/parser/src/main/java/com/microsoft/gctoolkit/parser/UnifiedGenerationalParser.java b/parser/src/main/java/com/microsoft/gctoolkit/parser/UnifiedGenerationalParser.java index 5ddd788a..b132651a 100644 --- a/parser/src/main/java/com/microsoft/gctoolkit/parser/UnifiedGenerationalParser.java +++ b/parser/src/main/java/com/microsoft/gctoolkit/parser/UnifiedGenerationalParser.java @@ -170,11 +170,11 @@ private void youngHeader(GCLogTrace trace, String line) { if (pauseEvent != null) LOGGER.warning("Young pause event not recorded: " + pauseEvent.getGcID()); if (diary.isCMS()) - pauseEvent = new GenerationalForwardReference(ParNew, new Decorators(line), super.GCID_COUNTER.parse(line).getIntegerGroup(1)); + pauseEvent = new GenerationalForwardReference(ParNew, new Decorators(line), extractGCID(line)); else if (diary.isPSYoung()) - pauseEvent = new GenerationalForwardReference(PSYoungGen, new Decorators(line), super.GCID_COUNTER.parse(line).getIntegerGroup(1)); + pauseEvent = new GenerationalForwardReference(PSYoungGen, new Decorators(line), extractGCID(line)); else if (diary.isSerialFull()) - pauseEvent = new GenerationalForwardReference(DefNew, new Decorators(line), super.GCID_COUNTER.parse(line).getIntegerGroup(1)); + pauseEvent = new GenerationalForwardReference(DefNew, new Decorators(line), extractGCID(line)); else { LOGGER.warning("Unrecognized collection phase -> " + line); return; @@ -261,7 +261,7 @@ private void tenuringAgeBreakout(GCLogTrace trace, String line) { private void initialMark(GCLogTrace trace, String line) { if (concurrentCyclePauseEvent != null) LOGGER.warning("Pause event not completely recorded: " + pauseEvent.getGcID()); - concurrentCyclePauseEvent = new GenerationalForwardReference(InitialMark, new Decorators(line), GCID_COUNTER.parse(line).getIntegerGroup(1)); + concurrentCyclePauseEvent = new GenerationalForwardReference(InitialMark, new Decorators(line), extractGCID(line)); concurrentCyclePauseEvent.setStartTime(getClock()); } @@ -278,7 +278,7 @@ private void concurrentPhaseStart(GCLogTrace trace, String line) { LOGGER.warning("Unknown concurrent phase: " + line); return; } - concurrentEvent = new GenerationalForwardReference(gcType, new Decorators(line), GCID_COUNTER.parse(line).getIntegerGroup(1)); + concurrentEvent = new GenerationalForwardReference(gcType, new Decorators(line), extractGCID(line)); concurrentEvent.setStartTime(getClock()); inConcurrentPhase = true; } @@ -299,7 +299,7 @@ private void workerThreads(GCLogTrace trace, String line) { private void remark(GCLogTrace trace, String line) { if (concurrentCyclePauseEvent != null) LOGGER.warning("Pause event not recorded and is about to be lost: " + pauseEvent.getGcID()); - concurrentCyclePauseEvent = new GenerationalForwardReference(Remark, new Decorators(line), GCID_COUNTER.parse(line).getIntegerGroup(1)); + concurrentCyclePauseEvent = new GenerationalForwardReference(Remark, new Decorators(line), extractGCID(line)); concurrentCyclePauseEvent.setStartTime(getClock()); } @@ -328,9 +328,9 @@ private void promotionFailed(GCLogTrace trace, String line) { private void fullGC(GCLogTrace trace, String line) { if (pauseEvent == null) { if (diary.isPSOldGen()) - pauseEvent = new GenerationalForwardReference(PSFull, new Decorators(line), super.GCID_COUNTER.parse(line).getIntegerGroup(1)); + pauseEvent = new GenerationalForwardReference(PSFull, new Decorators(line), extractGCID(line)); else - pauseEvent = new GenerationalForwardReference(FullGC, new Decorators(line), super.GCID_COUNTER.parse(line).getIntegerGroup(1)); + pauseEvent = new GenerationalForwardReference(FullGC, new Decorators(line), extractGCID(line)); pauseEvent.setStartTime(getClock()); } else if (pauseEvent.getGarbageCollectionType() == ParNew) { pauseEvent.convertToConcurrentModeFailure(); @@ -338,7 +338,7 @@ private void fullGC(GCLogTrace trace, String line) { pauseEvent.convertToSerialFull(); } else if (pauseEvent.getGarbageCollectionType() != ConcurrentModeFailure) { LOGGER.warning("Maybe Full Pause event not recorded: " + pauseEvent.getGcID()); //todo: difficult to know if this is a full or a CMF - pauseEvent = new GenerationalForwardReference(FullGC, new Decorators(line), super.GCID_COUNTER.parse(line).getIntegerGroup(1)); + pauseEvent = new GenerationalForwardReference(FullGC, new Decorators(line), extractGCID(line)); pauseEvent.setStartTime(getClock()); } pauseEvent.setGCCause(trace.gcCause(1, 0)); @@ -384,10 +384,9 @@ private void jvmExit(GCLogTrace trace, String line) { private ArrayList cache = new ArrayList<>(); private void cpuBreakout(GCLogTrace trace, String line) { - GCLogTrace gcidTrace = GCID_COUNTER.parse(line); - if (gcidTrace != null) { + int gcid = extractGCID(line); + if (gcid != -1) { CPUSummary cpuSummary = new CPUSummary(trace.getDoubleGroup(1), trace.getDoubleGroup(2), trace.getDoubleGroup(3)); - int gcid = gcidTrace.getIntegerGroup(1); // There are 3 cases to consider. // - pause event outside of a concurrent cycle // - pause event that is part of the concurrent cycle diff --git a/parser/src/test/java/com/microsoft/gctoolkit/parser/GCLogParserTest.java b/parser/src/test/java/com/microsoft/gctoolkit/parser/GCLogParserTest.java new file mode 100644 index 00000000..9ee8e4c7 --- /dev/null +++ b/parser/src/test/java/com/microsoft/gctoolkit/parser/GCLogParserTest.java @@ -0,0 +1,32 @@ +package com.microsoft.gctoolkit.parser; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +class GCLogParserTest { + @Test + void extractPackedGCCycleIdAndTextualLength() { + long packed = GCLogParser.extractGCCycleIdAndTextualLength("[2025-10-21T16:44:29.311+0200][3645.640s] GC(35) Pause Young (Allocation Failure)"); + assertEquals(35, GCLogParser.extractGCCycleId(packed)); + assertEquals(48, GCLogParser.extractGCCycleIdTextualLength(packed)); + } + + @Test + void extractGCID() { + assertEquals(35, GCLogParser.extractGCID("[2025-10-21T16:44:29.311+0200][3645.640s] GC(35) Pause Young (Allocation Failure)")); + } + + @Test + void extractPackedGCCycleIdAndTextualLength_malformed() { + long packed = GCLogParser.extractGCCycleIdAndTextualLength("[2025-10-21T16:44:29.311+0200][3645.640s] GC(3"); + assertEquals(-1 , packed); + assertEquals(-1, GCLogParser.extractGCCycleId(packed)); + assertEquals(0, GCLogParser.extractGCCycleIdTextualLength(packed)); + } + + @Test + void extractGCID_malformed() { + assertEquals(-1, GCLogParser.extractGCID("[2025-10-21T16:44:29.311+0200][3645.640s] GC(3")); + } +}