|
#!/usr/bin/env python3
"""Build a ``ReaderParser`` loaded with variable patterns for Cassandra logs.

The module creates ``parser``, sets its delimiter set, registers every
named variable pattern listed in ``_VARIABLE_PATTERNS`` (in listing order)
and finally compiles the parser.
"""

from logmech import ReaderParser
from pattern import PATTERN

parser = ReaderParser()

# Custom delimiters can be specified, though the default is usually sufficient.
parser.set_delimiters(" \t\r\n:,!;%@/()[].")

# (variable name, regex) pairs, registered top to bottom.
# The ordering is kept exactly as originally written; the original author
# notes that the generic patterns "should be last".
# NOTE(review): CLASS_NAME is very broad yet sits before the generic group,
# and HEX_NUMBER / PORT_NUMBER follow GENERIC_INT - confirm against the
# parser's match-priority rules whether this ordering is intended.
_VARIABLE_PATTERNS = (
    # Step 1 - timestamp: HH:MM:SS split into hour / minute / second.
    ("TIMESTAMP", rf"(?<hour>\d{{2}}):(?<minute>\d{{2}}):(?<second>\d{{2}})"),
    # Step 2 - verbosity level: INFO, WARN, ERROR.
    ("LEVEL", rf"(?<level>(INFO)|(WARN)|(ERROR))"),
    # Step 3 - Java exception: "<qualified type ending in Exception>: <message>".
    (
        "SYSTEM_EXCEPTION",
        rf"(?<system_exception_type>({PATTERN.JAVA_PACKAGE_SEGMENT})+[{PATTERN.JAVA_IDENTIFIER_CHARSET}]*Exception): "
        rf"(?<system_exception_msg>{PATTERN.LOG_LINE})",
    ),
    # Stack trace frame: "at <class.method>(<file>.<java|kt|scala>:<line>)".
    (
        "SYSTEM_STACK_TRACE",
        rf"\s+at (?<class_method>[a-zA-Z0-9_$\.]+)\((?<source_file>[a-zA-Z0-9_]+\.(java|kt|scala)):(?<line_num>\d+)\)",
    ),
    # Stack trace frame followed by "[<jar>:<version>]" (optionally "~[").
    (
        "STACK_WITH_JAR",
        rf"\s+at (?<class_method>[a-zA-Z0-9_$\.]+)\((?<source_file>[a-zA-Z0-9_]+\.java):(?<line_num>\d+)\) ~?\[(?<jar>[^\]]+\.jar):(?<version>[^\]]+)\]",
    ),
    # Stack trace frame followed by "[na:<version>]" (optionally "~[").
    (
        "STACK_WITH_NA",
        rf"\s+at (?<class_method>[a-zA-Z0-9_$\.]+)\((?<source_file>[a-zA-Z0-9_]+\.java):(?<line_num>\d+)\) ~?\[na:(?<version>\d+\.\d+[\._]\d+)\]",
    ),
    # Cassandra-specific patterns.
    ("STREAM_ID", rf"Stream #(?<stream_id>{PATTERN.UUID})"),
    ("HINT_FILE", rf"(?<hint_file>{PATTERN.UUID}\-\d+\-\d+\.hints)"),
    ("KEYSPACE_TABLE", rf"Initializing (?<keyspace>[a-z0-9_]+)\.(?<table>[a-z0-9_]+)"),
    ("CASSANDRA_HOST", rf"cassandra\-(?<hostname>[a-z0-9\-]+)"),
    ("BOOTSTRAP_TOKEN", rf"tokens \[(?<tokens>\-?\d+(, \-?\d+)*)\]"),
    # Memory patterns.
    ("MEMORY_MB", rf"(?<memory>{PATTERN.INT})MB"),
    ("MEMORY_BYTES", rf"(?<bytes>\d+)\((?<kb>\d+)K\)"),
    ("MEMORY_TYPE", rf"Global memtable (?<memtype>(on\-heap)|(off\-heap)) threshold"),
    # Duration.
    ("DURATION_MS", rf"(?<duration>{PATTERN.INT})\s*ms"),
    # Netty channel patterns.
    (
        "NETTY_CHANNEL_FULL",
        rf"channel = \[id: (?<channel_id>0x[a-f0-9]+), /(?<src_ip>{PATTERN.IPV4}):(?<src_port>{PATTERN.PORT}) =\> /(?<dst_ip>{PATTERN.IPV4}):(?<dst_port>{PATTERN.PORT})\]",
    ),
    (
        "NETTY_CHANNEL_SHORT",
        rf"channel = \[id: (?<channel_id>0x[a-f0-9]+), L:/(?<local_ip>{PATTERN.IPV4})",
    ),
    # IP patterns tied to a specific message prefix.
    ("HANDSHAKING_IP", rf"Handshaking version with (?<hostname>[\w\-]+)/(?<ip>{PATTERN.IPV4})"),
    ("SESSION_WITH_IP", rf"Session with /(?<ip>{PATTERN.IPV4})"),
    ("STREAMING_TO_IP", rf"streaming to /(?<ip>{PATTERN.IPV4})"),
    (
        "CQL_LISTENING",
        rf"Starting listening for CQL clients on /(?<ip>{PATTERN.IPV4}):(?<port>{PATTERN.PORT})",
    ),
    # Hinted handoff.
    (
        "HANDOFF_FINISHED",
        rf"Finished hinted handoff of file (?<file>{PATTERN.UUID}\-\d+\-\d+\.hints) to endpoint /(?<ip>{PATTERN.IPV4}): (?<uuid>{PATTERN.UUID})",
    ),
    # Directory paths.
    (
        "CASSANDRA_DIR",
        rf"Directory /var/lib/cassandra/(?<dirtype>(commitlog)|(data)|(saved_caches)|(hints))",
    ),
    # CompilerOracle patterns.
    (
        "ORACLE_METHOD",
        rf"CompilerOracle: (?<action>(inline)|(dontinline)) (?<classname>[a-zA-Z0-9_/$]+)\.(?<method>\w+)",
    ),
    (
        "ORACLE_VARIANT_METHOD",
        rf"org/apache/cassandra/db/transform/StoppingTransformation\.(?<stopmethod>(stop)|(stopInPartition))",
    ),
    (
        "MEMORY_CLASS_METHOD",
        rf"org/apache/cassandra/io/util/(?<memclass>(Memory)|(SafeMemory))\.checkBounds",
    ),
    # Status patterns.
    ("JOINING_STATUS", rf"JOINING: (?<status>(schema)|(calculation)) complete"),
    # Thread names.
    ("THREAD_NAME", rf"Thread\[(?<thread>[^\]]+)\]"),
    # Unknown column.
    ("UNKNOWN_COLUMN", rf"Unknown column (?<column>\w+)"),
    # Frames omitted.
    ("FRAMES_OMITTED", rf"\.\.\. (?<frames>\d+) common frames omitted"),
    # General paths.
    ("PATH", rf"(?<path>/[\w/\-\.]+)"),
    ("FILE_PATH", rf"(?<filepath>[^\s]+\.(jar|properties|yaml|log))"),
    # Class names.
    ("CLASS_NAME", rf"(?<classname>[a-zA-Z0-9_$]+)"),
    # Long numbers (7-20 digits, optional leading minus).
    ("LONG_NUMBER", rf"(?<long>\-?\d{{7,20}})"),
    # Generic patterns - should be last.
    ("SYSTEM_IP", rf"(?<ip>{PATTERN.IPV4})"),
    ("SYSTEM_UUID", rf"(?<uuid>{PATTERN.UUID})"),
    ("GENERIC_FLOAT", rf"(?<float>{PATTERN.FLOAT})"),
    ("GENERIC_INT", rf"(?<int>{PATTERN.INT})"),
    ("HEX_NUMBER", rf"(?<hex>0x[a-f0-9]+)"),
    ("PORT_NUMBER", rf"(?<port>{PATTERN.PORT})"),
)

# Register every pattern in table order, then finalize the parser.
for _name, _regex in _VARIABLE_PATTERNS:
    parser.add_variable_pattern(_name, _regex)

parser.compile()
0 commit comments