Skip to content

Commit 8012660

Browse files
author
Adrian
committed
Big chungus
1 parent 7570b9d commit 8012660

File tree

20 files changed

+935
-460
lines changed

20 files changed

+935
-460
lines changed

rust/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
/target
22

3+
__pycache__/
4+
35
/examples/**/*.o
46

57
*.vim

rust/Cargo.toml

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,20 @@ version = "0.1.0"
44
edition = "2024"
55

66
[lib]
7-
crate-type = ["lib", "staticlib", "cdylib"]
7+
crate-type = [
8+
"lib",
9+
"staticlib",
10+
"cdylib"
11+
]
812

913
[features]
10-
python = []
14+
default = [
15+
"python",
16+
]
17+
python = [
18+
"pyo3/extension-module",
19+
"pyo3/macros",
20+
]
1121

1222
[build-dependencies]
1323
cbindgen = "0.29"
@@ -18,3 +28,4 @@ tracing = "0.1"
1828

1929
[dependencies.pyo3]
2030
version = "0.27"
31+
default-features = false

rust/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@ include/log_mechanic.generated.hpp: FORCE
1212
cbindgen --config cbindgen.toml --crate log-mechanic > $@
1313

1414
target/debug/liblog_mechanic.a: FORCE
15-
cargo build
15+
cargo build --no-default-features

rust/cbindgen.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,10 @@ include = [
6262
"type_traits",
6363
"string_view",
6464
]
65-
exclude = ["Box"]
65+
exclude = [
66+
"Box",
67+
"Option",
68+
]
6669
# prefix = "CAPI_"
6770
item_types = []
6871
renaming_overrides_prefixing = false

rust/examples/cpp_usage/usage.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include <cassert>
22
#include <cstdio>
3+
#include <cstring>
34

45
#include "log_mechanic.hpp"
56

@@ -10,17 +11,22 @@ int main() {
1011

1112
clp_log_mechanic_schema_add_rule(schema, "hello", "abc|def");
1213

13-
Box<Lexer> lexer { clp_log_mechanic_lexer_new(schema) };
14+
Box<Parser> parser { clp_log_mechanic_parser_new(schema) };
1415

1516
size_t pos { 0 };
1617

17-
CLogFragment fragment { clp_log_mechanic_lexer_next_fragment(lexer, "def", &pos, nullptr, nullptr) };
18-
assert(fragment.rule == 1);
19-
assert(fragment.start + 3 == fragment.end);
18+
char const* input = "def";
19+
20+
Box<LogEvent> event { clp_log_mechanic_parser_next_event(parser, input, &pos) };
21+
assert(clp_log_mechanic_event_token_count(event) == 1);
22+
Token const* token { clp_log_mechanic_event_token(event, 0) };
23+
assert(clp_log_mechanic_event_token_rule(token) == 1);
24+
assert(pos == strlen(input));
2025

2126
printf("good!\n");
2227

23-
clp_log_mechanic_lexer_delete(lexer);
28+
clp_log_mechanic_event_delete(event);
29+
clp_log_mechanic_parser_delete(parser);
2430
clp_log_mechanic_schema_delete(schema);
2531

2632
return 0;
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/usr/bin/env python3
2+
# Uses the `parser` object exported by the `schema` module.
3+
from schema import parser
4+
# Path of the log file to analyse; opened relative to the current directory.
5+
log_file = 'cassandra-node2.log'
6+
# Read the whole file into memory and hand it to the parser in one call.
7+
with open(log_file) as f:
8+
contents = f.read()
9+
10+
parser.set_input_stream(contents)
11+
# NOTE(review): `lines` is assigned here but never read in this script.
12+
lines = 0
# Maps each distinct template string to an integer id (assigned in order of first appearance).
13+
log_types = {}
# One (template id, variables) tuple per parsed log event.
14+
logs = []
15+
# Pull events until the parser returns None.
16+
while True:
17+
event = parser.next_log_event()
18+
19+
if event is None:
20+
break
21+
# `template` is the event text with each rule-matched token replaced by a
# "%<rule>%" placeholder; `variables` collects the replaced tokens.
22+
template = ""
23+
variables = []
24+
25+
for token in event.tokens:
# A token whose rule is None is copied into the template verbatim.
26+
if token.rule is None:
27+
template += token.text
28+
else:
29+
template += f"%{token.rule}%"
30+
variables.append((token.text, token.captures))
31+
# Reuse the id of a template seen before; otherwise assign the next free id
# (the current number of known templates).
32+
if template in log_types:
33+
x = log_types[template]
34+
else:
35+
x = len(log_types)
36+
log_types[template] = x
37+
38+
logs.append((x, variables))
39+
# List the (template, id) pairs ordered by id.
40+
templates = list(log_types.items())
41+
templates.sort(key= lambda x: x[1])
42+
43+
for i in range(len(templates)):
44+
print(f"{i}. {templates[i]}")
45+
# Summary: number of parsed events and number of distinct templates.
46+
print(f"{len(logs)} logs, {len(log_types)} log types")
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#!/usr/bin/env python3
2+
3+
from logmech import ReaderParser
4+
from pattern import PATTERN
5+
6+
parser = ReaderParser()
7+
8+
# Custom delimiters can be specified, though the default is usually sufficient
9+
parser.set_delimiters(" \t\r\n:,!;%@/()[].")
10+
11+
# Step 1 - Timestamp pattern: Extract full HH:MM:SS as components
12+
parser.add_variable_pattern("TIMESTAMP", rf"(?<hour>\d{{2}}):(?<minute>\d{{2}}):(?<second>\d{{2}})")
13+
14+
# Step 2 - Verbosity level: INFO, WARN, ERROR
15+
parser.add_variable_pattern("LEVEL", rf"(?<level>(INFO)|(WARN)|(ERROR))")
16+
17+
# Step 3 - Java exception pattern
18+
parser.add_variable_pattern(
19+
"SYSTEM_EXCEPTION",
20+
rf"(?<system_exception_type>({PATTERN.JAVA_PACKAGE_SEGMENT})+[{PATTERN.JAVA_IDENTIFIER_CHARSET}]*Exception): "
21+
rf"(?<system_exception_msg>{PATTERN.LOG_LINE})"
22+
)
23+
24+
# Stack trace patterns - simplified to avoid issues
25+
parser.add_variable_pattern(
26+
"SYSTEM_STACK_TRACE",
27+
rf"\s+at (?<class_method>[a-zA-Z0-9_$\.]+)\((?<source_file>[a-zA-Z0-9_]+\.(java|kt|scala)):(?<line_num>\d+)\)"
28+
)
29+
30+
# Stack trace with jar info
31+
parser.add_variable_pattern(
32+
"STACK_WITH_JAR",
33+
rf"\s+at (?<class_method>[a-zA-Z0-9_$\.]+)\((?<source_file>[a-zA-Z0-9_]+\.java):(?<line_num>\d+)\) ~?\[(?<jar>[^\]]+\.jar):(?<version>[^\]]+)\]"
34+
)
35+
36+
# Stack trace with na: prefix
37+
parser.add_variable_pattern(
38+
"STACK_WITH_NA",
39+
rf"\s+at (?<class_method>[a-zA-Z0-9_$\.]+)\((?<source_file>[a-zA-Z0-9_]+\.java):(?<line_num>\d+)\) ~?\[na:(?<version>\d+\.\d+[\._]\d+)\]"
40+
)
41+
42+
# Cassandra-specific patterns
43+
parser.add_variable_pattern("STREAM_ID", rf"Stream #(?<stream_id>{PATTERN.UUID})")
44+
parser.add_variable_pattern("HINT_FILE", rf"(?<hint_file>{PATTERN.UUID}\-\d+\-\d+\.hints)")
45+
parser.add_variable_pattern("KEYSPACE_TABLE", rf"Initializing (?<keyspace>[a-z0-9_]+)\.(?<table>[a-z0-9_]+)")
46+
parser.add_variable_pattern("CASSANDRA_HOST", rf"cassandra\-(?<hostname>[a-z0-9\-]+)")
47+
parser.add_variable_pattern("BOOTSTRAP_TOKEN", rf"tokens \[(?<tokens>\-?\d+(, \-?\d+)*)\]")
48+
49+
# Memory patterns
50+
parser.add_variable_pattern("MEMORY_MB", rf"(?<memory>{PATTERN.INT})MB")
51+
parser.add_variable_pattern("MEMORY_BYTES", rf"(?<bytes>\d+)\((?<kb>\d+)K\)")
52+
parser.add_variable_pattern("MEMORY_TYPE", rf"Global memtable (?<memtype>(on\-heap)|(off\-heap)) threshold")
53+
54+
# Duration
55+
parser.add_variable_pattern("DURATION_MS", rf"(?<duration>{PATTERN.INT})\s*ms")
56+
57+
# Netty channel patterns
58+
parser.add_variable_pattern("NETTY_CHANNEL_FULL", rf"channel = \[id: (?<channel_id>0x[a-f0-9]+), /(?<src_ip>{PATTERN.IPV4}):(?<src_port>{PATTERN.PORT}) =\> /(?<dst_ip>{PATTERN.IPV4}):(?<dst_port>{PATTERN.PORT})\]")
59+
parser.add_variable_pattern("NETTY_CHANNEL_SHORT", rf"channel = \[id: (?<channel_id>0x[a-f0-9]+), L:/(?<local_ip>{PATTERN.IPV4})")
60+
61+
# IP patterns
62+
parser.add_variable_pattern("HANDSHAKING_IP", rf"Handshaking version with (?<hostname>[\w\-]+)/(?<ip>{PATTERN.IPV4})")
63+
parser.add_variable_pattern("SESSION_WITH_IP", rf"Session with /(?<ip>{PATTERN.IPV4})")
64+
parser.add_variable_pattern("STREAMING_TO_IP", rf"streaming to /(?<ip>{PATTERN.IPV4})")
65+
parser.add_variable_pattern("CQL_LISTENING", rf"Starting listening for CQL clients on /(?<ip>{PATTERN.IPV4}):(?<port>{PATTERN.PORT})")
66+
67+
# Hinted handoff
68+
parser.add_variable_pattern("HANDOFF_FINISHED", rf"Finished hinted handoff of file (?<file>{PATTERN.UUID}\-\d+\-\d+\.hints) to endpoint /(?<ip>{PATTERN.IPV4}): (?<uuid>{PATTERN.UUID})")
69+
70+
# Directory paths
71+
parser.add_variable_pattern("CASSANDRA_DIR", rf"Directory /var/lib/cassandra/(?<dirtype>(commitlog)|(data)|(saved_caches)|(hints))")
72+
73+
# CompilerOracle patterns
74+
parser.add_variable_pattern("ORACLE_METHOD", rf"CompilerOracle: (?<action>(inline)|(dontinline)) (?<classname>[a-zA-Z0-9_/$]+)\.(?<method>\w+)")
75+
parser.add_variable_pattern("ORACLE_VARIANT_METHOD", rf"org/apache/cassandra/db/transform/StoppingTransformation\.(?<stopmethod>(stop)|(stopInPartition))")
76+
parser.add_variable_pattern("MEMORY_CLASS_METHOD", rf"org/apache/cassandra/io/util/(?<memclass>(Memory)|(SafeMemory))\.checkBounds")
77+
78+
# Status patterns
79+
parser.add_variable_pattern("JOINING_STATUS", rf"JOINING: (?<status>(schema)|(calculation)) complete")
80+
81+
# Thread names
82+
parser.add_variable_pattern("THREAD_NAME", rf"Thread\[(?<thread>[^\]]+)\]")
83+
84+
# Unknown column
85+
parser.add_variable_pattern("UNKNOWN_COLUMN", rf"Unknown column (?<column>\w+)")
86+
87+
# Frames omitted
88+
parser.add_variable_pattern("FRAMES_OMITTED", rf"\.\.\. (?<frames>\d+) common frames omitted")
89+
90+
# General paths
91+
parser.add_variable_pattern("PATH", rf"(?<path>/[\w/\-\.]+)")
92+
parser.add_variable_pattern("FILE_PATH", rf"(?<filepath>[^\s]+\.(jar|properties|yaml|log))")
93+
94+
# Class names
# NOTE(review): this character class also accepts digit-only text and strings
# such as "0x1f", which the LONG_NUMBER and HEX_NUMBER rules registered later
# also target (and presumably GENERIC_INT as well). If rule priority follows
# registration order (not visible here), those later rules may be shadowed by
# this one -- TODO confirm against the parser's matching semantics.
95+
parser.add_variable_pattern("CLASS_NAME", rf"(?<classname>[a-zA-Z0-9_$]+)")
96+
97+
# Long numbers (7-20 digits)
98+
parser.add_variable_pattern("LONG_NUMBER", rf"(?<long>\-?\d{{7,20}})")
99+
100+
# Generic patterns - should be last
101+
parser.add_variable_pattern("SYSTEM_IP", rf"(?<ip>{PATTERN.IPV4})")
102+
parser.add_variable_pattern("SYSTEM_UUID", rf"(?<uuid>{PATTERN.UUID})")
103+
parser.add_variable_pattern("GENERIC_FLOAT", rf"(?<float>{PATTERN.FLOAT})")
104+
parser.add_variable_pattern("GENERIC_INT", rf"(?<int>{PATTERN.INT})")
105+
parser.add_variable_pattern("HEX_NUMBER", rf"(?<hex>0x[a-f0-9]+)")
106+
parser.add_variable_pattern("PORT_NUMBER", rf"(?<port>{PATTERN.PORT})")
107+
108+
parser.compile()

rust/examples/python_usage/usage.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,33 @@
44

55
a = ReaderParser()
66

7-
a.add_var("number", r"[0-9]+")
8-
a.add_var("username", r"@(?<inside>[a-z]+)(?<parts>\.[a-z]+)*")
7+
a.add_variable_pattern("number", r"[0-9]+")
8+
a.add_variable_pattern("username", r"@(?<inside>[a-z]+)(?<parts>\.[a-z]+)*")
99

1010
a.compile()
1111

12-
text = "123 awesrgesrgesrg 6346346 @someone foo@username @someone.foo.bar.baz"
12+
text = """
13+
123 awesrgesrgesrg 6346346 @someone foo@username @someone.foo.bar.baz
14+
foo
15+
bar
16+
baz
17+
"""
18+
1319
a.set_input_stream(text)
1420

1521
print(f"py: input [{text}]")
22+
print()
23+
1624
while True:
1725
event = a.next_log_event()
1826

1927
if event is None:
2028
break
2129

22-
if event.variable is not None:
23-
print(f"py: static text: [{event.static_text}] {event.variable.name}: [{event.variable.text}] [{event.variable.captures!r}]")
24-
else:
25-
# Last static text before eof
26-
print(f"py: static text: [{event.static_text}] (end)")
30+
print(f"header is: '{event.header!r}'")
31+
print(f"tokens are:")
32+
for t in event.tokens:
33+
# print(f"- ({t!r}): {t.rule}, '{t.text}', {t.captures}")
34+
print(f"- {t.rule}, '{t.text}'")
35+
36+
print()

rust/include/log_mechanic.generated.hpp

Lines changed: 19 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -8,31 +8,13 @@
88

99
namespace clp::log_mechanic {
1010

11-
struct Lexer;
11+
struct LogEvent;
1212

13-
struct Schema;
14-
15-
struct CLogFragment {
16-
// Custom
17-
CLogFragment() = default;
13+
struct Parser;
1814

19-
// Generated
20-
/// `0` iff no variable found (static text until end of input).
21-
size_t rule;
22-
/// Start of variable (if found).
23-
const uint8_t *start;
24-
/// End of variable (if found).
25-
const uint8_t *end;
26-
27-
CLogFragment(size_t const& rule,
28-
const uint8_t *const& start,
29-
const uint8_t *const& end)
30-
: rule(rule),
31-
start(start),
32-
end(end)
33-
{}
15+
struct Schema;
3416

35-
};
17+
struct Token;
3618

3719
template<typename T>
3820
struct CSlice {
@@ -61,20 +43,26 @@ CSlice(char const* c_str)
6143

6244
using CStringView = CSlice<char>;
6345

64-
using LogFragmentOnCapture = void(*)(const void *data, CStringView name, CStringView lexeme);
65-
6646

6747
extern "C" {
6848

69-
void clp_log_mechanic_lexer_delete(Box<Lexer> lexer);
// NOTE(review): "mecahnic" in this symbol looks like a transposition of
// "mechanic"; every other declaration in this block uses the
// clp_log_mechanic_ prefix. This header is generated by cbindgen, so the
// rename presumably has to be made in the Rust source and the header
// regenerated -- TODO confirm.
49+
CStringView clp_log_mecahnic_event_token_name(const Token *token);
50+
51+
void clp_log_mechanic_event_delete(Box<LogEvent> event);
52+
53+
const Token *clp_log_mechanic_event_token(const LogEvent *event, size_t i);
54+
55+
size_t clp_log_mechanic_event_token_count(const LogEvent *event);
56+
57+
size_t clp_log_mechanic_event_token_rule(const Token *token);
58+
59+
void clp_log_mechanic_parser_delete(Box<Parser> parser);
7060

71-
Box<Lexer> clp_log_mechanic_lexer_new(const Schema *schema);
61+
Box<Parser> clp_log_mechanic_parser_new(const Schema *schema);
7262

73-
CLogFragment clp_log_mechanic_lexer_next_fragment(Lexer *lexer,
74-
CStringView input,
75-
size_t *pos,
76-
LogFragmentOnCapture maybe_closure,
77-
const void *data);
63+
Option<Box<LogEvent>> clp_log_mechanic_parser_next_event(Parser *parser,
64+
CStringView input,
65+
size_t *pos);
7866

7967
void clp_log_mechanic_schema_add_rule(Schema *schema, CStringView name, CStringView pattern);
8068

rust/include/log_mechanic.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
#ifndef LOG_MECHANIC_HPP
22
#define LOG_MECHANIC_HPP
33

4+
#include <type_traits>
5+
46
namespace clp::log_mechanic {
57

68
template<typename T> using Box = T*;
9+
// Type trait, primary template: false for every T not matched by the
// specialization below.
10+
template <typename T> struct is_rust_box_t: std::false_type {};
// Specialization for Box<T>. Box<T> is declared above as an alias for T*, so
// this matches any raw pointer type.
11+
template <typename T> struct is_rust_box_t<Box<T>>: std::true_type {};
12+
// Option<T> names T itself when T is a Box (i.e. a pointer) and is ill-formed
// for any other T, because std::enable_if_t has no result type in that case.
// NOTE(review): presumably a null pointer stands for Rust's `None` here --
// confirm against the Rust side of the FFI.
13+
template<typename T> using Option = std::enable_if_t<is_rust_box_t<T>::value, T>;
714
}
815

916
#include "log_mechanic.generated.hpp"

0 commit comments

Comments
 (0)