Skip to content

Commit 2f29283

Browse files
committed
support grokker
1 parent 7b35958 commit 2f29283

File tree

3 files changed

+65
-4
lines changed

3 files changed

+65
-4
lines changed

logprep/util/grok/grok.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@
2525

2626
import re
2727
import string
28-
import sys
2928
from hashlib import md5
3029
from importlib import resources
3130
from itertools import chain
@@ -38,7 +37,7 @@
3837
from logprep.util.decorators import timeout
3938

4039
DEFAULT_PATTERNS_DIRS = [str(resources.files(__package__) / "patterns/ecs-v1")]
41-
LOGSTASH_NOTATION = r"(([^\[\]\{\}\.:]*)?(\[[^\[\]\{\}\.:]*\])*)"
40+
LOGSTASH_NOTATION = r"(([^\[\]\{\}\.:]*)?(\[[^\[\]\{\}:]*\])*)"
4241
GROK = r"%\{" + rf"([A-Z0-9_]*)(:({LOGSTASH_NOTATION}))?(:(int|float))?" + r"\}"
4342
ONIGURUMA = r"\(\?<([^()]*)>\(?(([^()]*|\(([^()]*|\([^()]*\))*\))*)\)?\)"
4443
NON_RESOLVED_ONIGURUMA = r"\(\?<[^md5].*>"
@@ -53,7 +52,15 @@ class Grok:
5352
grok_pattern = re.compile(GROK)
5453
oniguruma = re.compile(ONIGURUMA)
5554

56-
pattern: str = field(validator=validators.instance_of((str, list)))
55+
pattern: str | list[str] = field(
56+
validator=validators.or_(
57+
validators.instance_of(str),
58+
validators.deep_iterable(
59+
iterable_validator=validators.instance_of(list),
60+
member_validator=validators.instance_of(str),
61+
),
62+
)
63+
)
5764
custom_patterns_dir: str = field(default="")
5865
custom_patterns: dict = field(factory=dict)
5966
fullmatch: bool = field(default=True)
@@ -128,7 +135,7 @@ def _to_dundered_field(fields: str) -> str:
128135
def _to_dotted_field(fields: str) -> str:
129136
if not "__" in fields:
130137
return fields
131-
return fields.replace("__", ".")
138+
return fields.replace(".", "\\.").replace("__", ".")
132139

133140
def _resolve_grok(self, match: re.Match) -> str:
134141
name = match.group(1)

tests/unit/ng/processor/grokker/test_grokker.py

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -197,6 +197,33 @@
197197
"parent": {"some_ip": "123.123.123.123", "port": 1234},
198198
},
199199
),
200+
(
201+
"normalization from escaped & nested grok",
202+
{
203+
"filter": "win\\.log.event\\._id: 123456789",
204+
"grokker": {
205+
"mapping": {
206+
"win\\.log.event_data.normalize me!": r"%{IP:[par\\ent][...]} \w+ %{NUMBER:[par\\ent][\\port\\]:int} %[ts]+ %{NUMBER:te\\.st\\:int}"
207+
},
208+
},
209+
},
210+
{
211+
"win.log": {
212+
"api": "wineventlog",
213+
"event._id": 123456789,
214+
"event_data": {"normalize me!": "123.123.123.123 555 1234 %ttss 11"},
215+
}
216+
},
217+
{
218+
"win.log": {
219+
"api": "wineventlog",
220+
"event._id": 123456789,
221+
"event_data": {"normalize me!": "123.123.123.123 555 1234 %ttss 11"},
222+
},
223+
"te.st\\": 11,
224+
"par\\ent": {"...": "123.123.123.123", "\\port\\": 1234},
225+
},
226+
),
200227
(
201228
"example log message",
202229
{

tests/unit/processor/grokker/test_grokker.py

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -192,6 +192,33 @@
192192
"parent": {"some_ip": "123.123.123.123", "port": 1234},
193193
},
194194
),
195+
(
196+
"normalization from escaped & nested grok",
197+
{
198+
"filter": "win\\.log.event\\._id: 123456789",
199+
"grokker": {
200+
"mapping": {
201+
"win\\.log.event_data.normalize me!": r"%{IP:[par\\ent][...]} \w+ %{NUMBER:[par\\ent][\\port\\]:int} %[ts]+ %{NUMBER:te\\.st\\:int}"
202+
},
203+
},
204+
},
205+
{
206+
"win.log": {
207+
"api": "wineventlog",
208+
"event._id": 123456789,
209+
"event_data": {"normalize me!": "123.123.123.123 555 1234 %ttss 11"},
210+
}
211+
},
212+
{
213+
"win.log": {
214+
"api": "wineventlog",
215+
"event._id": 123456789,
216+
"event_data": {"normalize me!": "123.123.123.123 555 1234 %ttss 11"},
217+
},
218+
"te.st\\": 11,
219+
"par\\ent": {"...": "123.123.123.123", "\\port\\": 1234},
220+
},
221+
),
195222
(
196223
"example log message",
197224
{

0 commit comments

Comments
 (0)