Skip to content

Commit d7e495f

Browse files
ekneg54 and ppcad authored
add replacer processor (#672)
* update changelog * add replacer processor boilerplate * fix rule tests * add replacer rule tests and behavior * fix failing tests by providing testdata * add equality tests * WIP * Implement replacer and add documentation * Fix mypy * Add type hints to named tuples in replacer * Remove unused import from replacer * Add more matching options to replacer * Appease mypy * Add missing type hints to replacer * Fix typos in replacer * Refactor unused variables in replacer * Add more replacer tests * Remove while loop from replacer * Appease mypy * Disable pylint protected-access in replacer test --------- Co-authored-by: Piotr Pauksztelo <piotr.pauksztelo@fkie.fraunhofer.de>
1 parent fa468b5 commit d7e495f

File tree

11 files changed

+1328
-0
lines changed

11 files changed

+1328
-0
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
## next release
44
### Breaking
55
### Features
6+
7+
* add `replacer` processor to replace substrings in fields using a syntax similar to the `dissector`
8+
69
### Improvements
710
### Bugfix
811

@@ -33,6 +36,7 @@
3336
### Breaking
3437

3538
* remove `hyperscan_resolver` processor because it is not significantly faster than the `generic_resolver` with its cache enabled
39+
3640
### Features
3741

3842
* add support for rule files with suffix `.yaml`

doc/source/configuration/processor.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Processors
2525
.. automodule:: logprep.processor.list_comparison.processor
2626
.. automodule:: logprep.processor.pre_detector.processor
2727
.. automodule:: logprep.processor.pseudonymizer.processor
28+
.. automodule:: logprep.processor.replacer.processor
2829
.. automodule:: logprep.processor.requester.processor
2930
.. automodule:: logprep.processor.selective_extractor.processor
3031
.. automodule:: logprep.processor.string_splitter.processor

logprep/processor/replacer/__init__.py

Whitespace-only changes.
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
"""
2+
Replacer
3+
============
4+
5+
The `replacer` is a processor that replaces parts of a string with strings defined in rules.
6+
7+
Processor Configuration
8+
^^^^^^^^^^^^^^^^^^^^^^^
9+
.. code-block:: yaml
10+
:linenos:
11+
12+
- samplename:
13+
type: replacer
14+
rules:
15+
- tests/testdata/rules/
16+
17+
.. autoclass:: logprep.processor.replacer.processor.Replacer.Config
18+
:members:
19+
:undoc-members:
20+
:inherited-members:
21+
:noindex:
22+
23+
.. automodule:: logprep.processor.replacer.rule
24+
"""
25+
26+
from logprep.processor.field_manager.processor import FieldManager
27+
from logprep.processor.replacer.rule import ReplacerRule, ReplacementTemplate, Replacement
28+
from logprep.util.helper import add_fields_to, get_dotted_field_value
29+
30+
31+
class Replacer(FieldManager):
    """A processor that replaces parts of a string via templates defined in rules."""

    rule_class = ReplacerRule

    def _apply_rules(self, event: dict, rule: ReplacerRule) -> None:
        """Apply the replacement templates of `rule` to the mapped fields of `event`.

        For every source field in `rule.mapping` that has a template, the field
        value is converted to a string and passed to `replace_by_templates`.
        A successful result is written to `rule.target_field` if it is set,
        otherwise back to the source field.

        Fields that are missing from the event are skipped. A warning is
        reported via `_handle_warning_error` unless `rule.ignore_missing_fields`
        is set.
        """
        for source_field in rule.mapping:
            template = rule.templates.get(source_field)
            if template is None:
                continue

            value_to_replace = get_dotted_field_value(event, source_field)
            if value_to_replace is None:
                if not rule.ignore_missing_fields:
                    # NOTE(review): BaseException is kept because the contract of
                    # `_handle_warning_error` is not visible here; `Exception`
                    # would be the conventional choice - confirm before changing.
                    error = BaseException(
                        f"replacer: mapping field '{source_field}' does not exist"
                    )
                    self._handle_warning_error(event, rule, error)
                # A missing field must never be processed as the literal text
                # "None" (which is what str(None) would yield).
                continue
            value_to_replace = str(value_to_replace)

            if not value_to_replace.startswith(template.prefix):
                continue

            result = self.replace_by_templates(template, value_to_replace)

            if result is not None:
                target = rule.target_field if rule.target_field else source_field
                add_fields_to(event, {target: result}, rule, overwrite_target=rule.overwrite_target)

    @staticmethod
    def replace_by_templates(template: ReplacementTemplate, to_replace: str) -> str | None:
        """Replace parts of `to_replace` by strings defined in `template`.

        Walks through `template.replacements` in order, consuming `to_replace`
        from left to right and building the resulting string.

        Returns the resulting string, or `None` if `to_replace` does not fit
        the template (a required `match` or `next` separator was not found, or
        unmatched text remains after the last replacement).

        NOTE(review): `template.replacements` is assumed to be non-empty;
        presumably this is guaranteed when the rule is created - confirm.
        """
        first_replacement = template.replacements[0]
        # With `keep_original`, the kept text is cut from the start of
        # `to_replace` by `_handle_wildcard`, so nothing is seeded here.
        result = "" if first_replacement.keep_original else template.prefix
        if first_replacement.match:
            if not to_replace.startswith(template.prefix + first_replacement.match):
                return None
            result += first_replacement.match

        replacement: Replacement | None
        for idx, replacement in enumerate(template.replacements):
            replacement = Replacer._handle_wildcard(replacement, to_replace)
            if replacement is None:
                return None

            if replacement.match:
                # Swap the last occurrence of `match` in the result built so
                # far (and everything after it) for the replacement value.
                pre, match, _ = result.rpartition(replacement.match)
                if not match:
                    return None
                result = pre + replacement.value + replacement.next
                continue

            if not replacement.next:
                # No separator follows: the value replaces the rest of the input.
                result += replacement.value
                break

            # Drop everything up to and including the first `next` separator.
            _, separator, to_replace = to_replace.partition(replacement.next)
            if not separator:
                return None

            if replacement.greedy:
                to_replace = Replacer._partition_greedily(replacement, to_replace)

            # After the last replacement the remaining input must be a suffix
            # of `next` (an empty remainder always qualifies).
            if idx == len(template.replacements) - 1 and not replacement.next.endswith(to_replace):
                return None

            result += replacement.value + replacement.next

        return result

    @staticmethod
    def _partition_greedily(replacement: Replacement, to_replace: str) -> str:
        """Ensure to replace as much as possible if next is contained in string to replace.

        Returns the part of `to_replace` after the last occurrence of
        `replacement.next`, or `to_replace` unchanged if it does not occur.
        """
        last_index = to_replace.rfind(replacement.next)
        return to_replace[last_index + len(replacement.next) :] if last_index != -1 else to_replace

    @staticmethod
    def _handle_wildcard(replacement: Replacement, to_replace: str) -> Replacement | None:
        """Keep the original text instead of replacing it if `keep_original` is set.

        This is used to match a variable string without having to replace it.
        Presumably `keep_original` is set while parsing a rule whose
        replacement value is a single `*` - that parsing is not part of this
        module.

        If `keep_original` is not set, `replacement` is returned unchanged.
        Otherwise a new `Replacement` is returned whose value is the text of
        `to_replace` in front of the first occurrence of `replacement.next`.
        If that occurrence is at index 0 (which includes an empty `next`),
        the whole of `to_replace` is used. Returns `None` if `next` does not
        occur in `to_replace`.
        """
        if replacement.keep_original:
            match_idx = to_replace.find(replacement.next)
            if match_idx < 0:
                return None
            original = to_replace[:match_idx] if match_idx else to_replace
            return Replacement(
                original,
                replacement.next,
                replacement.match,
                replacement.keep_original,
                replacement.greedy,
            )
        return replacement

0 commit comments

Comments
 (0)