Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
### Features

### Improvements
* fix several mypy issues

### Bugfix
* fix calculator silently failing on syntax errors


## 18.1.0
Expand All @@ -20,7 +22,6 @@
### Improvements
* simplify Dockerfile and remove docker build support for `LOGPREP_VERSION`
* pytest.param now works with test_cases document generation
* fix several mypy issues

### Bugfix
* generic_resolver now follows yaml standard and accepts a list instead of relying on the ordering of a dict
Expand Down
331 changes: 190 additions & 141 deletions logprep/abc/input.py

Large diffs are not rendered by default.

10 changes: 2 additions & 8 deletions logprep/abc/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
add_and_overwrite,
add_fields_to,
get_dotted_field_value,
has_dotted_field,
pop_dotted_field_value,
)
from logprep.util.rule_loader import RuleLoader
Expand Down Expand Up @@ -279,14 +280,7 @@ def load_rules(self, rules_targets: Sequence[str | dict]) -> None:

@staticmethod
def _field_exists(event: dict, dotted_field: str) -> bool:
fields = dotted_field.split(".")
dict_ = event
for field_ in fields:
if field_ in dict_ and isinstance(dict_, dict):
dict_ = dict_[field_]
else:
return False
return True
return has_dotted_field(event, dotted_field)

def _handle_warning_error(self, event, rule, error, failure_tags=None):
tags = get_dotted_field_value(event, "tags")
Expand Down
10 changes: 5 additions & 5 deletions logprep/connector/confluent_kafka/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ class Config(Input.Config):

"""

_last_valid_record: Message
_last_valid_record: Message | None

__slots__ = ["_last_valid_record"]

Expand All @@ -278,7 +278,7 @@ def __init__(self, name: str, configuration: "ConfluentKafkaInput.Config") -> No

@property
def config(self) -> Config:
"""Provides the properly typed rule configuration object"""
"""Provides the properly typed configuration object"""
return typing.cast(ConfluentKafkaInput.Config, self._config)

@property
Expand Down Expand Up @@ -433,7 +433,7 @@ def _get_raw_event(self, timeout: float) -> bytes | None:

Returns
-------
message_value : bytearray
message_value : bytes
A raw document obtained from Kafka.

Raises
Expand Down Expand Up @@ -469,7 +469,7 @@ def _get_event(self, timeout: float) -> Tuple[None, None] | Tuple[dict, bytes]:
-------
event_dict : dict
A parsed document obtained from Kafka.
raw_event : bytearray
raw_event : bytes
A raw document obtained from Kafka.

Raises
Expand Down Expand Up @@ -545,7 +545,7 @@ def _revoke_callback(self, _: Any, topic_partitions: list[TopicPartition]) -> No
)
self.batch_finished_callback()

def _lost_callback(self, topic_partitions: list[TopicPartition]) -> None:
def _lost_callback(self, _: Consumer, topic_partitions: list[TopicPartition]) -> None:
for topic_partition in topic_partitions:
self.metrics.number_of_warnings += 1
member_id = self._get_memberid()
Expand Down
2 changes: 1 addition & 1 deletion logprep/connector/confluent_kafka/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ class Config(Output.Config):

@property
def config(self) -> Config:
"""Provides the properly typed rule configuration object"""
"""Provides the properly typed configuration object"""
return typing.cast(ConfluentKafkaOutput.Config, self._config)

@property
Expand Down
2 changes: 1 addition & 1 deletion logprep/connector/dummy/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class Config(Input.Config):

@property
def config(self) -> Config:
"""Provides the properly typed rule configuration object"""
"""Provides the properly typed configuration object"""
return typing.cast("DummyInput.Config", self._config)

@cached_property
Expand Down
2 changes: 1 addition & 1 deletion logprep/connector/dummy/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def __init__(self, name: str, configuration: "DummyOutput.Config"):

@property
def config(self) -> Config:
"""Provides the properly typed rule configuration object"""
"""Provides the properly typed configuration object"""
return typing.cast("DummyOutput.Config", self._config)

def store(self, document: dict):
Expand Down
2 changes: 1 addition & 1 deletion logprep/connector/file/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def __init__(self, name: str, configuration: "FileInput.Config"):

@property
def config(self) -> Config:
"""Provides the properly typed rule configuration object"""
"""Provides the properly typed configuration object"""
return typing.cast(FileInput.Config, self._config)

def _calc_file_fingerprint(
Expand Down
7 changes: 5 additions & 2 deletions logprep/connector/http/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,10 @@ class Config(Input.Config):
:code:`target_field`."""

def __attrs_post_init__(self):
if "add_full_event_to_target_field" in self.preprocessing and self.original_event_field:
if (
self.preprocessing.add_full_event_to_target_field is not None
and self.original_event_field
):
raise InvalidConfigurationError(
"Cannot configure both add_full_event_to_target_field and original_event_field."
)
Expand Down Expand Up @@ -546,7 +549,7 @@ def __init__(self, name: str, configuration: "HttpInput.Config") -> None:

@property
def config(self) -> Config:
"""Provides the properly typed rule configuration object"""
"""Provides the properly typed configuration object"""
return typing.cast(HttpInput.Config, self._config)

def setup(self) -> None:
Expand Down
18 changes: 10 additions & 8 deletions logprep/filter/expression/filter_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import re
from abc import ABC, abstractmethod
from itertools import chain, zip_longest
from typing import Any, List
from typing import Any, Sequence

from logprep.util.helper import field_list_to_dotted_field


class FilterExpressionError(Exception):
Expand Down Expand Up @@ -78,7 +80,7 @@ def does_match(self, document: dict) -> bool:
"""

@staticmethod
def _get_value(key: List[str], document: dict) -> Any:
def _get_value(key: Sequence[str], document: dict) -> Any:
"""Return the value for the given key from the document."""
if not key:
raise KeyDoesNotExistError
Expand Down Expand Up @@ -159,10 +161,10 @@ def does_match(self, document: dict) -> bool:
class KeyBasedFilterExpression(FilterExpression):
"""Base class of filter expressions that match a certain value on a given key."""

def __init__(self, key: List[str]):
def __init__(self, key: Sequence[str]):
super().__init__()
self.key = key
self._key_as_dotted_string = ".".join([str(i) for i in self.key])
self._key_as_dotted_string = field_list_to_dotted_field([str(i) for i in self.key])

def __repr__(self) -> str:
return f"{self.key_as_dotted_string}"
Expand All @@ -186,7 +188,7 @@ def key_as_dotted_string(self) -> str:
class KeyValueBasedFilterExpression(KeyBasedFilterExpression):
"""Base class of filter expressions that match a certain value on a given key."""

def __init__(self, key: List[str], expected_value: Any):
def __init__(self, key: Sequence[str], expected_value: Any):
super().__init__(key)
self._expected_value = expected_value

Expand Down Expand Up @@ -219,7 +221,7 @@ class WildcardStringFilterExpression(KeyValueBasedFilterExpression):
wc = re.compile(r"((?:\\)*\*)")
wq = re.compile(r"((?:\\)*\?)")

def __init__(self, key: List[str], expected_value: Any):
def __init__(self, key: Sequence[str], expected_value: Any):
super().__init__(key, expected_value)
new_string = re.escape(str(self._expected_value))

Expand Down Expand Up @@ -290,7 +292,7 @@ def does_match(self, document: dict) -> bool:
class RangeBasedFilterExpression(KeyBasedFilterExpression):
"""Base class of filter expressions that match for a range of values."""

def __init__(self, key: List[str], lower_bound: float, upper_bound: float):
def __init__(self, key: Sequence[str], lower_bound: float, upper_bound: float):
super().__init__(key)
self._lower_bound = lower_bound
self._upper_bound = upper_bound
Expand Down Expand Up @@ -326,7 +328,7 @@ class RegExFilterExpression(KeyValueBasedFilterExpression):
match_escaping_pattern = re.compile(r".*?(?P<escaping>\\*)\$$")
match_parts_pattern = re.compile(r"^(?P<flag>\(\?\w\))?(?P<start>\^)?(?P<pattern>.*)")

def __init__(self, key: List[str], regex: str):
def __init__(self, key: Sequence[str], regex: str):
self._regex = self._normalize_regex(regex)
self._matcher = re.compile(self._regex)
super().__init__(key, f"/{self._regex.strip('^$')}/")
Expand Down
29 changes: 17 additions & 12 deletions logprep/filter/lucene_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
import logging
import re
from itertools import chain, zip_longest
from typing import Sequence

import luqum
from luqum.parser import IllegalCharacterError, ParseSyntaxError, parser
Expand Down Expand Up @@ -124,6 +125,7 @@
SigmaFilterExpression,
StringFilterExpression,
)
from logprep.util.helper import field_list_to_dotted_field, get_dotted_field_list

# pylint: enable=anomalous-backslash-in-string

Expand Down Expand Up @@ -228,6 +230,12 @@ def _escape_ends_of_expressions(query_string):
new_string = new_string[: -(cnt_backslashes + 1)] + new_end
return new_string

@staticmethod
def dotted_field_to_list(dotted_field: str, unescape: bool) -> Sequence[str]:
if unescape:
return [v.replace("\\\\", "\\") for v in get_dotted_field_list(dotted_field)]
return get_dotted_field_list(dotted_field)


class LuceneTransformer:
"""A transformer that converts a luqum tree into a FilterExpression."""
Expand Down Expand Up @@ -304,7 +312,7 @@ def _create_field_group_expression(
Parsed filter expression.

"""
key = dotted_field.split(".")
key = LuceneFilter.dotted_field_to_list(dotted_field, unescape=False)
value = self._strip_quote_from_string(tree.value)
value = self._remove_lucene_escaping(value)

Expand All @@ -320,17 +328,15 @@ def _collect_children(self, tree: luqum.tree) -> list[FilterExpression]:

def _create_field(self, tree: luqum.tree) -> FilterExpression:
if isinstance(tree.expr, (Phrase, Word)):
key = tree.name.replace("\\", "")
key = key.split(".")
key = LuceneFilter.dotted_field_to_list(tree.name, unescape=True)
if tree.expr.value == "null":
return Null(key)

value = self._strip_quote_from_string(tree.expr.value)
value = self._remove_lucene_escaping(value)
return self._get_filter_expression(key, value)
if isinstance(tree.expr, Regex):
key = tree.name.replace("\\", "")
key = key.split(".")
key = LuceneFilter.dotted_field_to_list(tree.name, unescape=True)
if tree.expr.value == "null":
return Null(key)

Expand All @@ -340,22 +346,22 @@ def _create_field(self, tree: luqum.tree) -> FilterExpression:
raise LuceneFilterError(f'The expression "{str(tree)}" is invalid!')

@staticmethod
def _check_key_and_modifier(key, value):
def _check_key_and_modifier(key: Sequence[str], value):
key_and_modifier = key[-1].split("|")
if len(key_and_modifier) == 2:
if key_and_modifier[-1] == "re":
return RegExFilterExpression(key[:-1] + key_and_modifier[:-1], value)
return RegExFilterExpression([*key[:-1], *key_and_modifier[:-1]], value)
return None

def _get_filter_expression(
self, key: list[str], value
self, key: Sequence[str], value
) -> RegExFilterExpression | StringFilterExpression | SigmaFilterExpression:

key_and_modifier_check = LuceneTransformer._check_key_and_modifier(key, value)
if key_and_modifier_check is not None:
return key_and_modifier_check

dotted_field = ".".join(key)
dotted_field = field_list_to_dotted_field(key)

if self._special_fields.items():
for sf_key, sf_value in self._special_fields.items():
Expand All @@ -371,7 +377,7 @@ def _get_filter_expression(
return StringFilterExpression(key, value)

def _get_filter_expression_regex(
self, key: list[str], value
self, key: Sequence[str], value
) -> RegExFilterExpression | StringFilterExpression:

key_and_modifier_check = LuceneTransformer._check_key_and_modifier(key, value)
Expand All @@ -383,8 +389,7 @@ def _get_filter_expression_regex(

@staticmethod
def _create_value_expression(word: luqum.tree) -> Exists | Always:
value = word.value.replace("\\", "")
value = value.split(".")
value = LuceneFilter.dotted_field_to_list(word.value, unescape=True)
if value == ["*"]:
return Always(True)
return Exists(value)
Expand Down
20 changes: 12 additions & 8 deletions logprep/framework/rule_tree/rule_tagger.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
"""This module implements functionality to add tags to filter expressions."""

from typing import Union, List
from typing import List, Union

from logprep.filter.expression.filter_expression import (
StringFilterExpression,
Exists,
Not,
KeyBasedFilterExpression,
Not,
StringFilterExpression,
)
from logprep.util.helper import get_dotted_field_list

Expand Down Expand Up @@ -66,10 +66,14 @@ def _add_tags_to_rule_expressions(self, rule_expressions):
self._add_tag(rule_expressions, self._tag_map[next_expression.key[0]])

def _expression_in_tag_map(self, expression):
return (
isinstance(expression, KeyBasedFilterExpression)
and expression.key[0] in self._tag_map.keys()
)
try:
return (
isinstance(expression, KeyBasedFilterExpression)
and expression.key[0] in self._tag_map.keys()
)
except IndexError as error:
print("LOL")
raise error

@staticmethod
def _add_tag(expressions: List[KeyBasedFilterExpression], tag_map_value: str):
Expand Down Expand Up @@ -98,7 +102,7 @@ def _add_tag(expressions: List[KeyBasedFilterExpression], tag_map_value: str):
key, value = tag_map_value.split(":")
expressions.insert(0, StringFilterExpression(get_dotted_field_list(key), value))
else:
expressions.insert(0, Exists(tag_map_value.split(".")))
expressions.insert(0, Exists(get_dotted_field_list(tag_map_value)))

@staticmethod
def _tag_exists(expression: KeyBasedFilterExpression, tag: str) -> bool:
Expand Down
12 changes: 7 additions & 5 deletions logprep/generator/manipulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
versions.
"""

import datetime
import logging
from datetime import datetime
from functools import reduce
from typing import TYPE_CHECKING, List

from logprep.util.helper import get_dotted_field_list

if TYPE_CHECKING:
from logprep.generator.input import EventClassConfig

Expand Down Expand Up @@ -57,10 +58,11 @@ def _replace_timestamp(self, event: dict, timestamp) -> None:
field_key = timestamp.key
timestamp_format = timestamp.format
time_delta = timestamp.time_delta
output_field_key = [event, *field_key.split(".")]
target_key = output_field_key.pop()
target_field = reduce(self._add_and_overwrite_key, output_field_key)
target_field |= {target_key: (datetime.now() + time_delta).strftime(timestamp_format)}
output_field_key = get_dotted_field_list(field_key)
target_field = reduce(self._add_and_overwrite_key, output_field_key[:-1], event)
target_field |= {
output_field_key[-1]: (datetime.now() + time_delta).strftime(timestamp_format)
}

def _add_and_overwrite_key(self, sub_dict: dict, key: str) -> dict:
"""
Expand Down
Loading