2323.. automodule:: logprep.processor.generic_resolver.rule
2424"""
2525
26+ import typing
27+ from copy import deepcopy
2628from functools import cached_property , lru_cache
27- from typing import Optional
29+ from typing import Callable
2830
2931from attrs import define , field , validators
3032
3133from logprep .abc .processor import Processor
3234from logprep .metrics .metrics import GaugeMetric
3335from logprep .processor .base .exceptions import FieldExistsWarning
36+ from logprep .processor .base .rule import Rule
3437from logprep .processor .field_manager .processor import FieldManager
3538from logprep .processor .generic_resolver .rule import GenericResolverRule
36- from logprep .util .helper import add_fields_to , get_dotted_field_value
39+ from logprep .util .helper import (
40+ MISSING ,
41+ FieldValue ,
42+ Missing ,
43+ add_fields_to ,
44+ get_dotted_field_value ,
45+ )
46+ from logprep .util .typing import is_lru_cached
3747
3848
3949class GenericResolver (FieldManager ):
@@ -43,9 +53,7 @@ class GenericResolver(FieldManager):
4353 class Config (Processor .Config ):
4454 """GenericResolver config"""
4555
46- max_cache_entries : Optional [int ] = field (
47- validator = validators .optional (validators .instance_of (int )), default = 0
48- )
56+ max_cache_entries : int = field (validator = validators .instance_of (int ), default = 0 )
4957 """(Optional) Size of cache for results when resolving from a list.
5058 The cache can be disabled by setting this option to :code:`0`.
5159
@@ -56,9 +64,7 @@ class Config(Processor.Config):
5664 and OOM situations caused by the generic resolver cache.
5765
5866 """
59- cache_metrics_interval : Optional [int ] = field (
60- validator = validators .optional (validators .instance_of (int )), default = 1
61- )
67+ cache_metrics_interval : int = field (validator = validators .instance_of (int ), default = 1 )
6268 """(Optional) Cache metrics won't be updated immediately.
6369 Instead updating is skipped for a number of events before it's next update.
6470 :code:`cache_metrics_interval` sets the number of events between updates (default: 1)."""
@@ -104,24 +110,32 @@ class Metrics(FieldManager.Metrics):
104110 rule_class = GenericResolverRule
105111
@property
def config(self) -> Config:
    """Return this processor's configuration typed as ``GenericResolver.Config``.

    ``typing.cast`` performs no runtime conversion or validation; the object
    stored in ``self._config`` is returned unchanged. The cast exists so that
    static type checkers know which config attributes are available.
    """
    return typing.cast(GenericResolver.Config, self._config)
116+
@property
def max_cache_entries(self) -> int:
    """Return the configured ``max_cache_entries`` value.

    The value is read through the typed ``config`` accessor rather than
    directly from ``self._config``.
    """
    return self.config.max_cache_entries
110121
@property
def cache_metrics_interval(self) -> int:
    """Return the configured ``cache_metrics_interval`` value.

    The value is read through the typed ``config`` accessor rather than
    directly from ``self._config``.
    """
    return self.config.cache_metrics_interval
115126
@cached_property
def _get_lru_cached_value_from_list(
    self,
) -> Callable[[GenericResolverRule, str], FieldValue | Missing]:
    """Return the callable used to resolve a value from a rule's resolve list.

    When ``max_cache_entries`` is ``0`` or negative, the plain bound method
    ``_resolve_value_from_list`` is returned and nothing is cached. Otherwise
    that method is wrapped in ``functools.lru_cache`` with ``maxsize`` set to
    ``max_cache_entries``.

    Because this is a ``cached_property``, the selection (and the creation of
    the lru cache wrapper) happens once per instance on first access.
    """
    if self.max_cache_entries <= 0:
        return self._resolve_value_from_list
    return lru_cache(maxsize=self.max_cache_entries)(self._resolve_value_from_list)
122135
123- def _apply_rules (self , event : dict , rule : GenericResolverRule ) -> None :
136+ def _apply_rules (self , event : dict , rule : Rule ) -> None :
124137 """Apply the given rule to the current event"""
138+ rule = typing .cast (GenericResolverRule , rule )
125139 source_field_values = [
126140 get_dotted_field_value (event , source_field )
127141 for source_field in rule .field_mapping .keys ()
@@ -130,25 +144,30 @@ def _apply_rules(self, event: dict, rule: GenericResolverRule) -> None:
130144 conflicting_fields = []
131145 for source_field , target_field in rule .field_mapping .items ():
132146 source_field_value = str (get_dotted_field_value (event , source_field ))
133- content = self ._find_content_of_first_matching_pattern (rule , source_field_value )
134- if not content :
147+ resolved_content = self ._find_content_of_first_matching_pattern (
148+ rule , source_field_value
149+ )
150+ if resolved_content is MISSING :
135151 continue
136152 current_content = get_dotted_field_value (event , target_field )
137- if isinstance (current_content , list ) and content in current_content :
153+ if isinstance (current_content , list ) and resolved_content in current_content :
138154 continue
155+ if isinstance (resolved_content , (list , dict )):
156+ resolved_content = deepcopy (resolved_content )
139157 try :
140158 add_fields_to (
141159 event ,
142160 fields = {
143161 target_field : (
144- [content ]
162+ [resolved_content ]
145163 if rule .merge_with_target and current_content is None
146- else content
164+ else resolved_content
147165 )
148166 },
149167 rule = rule ,
150168 merge_with_target = rule .merge_with_target ,
151169 overwrite_target = rule .overwrite_target ,
170+ skip_none = False ,
152171 )
153172 except FieldExistsWarning as error :
154173 conflicting_fields .extend (error .skipped_fields )
@@ -160,28 +179,28 @@ def _apply_rules(self, event: dict, rule: GenericResolverRule) -> None:
160179
def _find_content_of_first_matching_pattern(
    self, rule: GenericResolverRule, source_field_value: str
) -> FieldValue | Missing:
    """Resolve ``source_field_value`` to the content configured in ``rule``.

    If ``rule.resolve_from_file`` is set, ``rule.pattern`` is matched against
    the value and its ``mapping`` group (upper-cased when ``rule.ignore_case``
    is set) is looked up in ``rule.additions``. If that lookup yields nothing,
    or ``resolve_from_file`` is not set, the value is resolved through
    ``_get_lru_cached_value_from_list``.

    Returns:
        The content found in ``rule.additions``, otherwise whatever
        ``_get_lru_cached_value_from_list`` returns for the value.
    """
    if rule.resolve_from_file:
        matches = rule.pattern.match(source_field_value)
        if matches:
            mapping = matches.group("mapping")
            if rule.ignore_case:
                mapping = mapping.upper()
            # Use the MISSING sentinel instead of a truthiness test so that
            # falsy mapped values are still returned as a successful lookup.
            content = rule.additions.get(mapping, MISSING)
            if content is not MISSING:
                return content
    return self._get_lru_cached_value_from_list(rule, source_field_value)
174193
def _resolve_value_from_list(
    self, rule: GenericResolverRule, source_field_value: str
) -> FieldValue | Missing:
    """Return the content of the first resolve-list pattern found in the value.

    Iterates ``rule.compiled_resolve_list`` in order and uses ``pattern.search``,
    so a pattern may match anywhere inside ``source_field_value``.

    Returns:
        The content paired with the first matching pattern, or ``MISSING``
        when no pattern matches.
    """
    for pattern, content in rule.compiled_resolve_list:
        if pattern.search(source_field_value):
            return content
    return MISSING
182201
183- def _update_cache_metrics (self ):
184- if self .max_cache_entries <= 0 :
202+ def _update_cache_metrics (self ) -> None :
203+ if not is_lru_cached ( self ._get_lru_cached_value_from_list ) :
185204 return
186205 self ._cache_metrics_skip_count += 1
187206 if self ._cache_metrics_skip_count < self .cache_metrics_interval :
@@ -192,8 +211,8 @@ def _update_cache_metrics(self):
192211 self .metrics .new_results += cache_info .misses
193212 self .metrics .cached_results += cache_info .hits
194213 self .metrics .num_cache_entries += cache_info .currsize
195- self .metrics .cache_load += cache_info .currsize / cache_info . maxsize
214+ self .metrics .cache_load += cache_info .currsize / self . max_cache_entries
196215
def setup(self) -> None:
    """Run the parent class setup and reset the cache-metrics skip counter to 0."""
    super().setup()
    self._cache_metrics_skip_count = 0
0 commit comments