Skip to content

Commit 9d6c5ff

Browse files
Pablu23mhoff
andauthored
feat: make mapping of Generic Resolver yaml compliant (#928)
* Add list[tuple[str]] as valid resolve_list type, change test to account for tuple, COMMITED WITH NO-VERIFY * Add converter func * Apparently resolve_list is a mapping from str to dict * Add test to test converter * update changelog * Add typevar and rename func * Rename INPUT_TYPE and add KEY_TYPE, extract to own utils file, with own utils test, add wrapper function and keep merge_dicts relativly clean * Remove assert isinstance for now, rework converters, update tests * Add FieldValue as Dict value * pull up path * Update tests and add doc * Update logprep/processor/generic_resolver/rule.py Co-authored-by: Michael Hoff <mail@michael-hoff.net> * fix review comments * Convert from file load, fix tests, fix Key and Value vartypes * Upgrade uv.lock for protobuf cve fix * Update logprep/processor/generic_resolver/rule.py Co-authored-by: Michael Hoff <9436725+mhoff@users.noreply.github.com> * Rename test id, add pydoc to converters * Split long comment line into multiple * Change test to expect ValueError not InvalidConfigurationError --------- Co-authored-by: Michael Hoff <mail@michael-hoff.net> Co-authored-by: Michael Hoff <9436725+mhoff@users.noreply.github.com>
1 parent 2d8f9f4 commit 9d6c5ff

File tree

7 files changed

+582
-429
lines changed

7 files changed

+582
-429
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
* simplify Dockerfile and remove docker build support for `LOGPREP_VERSION`
99

1010
### Bugfix
11+
* generic resolver now follows the YAML standard: `resolve_list` additionally accepts a list of single-key mappings instead of relying on the ordering of a dict
1112

1213
## 18.0.1
1314
### Breaking

examples/exampledata/config/pipeline.yml

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@ logger:
99
format: "%(asctime)-15s %(hostname)-5s %(name)-10s %(levelname)-8s: %(message)s"
1010
datefmt: "%Y-%m-%d %H:%M:%S"
1111
loggers:
12-
"py.warnings": {"level": "ERROR"}
13-
"Runner": {"level": "DEBUG"}
14-
"Processor": {"level": "ERROR"}
15-
"Exporter": {"level": "ERROR"}
16-
"uvicorn": {"level": "ERROR"}
17-
"uvicorn.access": {"level": "ERROR"}
18-
"OpenSearchOutput": {"level": "ERROR"}
12+
"py.warnings": { "level": "ERROR" }
13+
"Runner": { "level": "DEBUG" }
14+
"Processor": { "level": "ERROR" }
15+
"Exporter": { "level": "ERROR" }
16+
"uvicorn": { "level": "ERROR" }
17+
"uvicorn.access": { "level": "ERROR" }
18+
"OpenSearchOutput": { "level": "ERROR" }
1919
metrics:
2020
enabled: true
2121
port: 8001
@@ -73,6 +73,15 @@ pipeline:
7373
calculator:
7474
target_field: "calculation"
7575
calc: "1 + 1"
76+
- generic_resolver:
77+
type: generic_resolver
78+
rules:
79+
- filter: "test_label"
80+
generic_resolver:
81+
field_mapping:
82+
test_label: resolved
83+
resolve_list:
84+
- .*Hello.*: Greeting
7685
input:
7786
kafka:
7887
type: confluentkafka_input

logprep/processor/generic_resolver/rule.py

Lines changed: 58 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,23 @@
2828
resolve_list:
2929
.*Hello.*: Greeting
3030
31+
For YAML compliance, it is possible to declare the resolve list as follows
32+
to maintain ordering when using the configuration file with different programs.
33+
Both styles will continue to be supported in the future; however, this one is recommended for clarity and YAML compliance.
34+
35+
.. code-block:: yaml
36+
:linenos:
37+
:caption: Example
38+
39+
filter: to_resolve
40+
generic_resolver:
41+
field_mapping:
42+
to_resolve: resolved
43+
resolve_list:
44+
- .*Hello.*: Greeting
45+
- .*error.*: Error
46+
- never_match: Panic
47+
3148
Alternatively, a YML file with a resolve list and a regex pattern can be used to resolve values.
3249
For this, a field :code:`resolve_from_file` with the subfields :code:`path` and :code:`pattern`
3350
must be added.
@@ -117,15 +134,19 @@
117134
"""
118135

119136
import re
120-
from functools import cached_property
137+
import typing
138+
from functools import cached_property, partial
121139
from pathlib import Path
122-
from typing import List, Optional, Tuple, Union
123140

124141
from attrs import define, field, validators
125142

126143
from logprep.factory_error import InvalidConfigurationError
127144
from logprep.processor.field_manager.rule import FieldManagerRule
145+
from logprep.util.converters import (
146+
convert_ordered_mapping_or_keep_mapping,
147+
)
128148
from logprep.util.getter import GetterFactory, RefreshableGetter
149+
from logprep.util.helper import FieldValue
129150

130151

131152
class GenericResolverRule(FieldManagerRule):
@@ -145,15 +166,22 @@ class Config(FieldManagerRule.Config):
145166
]
146167
)
147168
"""Mapping in form of :code:`{SOURCE_FIELD: DESTINATION_FIELD}`"""
148-
resolve_list: dict = field(validator=(validators.instance_of(dict)), factory=dict)
169+
resolve_list: dict[str, FieldValue] = field(
170+
validator=validators.deep_mapping(
171+
key_validator=validators.instance_of(str),
172+
mapping_validator=validators.instance_of(dict),
173+
),
174+
converter=convert_ordered_mapping_or_keep_mapping,
175+
factory=dict,
176+
)
149177
"""lookup mapping in form of
150178
:code:`{REGEX_PATTERN_0: ADDED_VALUE_0, ..., REGEX_PATTERN_N: ADDED_VALUE_N}`"""
151179
resolve_from_file: dict = field(
152180
validator=[
153181
validators.instance_of(dict),
154182
validators.deep_mapping(
155183
key_validator=validators.in_(["path", "pattern"]),
156-
value_validator=validators.instance_of(Union[str, int]),
184+
value_validator=validators.instance_of((str, int)),
157185
),
158186
],
159187
factory=dict,
@@ -178,38 +206,39 @@ class Config(FieldManagerRule.Config):
178206
authenticity and integrity of the loaded values.
179207
180208
"""
181-
ignore_case: Optional[str] = field(validator=validators.instance_of(bool), default=False)
209+
ignore_case: bool = field(validator=validators.instance_of(bool), default=False)
182210
"""(Optional) Ignore case when matching resolve values. Defaults to :code:`False`."""
183211

184212
additions: dict = field(default={}, eq=False, init=False)
185213
"""Contains a dictionary of field names and values that should be added."""
186214

187215
@property
188-
def _file_path(self):
216+
def _file_path(self) -> None | str:
189217
"""Returns the file path"""
190218
return self.resolve_from_file.get("path")
191219

192220
def __attrs_post_init__(self):
193221
if self._file_path:
194222
getter = GetterFactory.from_string(self._file_path)
195223
if isinstance(getter, RefreshableGetter):
196-
getter.add_callback(self._add_from_path)
197-
self._add_from_path()
224+
getter.add_callback(partial(self._add_from_path, self._file_path))
225+
self._add_from_path(self._file_path)
198226

199-
def _add_from_path(self):
227+
def _add_from_path(self, path: str):
200228
self._raise_if_pattern_is_invalid()
201-
self._raise_if_file_does_not_exist()
202-
additions = self._get_additions()
229+
self._raise_if_file_does_not_exist(path)
230+
additions = self._get_additions_from_path(path)
203231
if self.ignore_case:
204232
additions = {key.upper(): value for key, value in additions.items()}
205233
self.additions = additions
206234

207-
def _get_additions(self) -> dict:
235+
def _get_additions_from_path(self, path: str) -> dict:
208236
try:
209-
additions = GetterFactory.from_string(self._file_path).get_dict()
237+
additions = GetterFactory.from_string(path).get_collection()
238+
additions = convert_ordered_mapping_or_keep_mapping(additions)
210239
except ValueError as error:
211240
raise InvalidConfigurationError(
212-
f"Error loading additions from '{self._file_path}': {error}"
241+
f"Error loading additions from '{path}': {error}"
213242
) from error
214243
return additions
215244

@@ -219,39 +248,44 @@ def _raise_if_pattern_is_invalid(self):
219248
f"Mapping group is missing in mapping file pattern! (Rule ID: '{self.id}')"
220249
)
221250

222-
def _raise_if_file_does_not_exist(self):
223-
if not (self._file_path.startswith("http") or Path(self._file_path).is_file()):
251+
def _raise_if_file_does_not_exist(self, path: str):
252+
if not (path.startswith("http") or Path(path).is_file()):
224253
raise InvalidConfigurationError(
225-
f"Additions file '{self._file_path}' not found! (Rule ID: '{self.id}')",
254+
f"Additions file '{path}' not found! (Rule ID: '{self.id}')",
226255
)
227256

257+
@property
258+
def config(self) -> Config:
259+
"""Returns the typed GenericResolverRule.Config"""
260+
return typing.cast(GenericResolverRule.Config, self._config)
261+
228262
@property
229263
def field_mapping(self) -> dict:
230264
"""Returns the field mapping"""
231-
return self._config.field_mapping
265+
return self.config.field_mapping
232266

233267
@property
234268
def resolve_list(self) -> dict:
235269
"""Returns the resolve list"""
236-
return self._config.resolve_list
270+
return self.config.resolve_list
237271

238272
@cached_property
239-
def compiled_resolve_list(self) -> List[Tuple[re.Pattern, str]]:
273+
def compiled_resolve_list(self) -> list[tuple[re.Pattern, FieldValue]]:
240274
"""Returns the resolve list with tuple pairs of compiled patterns and values"""
241275
return [
242276
(re.compile(pattern, re.I if self.ignore_case else 0), val)
243-
for pattern, val in self._config.resolve_list.items()
277+
for pattern, val in self.config.resolve_list.items()
244278
]
245279

246280
@property
247281
def resolve_from_file(self) -> dict:
248282
"""Returns the resolve file"""
249-
return self._config.resolve_from_file
283+
return self.config.resolve_from_file
250284

251285
@property
252286
def ignore_case(self) -> bool:
253287
"""Returns if the matching should be case-sensitive or not"""
254-
return self._config.ignore_case
288+
return self.config.ignore_case
255289

256290
@cached_property
257291
def pattern(self) -> re.Pattern:
@@ -261,4 +295,4 @@ def pattern(self) -> re.Pattern:
261295
@property
262296
def additions(self) -> dict:
263297
"""Returns additions from the resolve file"""
264-
return self._config.additions
298+
return self.config.additions

logprep/util/converters.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from typing import TypeVar
2+
3+
Key = TypeVar("Key")
4+
Value = TypeVar("Value")
5+
6+
7+
def convert_ordered_mapping_or_keep_mapping(
    dict_or_sequence: dict[Key, Value] | list[dict[Key, Value]],
) -> dict[Key, Value]:
    """Normalize a mapping given as a dict or as a list of single-item dicts.

    A dict is returned as is (the very same object, no copy is made).
    A list is handed to ``convert_ordered_mapping``, which merges its
    single-item dicts into one dict.

    Raises
    ------
    ValueError
        If the input is neither a dict nor a list, or if the list can not be
        converted by ``convert_ordered_mapping``.
    """
    if isinstance(dict_or_sequence, dict):
        return dict_or_sequence

    if not isinstance(dict_or_sequence, list):
        # Name both accepted types and the offending one; the message still
        # starts with "expected list" for anything matching on the old text.
        raise ValueError(f"expected list or dict, got {type(dict_or_sequence).__name__}")

    return convert_ordered_mapping(dict_or_sequence)
18+
19+
20+
def convert_ordered_mapping(dicts: list[dict[Key, Value]]) -> dict[Key, Value]:
    """Merge a list of single-item dicts into one dict, keeping the list order.

    Every element has to be a dict with exactly one key, and no key may occur
    more than once across the list. An empty list yields an empty dict.

    Raises
    ------
    ValueError
        If an element is not a dict, if an element does not have exactly one
        key, or if a key occurs a second time.
    """
    ordered_mapping: dict[Key, Value] = {}
    for index, element in enumerate(dicts):
        if not isinstance(element, dict):
            # Without this guard a non-dict element (e.g. a plain string in the
            # list) fails with AttributeError on ``.keys()`` instead of the
            # ValueError this function raises for every other malformed input.
            raise ValueError(f"expected dict at index {index}, got {type(element).__name__}")
        if len(element) != 1:
            raise ValueError(f"dict has not exactly one key (index {index})")
        ((key, value),) = element.items()
        if key in ordered_mapping:
            raise ValueError(f"dict already has key: {key!r}")
        ordered_mapping[key] = value

    return ordered_mapping

0 commit comments

Comments
 (0)