Skip to content

Commit 82a4663

Browse files
author
ashariyar
committed
rename highlight-patterns()
1 parent 6376826 commit 82a4663

File tree

3 files changed

+30
-34
lines changed

3 files changed

+30
-34
lines changed

pdfalyzer/output/theme.py

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from pdfalyzer.util import adobe_strings
2323
from pdfalyzer.util.helpers.collections_helper import prefix_keys, safe_json
2424
from pdfalyzer.util.helpers.rich_helper import vertically_padded_panel
25-
from pdfalyzer.util.helpers.string_helper import regex_to_highlight_pattern
25+
from pdfalyzer.util.helpers.string_helper import highlight_pattern
2626

2727
ClassStyle = namedtuple('ClassStyle', ['cls', 'style'])
2828

@@ -73,6 +73,8 @@
7373
ClassStyle(NoneType, NULL_STYLE),
7474
]
7575

76+
PDF_OBJ_TYPE_STYLE_DICT = {f"{cs.cls.__name__}": cs.style for cs in PDF_OBJ_TYPE_STYLES}
77+
7678
# Subclasses of the key type will be styled with the value string
7779
OBJ_TYPE_STYLES = PDF_OBJ_TYPE_STYLES + [
7880
ClassStyle(Number, 'cyan bold'),
@@ -146,28 +148,22 @@
146148
adobe_strings.TRUE: 'green bold',
147149
})
148150

151+
# Compile regexes as keys
152+
NODE_STYLE_REGEXES = {re.compile(k): v for k, v in NODE_STYLES_BASE_DICT.items()}
153+
154+
# Unite class styles for things like ArrayObject with node styles for things like /Parent
149155
NODE_STYLES_THEME_DICT = {
150156
**PdfHighlighter.prefix_styles({k.removeprefix('/'): v for k, v in NODE_STYLES_BASE_DICT.items()}),
151-
**PdfHighlighter.prefix_styles({f"{cs.cls.__name__}": cs.style for cs in PDF_OBJ_TYPE_STYLES})
157+
**PdfHighlighter.prefix_styles(PDF_OBJ_TYPE_STYLE_DICT)
152158
}
153159

160+
# Merge all the theme dicts
154161
LOG_THEME_DICT = LogHighlighter.prefix_styles(LOG_HIGHLIGHT_STYLES)
155162
COMPLETE_THEME_DICT = {**PDFALYZER_THEME_DICT, **LOG_THEME_DICT, **NODE_STYLES_THEME_DICT}
156163

157-
# Compile regexes as keys
158-
NODE_STYLE_REGEXES = {re.compile(k): v for k, v in NODE_STYLES_BASE_DICT.items()}
159-
160-
161164
# Add patterns to highlighters
162-
LogHighlighter.set_highlights(
163-
LOG_HIGHLIGHT_PATTERNS +
164-
[regex_to_highlight_pattern(cs.cls.__name__) for cs in PDF_OBJ_TYPE_STYLES]
165-
)
166-
167-
PdfHighlighter.set_highlights(
168-
[regex_to_highlight_pattern(r) for r in NODE_STYLE_REGEXES.keys()]
169-
)
170-
165+
LogHighlighter.set_highlights(LOG_HIGHLIGHT_PATTERNS + [highlight_pattern(k) for k in PDF_OBJ_TYPE_STYLE_DICT.keys()])
166+
PdfHighlighter.set_highlights([highlight_pattern(r) for r in NODE_STYLE_REGEXES.keys()])
171167

172168
# Push themes into the console objects that manage stdout.
173169
console.push_theme(Theme(COMPLETE_THEME_DICT))

pdfalyzer/util/helpers/string_helper.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,22 @@ def has_a_common_substring(strings: list[str]) -> bool:
7070
return all([is_substring_of_longer_strings_in_list(s, strings) for s in strings])
7171

7272

73+
def highlight_pattern(regex: re.Pattern | str) -> str:
    """
    Wrap a regex in a named capture group usable as a rich.Highlighter pattern.

    Example result: `(?P<stream_object>((De|En)coded)?StreamObject)`.

    Args:
        regex: A compiled pattern or a raw pattern string.

    Returns:
        A string of the form `(?P<label>...)`, where the label comes from
        `regex_to_capture_group_label()`.
    """
    if isinstance(regex, re.Pattern):
        source = regex.pattern
    else:
        source = regex

    group_name = regex_to_capture_group_label(regex)

    # Patterns of one or two characters are anchored at both ends and returned as-is.
    if len(source) <= 2:
        return fr"(?P<{group_name}>^{source}$)"

    # Longer patterns get a trailing word boundary and lose one leading '/'.
    body = fr"{source}\b".removeprefix('/')

    # NOTE(review): inside a character class `\b` is a backspace, not a word
    # boundary, so this prefix matches a literal '/' or backspace - confirm intent.
    if not body.startswith('^'):
        body = fr"[\b/]{body}"

    return fr"(?P<{group_name}>{body})"
87+
88+
7389
def is_array_idx(address: str) -> bool:
7490
"""True if address looks like '[23]'."""
7591
return bool(ARRAY_IDX_REGEX.match(address))
@@ -118,22 +134,6 @@ def regex_to_capture_group_label(pattern: re.Pattern | str) -> str:
118134
return NON_WORD_CHAR_REGEX.sub('', pattern.replace('|', '_'))
119135

120136

121-
def regex_to_highlight_pattern(regex: re.Pattern | str) -> str:
122-
"""Wrap a regex in a named capture group, e.g. `(?P<stream_object>((De|En)coded)?StreamObject)`."""
123-
pattern = regex.pattern if isinstance(regex, re.Pattern) else regex
124-
label = regex_to_capture_group_label(regex)
125-
126-
if len(pattern) <= 2:
127-
pattern = fr"^{pattern}$"
128-
else:
129-
pattern = fr"{pattern}\b".removeprefix('/')
130-
131-
if not pattern.startswith('^'):
132-
pattern = fr"[\b/]{pattern}"
133-
134-
return fr"(?P<{label}>{pattern})"
135-
136-
137137
def replace_digits(string_with_digits: str) -> str:
138138
"""Turn all digits to X chars in a string."""
139139
return DIGIT_REGEX.sub('x', string_with_digits)

tests/pdfalyzer/helpers/test_string_helper.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,6 @@ def test_regex_to_capture_group_label():
3636

3737

3838
def test_regex_to_highlight_pattern():
# Pins the exact output for OBJ_REGEX, for its pattern string minus the first character, and for the 2-character pattern '/W'.
39-
assert regex_to_highlight_pattern(OBJ_REGEX) == r"(?P<JavaScript_JS_OpenAction>^(JavaScript|JS|OpenAction)\b)"
40-
assert regex_to_highlight_pattern(OBJ_REGEX.pattern[1:]) == r"(?P<JavaScript_JS_OpenAction>[\b/](JavaScript|JS|OpenAction)\b)"
41-
assert regex_to_highlight_pattern(re.compile('/W')) == r"(?P<W>^/W$)"
39+
assert highlight_pattern(OBJ_REGEX) == r"(?P<JavaScript_JS_OpenAction>^(JavaScript|JS|OpenAction)\b)"
40+
assert highlight_pattern(OBJ_REGEX.pattern[1:]) == r"(?P<JavaScript_JS_OpenAction>[\b/](JavaScript|JS|OpenAction)\b)"
41+
assert highlight_pattern(re.compile('/W')) == r"(?P<W>^/W$)"

0 commit comments

Comments
 (0)