Skip to content

Commit 1636eb0

Browse files
authored
[pr] changes based on feedback (#16)
Files: * lib.rs: Fix Java placeholder regex, it was consuming too much stuff in between the curlies. * source_ref.rs: Add comments for the regexes. Don't reuse `text` in `build_matcher()`, it's really confusing.
1 parent 414b7f1 commit 1636eb0

File tree

3 files changed

+47
-18
lines changed

3 files changed

+47
-18
lines changed

src/lib.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ static RUST_PLACEHOLDER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
279279
});
280280

281281
static JAVA_PLACEHOLDER_REGEX: LazyLock<Regex> =
282-
LazyLock::new(|| Regex::new(r#"\{.*}|\\\{(.*)}"#).unwrap());
282+
LazyLock::new(|| Regex::new(r#"\{[^}]*}|\\\{([^}]*)}"#).unwrap());
283283

284284
static CPP_PLACEHOLDER_REGEX: LazyLock<Regex> = LazyLock::new(|| {
285285
Regex::new(r#"%[-+ #0]*\d*(?:\.\d+)?[hlLzjt]*[diuoxXfFeEgGaAcspn%]|\{(?:([a-zA-Z_][a-zA-Z0-9_.]*)|(\d+))?\s*(?::[^}]*)?}"#).unwrap()
@@ -338,7 +338,7 @@ impl SourceLanguage {
338338
arguments: [
339339
(argument_list (template_expression
340340
template_argument: (string_literal) @arguments))
341-
(argument_list (string_literal) @arguments)
341+
(argument_list . (string_literal) @arguments)
342342
]
343343
(#match? @object-name "log(ger)?|LOG(GER)?")
344344
(#match? @method-name "fine|debug|info|warn|trace")
@@ -586,7 +586,7 @@ pub fn extract_logging_guarded(sources: &[CodeSource], guard: &WorkGuard) -> Vec
586586
let range = result.range;
587587
let source = code.buffer.as_str();
588588
let text = source[range.start_byte..range.end_byte].to_string();
589-
// println!("text={} matched.len()={}", text, matched.len());
589+
// eprintln!("text={} matched.len()={}", text, matched.len());
590590
// check the text doesn't match any of the logging related identifiers
591591
if code
592592
.info
@@ -921,7 +921,7 @@ def main(args):
921921
logging.info(f'Hello, {args[1]}!')
922922
logger.warning(f"warning message:\nlow disk space")
923923
logger.info(rf"""info message:
924-
processing started -- {args[0]}""")
924+
processing \started -- {args[0]}""")
925925
"#;
926926

927927
#[test]

src/snapshots/log2src__tests__basic_python.snap

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ expression: src_refs
4444
endLineNumber: 7
4545
column: 16
4646
name: main
47-
text: "rf\"\"\"info message:\nprocessing started -- {args[0]}\"\"\""
48-
quality: 33
49-
pattern: "(?s)^info message:\\nprocessing started -- (.+)$"
47+
text: "rf\"\"\"info message:\nprocessing \\started -- {args[0]}\"\"\""
48+
quality: 35
49+
pattern: "(?s)^info message:\\nprocessing \\\\started -- (.+)$"
5050
args:
5151
- Named: "args[0]"
5252
vars: []

src/source_ref.rs

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -119,19 +119,19 @@ fn build_matcher(raw: bool, text: &str, language: SourceLanguage) -> Option<Mess
119119
let mut quality = 0;
120120
for cap in language.get_placeholder_regex().captures_iter(text) {
121121
let placeholder = cap.get(0).unwrap();
122-
let text = escape_ignore_newlines(raw, &text[last_end..placeholder.start()]);
123-
quality += text.chars().filter(|c| !c.is_whitespace()).count();
124-
pattern.push_str(text.as_str());
122+
let subtext = escape_ignore_newlines(raw, &text[last_end..placeholder.start()]);
123+
quality += subtext.chars().filter(|c| !c.is_whitespace()).count();
124+
pattern.push_str(subtext.as_str());
125125
last_end = placeholder.end();
126126
pattern.push_str("(.+)");
127127
args.push(language.captures_to_format_arg(&cap));
128128
}
129-
let text = escape_ignore_newlines(raw, &text[last_end..]);
130-
quality += text.chars().filter(|c| !c.is_whitespace()).count();
129+
let subtext = escape_ignore_newlines(raw, &text[last_end..]);
130+
quality += subtext.chars().filter(|c| !c.is_whitespace()).count();
131131
if quality == 0 {
132132
None
133133
} else {
134-
pattern.push_str(text.as_str());
134+
pattern.push_str(subtext.as_str());
135135
pattern.push('$');
136136
Some(MessageMatcher {
137137
matcher: Regex::new(pattern.as_str()).unwrap(),
@@ -142,14 +142,27 @@ fn build_matcher(raw: bool, text: &str, language: SourceLanguage) -> Option<Mess
142142
}
143143
}
144144

145+
/// Regex for finding values that need to be escaped in a string-literal. The components are
146+
/// as follows:
147+
///
148+
/// * `[.*+?^${}()|\[\]]` - Characters that are used in regexes and need to be escaped.
149+
/// * `[\n\r\t]` - White space characters that we should turn into regex escape sequences.
150+
/// * `\\[0-7]{3}|\\0` - Regex does not support octal escape-sequences, so we need to turn
151+
/// them into a hex escape.
152+
/// * `\\N\{[^}]+}` - Python named-Unicode escape that is turned into a `\w` since it would be
153+
/// challenging to get the names all right.
145154
static ESCAPE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
146155
Regex::new(r#"([.*+?^${}()|\[\]])|([\n\r\t])|(\\[0-7]{3}|\\0)|(\\N\{[^}]+})"#).unwrap()
147156
});
148157

149-
// A regex for raw strings that doesn't try to interpret escape sequences.
150-
static RAW_ESCAPE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
151-
Regex::new(r#"([.*+?^${}()|\[\]])|([\n\r\t])|(\\)"#).unwrap()
152-
});
158+
/// Regex for finding values that need to be escaped in a raw string-literal. The components are
159+
/// as follows:
160+
///
161+
/// * `[.*+?^${}()|\[\]]` - Characters that are used in regexes and need to be escaped.
162+
/// * `[\n\r\t]` - White space characters that we should turn into regex escape sequences.
163+
/// * `\\` - A backslash
164+
static RAW_ESCAPE_REGEX: LazyLock<Regex> =
165+
LazyLock::new(|| Regex::new(r#"([.*+?^${}()|\[\]])|([\n\r\t])|(\\)"#).unwrap());
153166

154167
/// Escape special chars except newlines and carriage returns in order to support multiline strings
155168
fn escape_ignore_newlines(raw: bool, segment: &str) -> String {
@@ -181,7 +194,7 @@ fn escape_ignore_newlines(raw: bool, segment: &str) -> String {
181194
} else if let Some(c) = cap.get(3) {
182195
if raw {
183196
result.push('\\');
184-
result.push_str(c.as_str());
197+
result.push_str(c.as_str());
185198
} else {
186199
let c = c.as_str();
187200
let c = &c[1..];
@@ -303,4 +316,20 @@ mod tests {
303316
matcher.as_str()
304317
);
305318
}
319+
320+
#[test]
321+
fn test_build_matcher_raw() {
322+
let MessageMatcher { matcher, .. } = build_matcher(
323+
true,
324+
"Hard-coded \\Windows\\Path",
325+
SourceLanguage::Rust,
326+
)
327+
.unwrap();
328+
assert_eq!(
329+
Regex::new(r#"(?s)^Hard-coded \\Windows\\Path$"#)
330+
.unwrap()
331+
.as_str(),
332+
matcher.as_str()
333+
);
334+
}
306335
}

0 commit comments

Comments
 (0)