Skip to content

Commit a63ee3a

Browse files
authored
[source_ref] include the ending line of a log statement (ttiimm#13)
If a log statement is broken up over multiple lines, the user might refer to any of those lines when setting a breakpoint. So, we need to include the ending line number in the SourceRef. Files: * lib.rs: Add a method to get the StatementsInFile object from the LogMatcher. This is needed to handle a breakpoint request. Also, adjust the java tree-sitter query so template strings are handled using Named placeholders. * source_hier.rs: Completely ignore some directories in the source hierarchy, like version control ones. Add methods to find a file in the source hierarchy. * source_query.rs: Only return results from captures after the format string. * source_ref.rs: Add ending line number. * test_java.rs: Add a java test that uses slf4j-style log statements. * [source_ref] add (?s) to match multi-line values
1 parent 39189f8 commit a63ee3a

17 files changed

+214
-80
lines changed

src/lib.rs

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ pub enum LogError {
6161
/// Collection of log statements in a single source file
6262
#[derive(Debug)]
6363
pub struct StatementsInFile {
64-
pub filename: String,
64+
pub path: String,
6565
id: SourceFileID,
6666
pub log_statements: Vec<SourceRef>,
6767
/// A single matcher for all log statements.
@@ -113,14 +113,24 @@ impl LogMatcher {
113113
}
114114

115115
/// Check if the given path is covered by any of the roots in this matcher.
116-
pub fn match_path(&self, path: &Path) -> Option<PathBuf> {
116+
pub fn match_path(&self, path: &Path) -> Option<(&PathBuf, &SourceTree)> {
117117
self.roots
118118
.iter()
119119
.filter(|(existing_path, _coll)| path.starts_with(existing_path))
120-
.map(|(path, _coll)| path.clone())
121120
.next()
122121
}
123122

123+
pub fn find_source_file_statements(&self, path: &Path) -> Option<&StatementsInFile> {
124+
if let Some((_root_path, src_tree)) = self.match_path(path) {
125+
src_tree
126+
.tree
127+
.find_file(path)
128+
.and_then(|info| src_tree.files_with_statements.get(&info.id))
129+
} else {
130+
None
131+
}
132+
}
133+
124134
/// Traverse the roots looking for supported source files.
125135
#[must_use]
126136
pub fn discover_sources(&mut self, tracker: &ProgressTracker) -> Vec<LogError> {
@@ -197,7 +207,7 @@ impl LogMatcher {
197207
// XXX this block and the else are basically the same, try to refactor
198208
coll.files_with_statements
199209
.values()
200-
.filter(|stmts| stmts.filename.contains(filename))
210+
.filter(|stmts| stmts.path.contains(filename))
201211
.flat_map(|stmts| {
202212
let file_matches = stmts.matcher.matches(body);
203213
match file_matches.iter().next() {
@@ -231,7 +241,7 @@ impl LogMatcher {
231241
}
232242
}
233243

234-
#[derive(Debug, PartialEq, Copy, Clone, Serialize)]
244+
#[derive(Debug, Eq, PartialEq, Copy, Clone, Serialize)]
235245
pub enum SourceLanguage {
236246
Rust,
237247
Java,
@@ -254,6 +264,14 @@ const IDENTS_JAVA: &[&str] = &["logger", "log", "fine", "debug", "info", "warn",
254264
const IDENTS_CPP: &[&str] = &["debug", "info", "warn", "trace"];
255265

256266
impl SourceLanguage {
267+
pub fn as_str(&self) -> &'static str {
268+
match self {
269+
SourceLanguage::Rust => "Rust",
270+
SourceLanguage::Java => "Java",
271+
SourceLanguage::Cpp => "C++",
272+
}
273+
}
274+
257275
fn from_extension(extension: &OsStr) -> Option<Self> {
258276
match extension.to_str() {
259277
Some("rs") => Some(Self::Rust),
@@ -288,12 +306,11 @@ impl SourceLanguage {
288306
(method_invocation
289307
object: (identifier) @object-name
290308
name: (identifier) @method-name
291-
arguments: (argument_list [
292-
(_ (string_literal) @log (_ (this)? @this (identifier) @arguments))
293-
(_ (string_literal (_ (this)? @this (identifier) @arguments)) @log)
294-
(string_literal) @log (this)? @this (identifier) @arguments
295-
(string_literal) @log (this)? @this
296-
])
309+
arguments: [
310+
(argument_list (template_expression
311+
template_argument: (string_literal) @arguments))
312+
(argument_list (string_literal) @arguments)
313+
]
297314
(#match? @object-name "log(ger)?|LOG(GER)?")
298315
(#match? @method-name "fine|debug|info|warn|trace")
299316
)
@@ -490,7 +507,7 @@ pub fn extract_logging(sources: &[CodeSource], tracker: &ProgressTracker) -> Vec
490507
matched.push(src_ref);
491508
}
492509
}
493-
"identifier" | "this" => {
510+
"args" | "this" => {
494511
if !matched.is_empty() {
495512
let range = result.range;
496513
let source = code.buffer.as_str();
@@ -506,6 +523,7 @@ pub fn extract_logging(sources: &[CodeSource], tracker: &ProgressTracker) -> Vec
506523
{
507524
let length = matched.len() - 1;
508525
let prior_result: &mut SourceRef = matched.get_mut(length).unwrap();
526+
prior_result.end_line_no = result.range.end_point.row + 1;
509527
prior_result.vars.push(text.trim().to_string());
510528
}
511529
}
@@ -519,7 +537,7 @@ pub fn extract_logging(sources: &[CodeSource], tracker: &ProgressTracker) -> Vec
519537
None
520538
} else {
521539
Some(StatementsInFile {
522-
filename: matched.first().unwrap().source_path.clone(),
540+
path: matched.first().unwrap().source_path.clone(),
523541
id: code.info.id,
524542
log_statements: matched,
525543
matcher: RegexSet::new(patterns).expect("To combine patterns"),

src/snapshots/log2src__source_hier__test__with_resources_dir-3.snap

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@ expression: new_and_updated_events
77
- 1
88
- DeletedFile:
99
- test_java.rs
10-
- 5
10+
- 6
1111
- NewFile:
1212
- Basic.java
1313
- language: Java
14-
id: 7
14+
id: 8
1515
- NewFile:
1616
- new.rs
1717
- language: Rust
18-
id: 8
18+
id: 9

src/snapshots/log2src__source_hier__test__with_resources_dir-4.snap

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,16 @@ expression: deleted_dir_events
44
---
55
- DeletedFile:
66
- BasicWithUpper.java
7-
- 4
7+
- 5
88
- DeletedFile:
99
- BasicWithLog.java
10-
- 3
10+
- 4
1111
- DeletedFile:
1212
- BasicWithCustom.java
13+
- 3
14+
- DeletedFile:
15+
- BasicSlf4j.java
1316
- 2
1417
- DeletedFile:
1518
- Basic.java
16-
- 7
19+
- 8

src/snapshots/log2src__source_hier__test__with_resources_dir.snap

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,25 @@ expression: events
55
- NewFile:
66
- test_rust.rs
77
- language: Rust
8-
id: 6
8+
id: 7
99
- NewFile:
1010
- test_java.rs
1111
- language: Rust
12-
id: 5
12+
id: 6
1313
- NewFile:
1414
- BasicWithUpper.java
1515
- language: Java
16-
id: 4
16+
id: 5
1717
- NewFile:
1818
- BasicWithLog.java
1919
- language: Java
20-
id: 3
20+
id: 4
2121
- NewFile:
2222
- BasicWithCustom.java
23+
- language: Java
24+
id: 3
25+
- NewFile:
26+
- BasicSlf4j.java
2327
- language: Java
2428
id: 2
2529
- NewFile:

src/source_hier.rs

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,14 @@ use serde::Serialize;
33
use std::cell::RefCell;
44
use std::collections::BTreeMap;
55
use std::ffi::{OsStr, OsString};
6-
use std::path::{Path, PathBuf};
6+
use std::path::{Component, Path, PathBuf};
77
use std::time::SystemTime;
88
use std::{fs, io};
99

10+
fn is_ignored_dir(name: &OsStr) -> bool {
11+
name == ".git" || name == ".hg" || name == ".svn" || name == ".vscode"
12+
}
13+
1014
/// Result of a shallow check of a file system path. Mainly interested in getting a directory
1115
/// listing without descending into the child trees.
1216
enum ShallowCheckResult {
@@ -24,7 +28,7 @@ enum ShallowCheckResult {
2428
pub struct SourceFileID(usize);
2529

2630
/// A summary of a source code file
27-
#[derive(Copy, Clone, Debug, Serialize)]
31+
#[derive(Copy, Clone, Debug, Serialize, Eq, PartialEq)]
2832
pub struct SourceFileInfo {
2933
pub language: SourceLanguage,
3034
pub id: SourceFileID,
@@ -80,6 +84,7 @@ impl SourceHierContent {
8084
Ok(entries) => Self::Directory {
8185
entries: entries
8286
.into_iter()
87+
.filter(|entry| !is_ignored_dir(&entry.0))
8388
.map(|(entry_name, meta)| {
8489
(
8590
entry_name.to_os_string(),
@@ -210,7 +215,8 @@ impl SourceHierContent {
210215
let mut new_entries: Vec<(OsString, Result<fs::Metadata, io::Error>)> =
211216
Vec::new();
212217
for (name, meta) in latest_entries {
213-
if let Some(existing_entry) = entries.get_mut(&name) {
218+
if is_ignored_dir(&name.as_os_str()) {
219+
} else if let Some(existing_entry) = entries.get_mut(&name) {
214220
existing_entry.sync(&path.join(&name), meta, deleted_events)
215221
} else {
216222
new_entries.push((name, meta));
@@ -229,6 +235,24 @@ impl SourceHierContent {
229235
};
230236
true
231237
}
238+
239+
pub fn find_file(&self, self_path: &Path, desired_path: &Path) -> Option<SourceFileInfo> {
240+
match self {
241+
SourceHierContent::File { info, .. } if desired_path == Path::new("") => Some(*info),
242+
SourceHierContent::Directory { ref entries } => {
243+
let mut components = desired_path.components();
244+
if let Some(Component::Normal(name)) = components.next() {
245+
if let Some(node) = entries.get(name) {
246+
return node
247+
.content
248+
.find_file(&self_path.join(name), components.as_path());
249+
}
250+
}
251+
None
252+
}
253+
_ => None,
254+
}
255+
}
232256
}
233257

234258
/// A node in the SourceHierTree. It contains information that is common to all types of content
@@ -432,6 +456,13 @@ impl SourceHierTree {
432456
}
433457
}
434458

459+
pub fn find_file(&self, path: &Path) -> Option<SourceFileInfo> {
460+
match path.strip_prefix(&self.root_path) {
461+
Ok(sub_path) => self.root_node.content.find_file(&self.root_path, sub_path),
462+
Err(_) => None,
463+
}
464+
}
465+
435466
/// Visit every node in the hierarchy, depth-first, calling `f` on each.
436467
pub fn visit<F>(&self, mut f: F)
437468
where
@@ -468,17 +499,16 @@ impl SourceHierTree {
468499

469500
#[cfg(test)]
470501
mod test {
471-
use crate::source_hier::{ScanEvent, SourceHierTree};
502+
use crate::source_hier::{ScanEvent, SourceFileID, SourceFileInfo, SourceHierTree};
503+
use crate::SourceLanguage;
472504
use fs_extra::dir::copy;
473505
use fs_extra::dir::CopyOptions;
474506
use insta::assert_yaml_snapshot;
475507
use std::fs;
476508
use std::fs::File;
477509
use std::io::Write;
478-
use std::ops::Sub;
479510
use std::path::Path;
480511
use std::path::PathBuf;
481-
use std::time::{Duration, SystemTime};
482512
use tempfile::{tempdir, TempDir};
483513

484514
fn setup_test_environment(source_dir: &Path) -> TempDir {
@@ -511,6 +541,10 @@ mod test {
511541
fn test_with_resources_dir() {
512542
let tests_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests");
513543
let temp_test_dir = setup_test_environment(&tests_path);
544+
let _ = fs::create_dir(temp_test_dir.path().join(".git")).unwrap();
545+
let _ = File::create_new(temp_test_dir.path().join(".git/config"))
546+
.unwrap().write("abc".as_bytes())
547+
.unwrap();
514548
let basic_path = temp_test_dir.path().join("tests/java/Basic.java");
515549
{
516550
let metadata = fs::metadata(&basic_path).unwrap();
@@ -522,6 +556,14 @@ mod test {
522556
tree.sync();
523557
let events: Vec<ScanEvent> = tree.scan().map(redact_event).collect();
524558
assert_yaml_snapshot!(events);
559+
let find_res = tree.find_file(&basic_path);
560+
assert_eq!(
561+
find_res,
562+
Some(SourceFileInfo {
563+
language: SourceLanguage::Java,
564+
id: SourceFileID(1)
565+
})
566+
);
525567
let no_events: Vec<ScanEvent> = tree.scan().map(redact_event).collect();
526568
assert_yaml_snapshot!(no_events);
527569
let _ = fs::remove_file(temp_test_dir.path().join("tests/test_java.rs")).unwrap();

src/source_query.rs

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
use std::ops::Range;
2-
32
use tree_sitter::{
43
Language, Node, Parser, Point, Query, QueryCursor, Range as TSRange, StreamingIterator, Tree,
54
};
@@ -42,8 +41,16 @@ impl<'a> SourceQuery<'a> {
4241
let mut results = Vec::new();
4342
let matches = cursor.matches(&query, self.tree.root_node(), self.source.as_bytes());
4443
matches.for_each(|m| {
44+
let mut got_string_literal = false;
4545
for capture in m.captures {
4646
let mut child = capture.node;
47+
if child.kind() == "string_literal" {
48+
// only return results after the format string literal, other captures
49+
// are not relevant.
50+
got_string_literal = true;
51+
} else if !got_string_literal {
52+
continue;
53+
}
4754
let mut arg_start: Option<(usize, Point)> = None;
4855

4956
if filter_idx.is_none() || filter_idx.is_some_and(|f| f == capture.index) {
@@ -52,24 +59,24 @@ impl<'a> SourceQuery<'a> {
5259
range: capture.node.range(),
5360
name_range: Self::find_fn_range(child),
5461
});
55-
}
56-
while let Some(next_child) = child.next_sibling() {
57-
if matches!(next_child.kind(), "," | ")") {
58-
if let Some(start) = arg_start {
59-
results.push(QueryResult {
60-
kind: "identifier".to_string(),
61-
range: TSRange {
62-
start_byte: start.0,
63-
start_point: start.1,
64-
end_byte: next_child.start_byte(),
65-
end_point: next_child.start_position(),
66-
},
67-
name_range: Self::find_fn_range(child),
68-
});
62+
while let Some(next_child) = child.next_sibling() {
63+
if matches!(next_child.kind(), "," | ")") {
64+
if let Some(start) = arg_start {
65+
results.push(QueryResult {
66+
kind: "args".to_string(),
67+
range: TSRange {
68+
start_byte: start.0,
69+
start_point: start.1,
70+
end_byte: next_child.start_byte(),
71+
end_point: next_child.start_position(),
72+
},
73+
name_range: Self::find_fn_range(child),
74+
});
75+
}
76+
arg_start = Some((next_child.end_byte(), next_child.end_position()));
6977
}
70-
arg_start = Some((next_child.end_byte(), next_child.end_position()));
78+
child = next_child;
7179
}
72-
child = next_child;
7380
}
7481
}
7582
});

0 commit comments

Comments
 (0)