Skip to content

Commit e0a49e2

Browse files
authored
Merge pull request github#12486 from aibaars/windows-long-paths
Ruby: support long paths on Windows
2 parents e4837f7 + 41a53ec commit e0a49e2

File tree

3 files changed

+156
-106
lines changed

3 files changed

+156
-106
lines changed

ruby/extractor/src/extractor.rs

Lines changed: 11 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::diagnostics;
2+
use crate::file_paths;
23
use crate::trap;
34
use node_types::{EntryKind, Field, NodeTypeMap, Storage, TypeName};
45
use std::collections::BTreeMap as Map;
@@ -9,14 +10,15 @@ use std::path::Path;
910
use tree_sitter::{Language, Node, Parser, Range, Tree};
1011

1112
pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label {
12-
let (file_label, fresh) =
13-
writer.global_id(&trap::full_id_for_file(&normalize_path(absolute_path)));
13+
let (file_label, fresh) = writer.global_id(&trap::full_id_for_file(
14+
&file_paths::normalize_path(absolute_path),
15+
));
1416
if fresh {
1517
writer.add_tuple(
1618
"files",
1719
vec![
1820
trap::Arg::Label(file_label),
19-
trap::Arg::String(normalize_path(absolute_path)),
21+
trap::Arg::String(file_paths::normalize_path(absolute_path)),
2022
],
2123
);
2224
populate_parent_folders(writer, file_label, absolute_path.parent());
@@ -54,8 +56,9 @@ pub fn populate_parent_folders(
5456
match path {
5557
None => break,
5658
Some(folder) => {
57-
let (folder_label, fresh) =
58-
writer.global_id(&trap::full_id_for_folder(&normalize_path(folder)));
59+
let (folder_label, fresh) = writer.global_id(&trap::full_id_for_folder(
60+
&file_paths::normalize_path(folder),
61+
));
5962
writer.add_tuple(
6063
"containerparent",
6164
vec![
@@ -68,7 +71,7 @@ pub fn populate_parent_folders(
6871
"folders",
6972
vec![
7073
trap::Arg::Label(folder_label),
71-
trap::Arg::String(normalize_path(folder)),
74+
trap::Arg::String(file_paths::normalize_path(folder)),
7275
],
7376
);
7477
path = folder.parent();
@@ -119,8 +122,8 @@ pub fn extract(
119122
path: &Path,
120123
source: &[u8],
121124
ranges: &[Range],
122-
) -> std::io::Result<()> {
123-
let path_str = format!("{}", path.display());
125+
) {
126+
let path_str = file_paths::normalize_path(&path);
124127
let span = tracing::span!(
125128
tracing::Level::TRACE,
126129
"extract",
@@ -150,46 +153,6 @@ pub fn extract(
150153
traverse(&tree, &mut visitor);
151154

152155
parser.reset();
153-
Ok(())
154-
}
155-
156-
/// Normalizes the path according the common CodeQL specification. Assumes that
157-
/// `path` has already been canonicalized using `std::fs::canonicalize`.
158-
fn normalize_path(path: &Path) -> String {
159-
if cfg!(windows) {
160-
// The way Rust canonicalizes paths doesn't match the CodeQL spec, so we
161-
// have to do a bit of work removing certain prefixes and replacing
162-
// backslashes.
163-
let mut components: Vec<String> = Vec::new();
164-
for component in path.components() {
165-
match component {
166-
std::path::Component::Prefix(prefix) => match prefix.kind() {
167-
std::path::Prefix::Disk(letter) | std::path::Prefix::VerbatimDisk(letter) => {
168-
components.push(format!("{}:", letter as char));
169-
}
170-
std::path::Prefix::Verbatim(x) | std::path::Prefix::DeviceNS(x) => {
171-
components.push(x.to_string_lossy().to_string());
172-
}
173-
std::path::Prefix::UNC(server, share)
174-
| std::path::Prefix::VerbatimUNC(server, share) => {
175-
components.push(server.to_string_lossy().to_string());
176-
components.push(share.to_string_lossy().to_string());
177-
}
178-
},
179-
std::path::Component::Normal(n) => {
180-
components.push(n.to_string_lossy().to_string());
181-
}
182-
std::path::Component::RootDir => {}
183-
std::path::Component::CurDir => {}
184-
std::path::Component::ParentDir => {}
185-
}
186-
}
187-
components.join("/")
188-
} else {
189-
// For other operating systems, we can use the canonicalized path
190-
// without modifications.
191-
format!("{}", path.display())
192-
}
193156
}
194157

195158
struct ChildNode {

ruby/extractor/src/file_paths.rs

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
use std::path::{Path, PathBuf};
2+
3+
/// Renders `path` as a string in the form required by the common CodeQL
/// specification. The caller is expected to pass a path that has already been
/// canonicalized with `std::fs::canonicalize`.
pub fn normalize_path(path: &Path) -> String {
    use std::path::{Component, Prefix};

    if !cfg!(windows) {
        // Outside Windows the canonicalized path is used unchanged.
        return path.display().to_string();
    }

    // On Windows, Rust's canonical form does not match the CodeQL spec: the
    // prefix is reduced to its meaningful parts and the pieces are joined
    // with forward slashes instead of backslashes.
    let mut parts: Vec<String> = Vec::new();
    for part in path.components() {
        match part {
            Component::Prefix(prefix) => match prefix.kind() {
                Prefix::Disk(drive) | Prefix::VerbatimDisk(drive) => {
                    parts.push(format!("{}:", drive as char));
                }
                Prefix::Verbatim(name) | Prefix::DeviceNS(name) => {
                    parts.push(name.to_string_lossy().into_owned());
                }
                Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
                    parts.push(server.to_string_lossy().into_owned());
                    parts.push(share.to_string_lossy().into_owned());
                }
            },
            Component::Normal(name) => parts.push(name.to_string_lossy().into_owned()),
            // The root, `.` and `..` contribute nothing to the normalized form.
            Component::RootDir | Component::CurDir | Component::ParentDir => {}
        }
    }
    parts.join("/")
}
41+
42+
/// Convert a user-supplied path to an absolute path, and convert it to a verbatim path on Windows.
43+
pub fn path_from_string(path: &str) -> PathBuf {
44+
let mut path = PathBuf::from(path);
45+
// make path absolute
46+
if path.is_relative() {
47+
path = std::env::current_dir().unwrap().join(path)
48+
};
49+
let mut components = path.components();
50+
51+
// make Windows paths verbatim (with `\\?\` prefixes) which allow for extended-length paths.
52+
let mut result = match components.next() {
53+
None => unreachable!("empty path"),
54+
55+
Some(component) => match component {
56+
std::path::Component::Prefix(prefix) => match prefix.kind() {
57+
std::path::Prefix::Disk(drive) => {
58+
let root = format!(r"\\?\{}:\", drive as char);
59+
PathBuf::from(root)
60+
}
61+
std::path::Prefix::UNC(server, share) => {
62+
let mut root = std::ffi::OsString::from(r"\\?\UNC\");
63+
root.push(server);
64+
root.push(r"\");
65+
root.push(share);
66+
PathBuf::from(root)
67+
}
68+
std::path::Prefix::Verbatim(_)
69+
| std::path::Prefix::VerbatimUNC(_, _)
70+
| std::path::Prefix::VerbatimDisk(_)
71+
| std::path::Prefix::DeviceNS(_) => Path::new(&component).to_path_buf(),
72+
},
73+
_ => Path::new(&component).to_path_buf(),
74+
},
75+
};
76+
// remove `.` and `..` components
77+
for component in components {
78+
match component {
79+
std::path::Component::CurDir => continue,
80+
std::path::Component::ParentDir => {
81+
result.pop();
82+
}
83+
_ => result.push(component),
84+
}
85+
}
86+
result
87+
}
88+
89+
pub fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf {
90+
let mut result = PathBuf::from(dir);
91+
for component in path.components() {
92+
match component {
93+
std::path::Component::Prefix(prefix) => match prefix.kind() {
94+
std::path::Prefix::Disk(letter) | std::path::Prefix::VerbatimDisk(letter) => {
95+
result.push(format!("{}_", letter as char))
96+
}
97+
std::path::Prefix::Verbatim(x) | std::path::Prefix::DeviceNS(x) => {
98+
result.push(x);
99+
}
100+
std::path::Prefix::UNC(server, share)
101+
| std::path::Prefix::VerbatimUNC(server, share) => {
102+
result.push("unc");
103+
result.push(server);
104+
result.push(share);
105+
}
106+
},
107+
std::path::Component::RootDir => {
108+
// skip
109+
}
110+
std::path::Component::Normal(_) => {
111+
result.push(component);
112+
}
113+
std::path::Component::CurDir => {
114+
// skip
115+
}
116+
std::path::Component::ParentDir => {
117+
result.pop();
118+
}
119+
}
120+
}
121+
if !ext.is_empty() {
122+
match result.extension() {
123+
Some(x) => {
124+
let mut new_ext = x.to_os_string();
125+
new_ext.push(".");
126+
new_ext.push(ext);
127+
result.set_extension(new_ext);
128+
}
129+
None => {
130+
result.set_extension(ext);
131+
}
132+
}
133+
}
134+
result
135+
}

ruby/extractor/src/main.rs

Lines changed: 10 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
mod diagnostics;
22
mod extractor;
3+
mod file_paths;
34
mod trap;
45

56
#[macro_use]
@@ -122,15 +123,15 @@ fn main() -> std::io::Result<()> {
122123
let src_archive_dir = matches
123124
.value_of("source-archive-dir")
124125
.expect("missing --source-archive-dir");
125-
let src_archive_dir = PathBuf::from(src_archive_dir);
126+
let src_archive_dir = file_paths::path_from_string(src_archive_dir);
126127

127128
let trap_dir = matches
128129
.value_of("output-dir")
129130
.expect("missing --output-dir");
130-
let trap_dir = PathBuf::from(trap_dir);
131+
let trap_dir = file_paths::path_from_string(trap_dir);
131132

132133
let file_list = matches.value_of("file-list").expect("missing --file-list");
133-
let file_list = fs::File::open(file_list)?;
134+
let file_list = fs::File::open(file_paths::path_from_string(file_list))?;
134135

135136
let language = tree_sitter_ruby::language();
136137
let erb = tree_sitter_embedded_template::language();
@@ -148,7 +149,7 @@ fn main() -> std::io::Result<()> {
148149
.try_for_each(|line| {
149150
let mut diagnostics_writer = diagnostics.logger();
150151
let path = PathBuf::from(line).canonicalize()?;
151-
let src_archive_file = path_for(&src_archive_dir, &path, "");
152+
let src_archive_file = file_paths::path_for(&src_archive_dir, &path, "");
152153
let mut source = std::fs::read(&path)?;
153154
let mut needs_conversion = false;
154155
let code_ranges;
@@ -164,7 +165,7 @@ fn main() -> std::io::Result<()> {
164165
&path,
165166
&source,
166167
&[],
167-
)?;
168+
);
168169

169170
let (ranges, line_breaks) = scan_erb(
170171
erb,
@@ -205,7 +206,7 @@ fn main() -> std::io::Result<()> {
205206
"character-decoding-error",
206207
"Character decoding error",
207208
)
208-
.file(&path.to_string_lossy())
209+
.file(&file_paths::normalize_path(&path))
209210
.message(
210211
"Could not decode the file contents as {}: {}. The contents of the file must match the character encoding specified in the {} {}.",
211212
&[
@@ -225,7 +226,7 @@ fn main() -> std::io::Result<()> {
225226
diagnostics_writer.write(
226227
diagnostics_writer
227228
.new_entry("unknown-character-encoding", "Unknown character encoding")
228-
.file(&path.to_string_lossy())
229+
.file(&file_paths::normalize_path(&path))
229230
.message(
230231
"Unknown character encoding {} in {} {}.",
231232
&[
@@ -251,7 +252,7 @@ fn main() -> std::io::Result<()> {
251252
&path,
252253
&source,
253254
&code_ranges,
254-
)?;
255+
);
255256
std::fs::create_dir_all(&src_archive_file.parent().unwrap())?;
256257
if needs_conversion {
257258
std::fs::write(&src_archive_file, &source)?;
@@ -274,7 +275,7 @@ fn write_trap(
274275
trap_writer: &trap::Writer,
275276
trap_compression: trap::Compression,
276277
) -> std::io::Result<()> {
277-
let trap_file = path_for(trap_dir, &path, trap_compression.extension());
278+
let trap_file = file_paths::path_for(trap_dir, &path, trap_compression.extension());
278279
std::fs::create_dir_all(&trap_file.parent().unwrap())?;
279280
trap_writer.write_to_file(&trap_file, trap_compression)
280281
}
@@ -321,54 +322,6 @@ fn scan_erb(
321322
(result, line_breaks)
322323
}
323324

324-
fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf {
325-
let mut result = PathBuf::from(dir);
326-
for component in path.components() {
327-
match component {
328-
std::path::Component::Prefix(prefix) => match prefix.kind() {
329-
std::path::Prefix::Disk(letter) | std::path::Prefix::VerbatimDisk(letter) => {
330-
result.push(format!("{}_", letter as char))
331-
}
332-
std::path::Prefix::Verbatim(x) | std::path::Prefix::DeviceNS(x) => {
333-
result.push(x);
334-
}
335-
std::path::Prefix::UNC(server, share)
336-
| std::path::Prefix::VerbatimUNC(server, share) => {
337-
result.push("unc");
338-
result.push(server);
339-
result.push(share);
340-
}
341-
},
342-
std::path::Component::RootDir => {
343-
// skip
344-
}
345-
std::path::Component::Normal(_) => {
346-
result.push(component);
347-
}
348-
std::path::Component::CurDir => {
349-
// skip
350-
}
351-
std::path::Component::ParentDir => {
352-
result.pop();
353-
}
354-
}
355-
}
356-
if !ext.is_empty() {
357-
match result.extension() {
358-
Some(x) => {
359-
let mut new_ext = x.to_os_string();
360-
new_ext.push(".");
361-
new_ext.push(ext);
362-
result.set_extension(new_ext);
363-
}
364-
None => {
365-
result.set_extension(ext);
366-
}
367-
}
368-
}
369-
result
370-
}
371-
372325
fn skip_space(content: &[u8], index: usize) -> usize {
373326
let mut index = index;
374327
while index < content.len() {
@@ -382,7 +335,6 @@ fn skip_space(content: &[u8], index: usize) -> usize {
382335
}
383336
index
384337
}
385-
386338
fn scan_coding_comment(content: &[u8]) -> std::option::Option<Cow<str>> {
387339
let mut index = 0;
388340
// skip UTF-8 BOM marker if there is one

0 commit comments

Comments
 (0)