Skip to content

Commit ca27785

Browse files
committed
Rust: extract files on a per-project basis
This way, only one "project" database is held in memory at a time, which should avoid running out of memory when analyzing large monorepos.
1 parent db28f1b commit ca27785

File tree

2 files changed

+96
-63
lines changed

2 files changed

+96
-63
lines changed

rust/extractor/src/main.rs

Lines changed: 59 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
1+
use std::{
2+
collections::HashMap,
3+
path::{Path, PathBuf},
4+
};
5+
16
use anyhow::Context;
7+
use archive::Archiver;
28
use ra_ap_ide_db::line_index::{LineCol, LineIndex};
9+
use ra_ap_project_model::ProjectManifest;
10+
use rust_analyzer::RustAnalyzer;
311
mod archive;
412
mod config;
513
pub mod generated;
@@ -9,10 +17,13 @@ pub mod trap;
917

1018
fn extract(
1119
rust_analyzer: &mut rust_analyzer::RustAnalyzer,
20+
archiver: &Archiver,
1221
traps: &trap::TrapFileProvider,
13-
file: std::path::PathBuf,
14-
) -> anyhow::Result<()> {
15-
let (ast, input, parse_errors, file_id, semi) = rust_analyzer.parse(&file);
22+
file: &std::path::Path,
23+
) -> () {
24+
archiver.archive(&file);
25+
26+
let (ast, input, parse_errors, file_id, semi) = rust_analyzer.parse(file);
1627
let line_index = LineIndex::new(input.as_ref());
1728
let display_path = file.to_string_lossy();
1829
let mut trap = traps.create("source", &file);
@@ -40,26 +51,63 @@ fn extract(
4051
);
4152
}
4253
translator.emit_source_file(ast);
43-
translator.trap.commit()?;
44-
Ok(())
54+
translator.trap.commit().unwrap_or_else(|err| {
55+
log::error!(
56+
"Failed to write trap file for: {}: {}",
57+
display_path,
58+
err.to_string()
59+
)
60+
});
4561
}
4662
fn main() -> anyhow::Result<()> {
4763
let cfg = config::Config::extract().context("failed to load configuration")?;
4864
stderrlog::new()
4965
.module(module_path!())
5066
.verbosity(2 + cfg.verbose as usize)
5167
.init()?;
52-
let mut rust_analyzer = rust_analyzer::RustAnalyzer::new(&cfg)?;
5368

5469
let traps = trap::TrapFileProvider::new(&cfg).context("failed to set up trap files")?;
5570
let archiver = archive::Archiver {
5671
root: cfg.source_archive_dir,
5772
};
58-
for file in cfg.inputs {
59-
let file = std::path::absolute(&file).unwrap_or(file);
60-
let file = std::fs::canonicalize(&file).unwrap_or(file);
61-
archiver.archive(&file);
62-
extract(&mut rust_analyzer, &traps, file)?;
73+
let files: Vec<PathBuf> = cfg
74+
.inputs
75+
.iter()
76+
.map(|file| {
77+
let file = std::path::absolute(&file).unwrap_or(file.to_path_buf());
78+
std::fs::canonicalize(&file).unwrap_or(file)
79+
})
80+
.collect();
81+
let manifests = rust_analyzer::find_project_manifests(&files)?;
82+
let mut map: HashMap<&Path, (&ProjectManifest, Vec<&Path>)> = manifests
83+
.iter()
84+
.map(|x| (x.manifest_path().parent().as_ref(), (x, Vec::new())))
85+
.collect();
86+
let mut other_files = Vec::new();
87+
88+
'outer: for file in &files {
89+
let mut p = file.as_path();
90+
while let Some(parent) = p.parent() {
91+
p = parent;
92+
if let Some((_, files)) = map.get_mut(parent) {
93+
files.push(file);
94+
continue 'outer;
95+
}
96+
}
97+
other_files.push(file);
98+
}
99+
for (manifest, files) in map.values() {
100+
if files.is_empty() {
101+
break;
102+
}
103+
let mut rust_analyzer = RustAnalyzer::new(manifest, &cfg.scratch_dir);
104+
for file in files {
105+
extract(&mut rust_analyzer, &archiver, &traps, file);
106+
}
107+
}
108+
let mut rust_analyzer = RustAnalyzer::WithoutDatabase();
109+
for file in other_files {
110+
extract(&mut rust_analyzer, &archiver, &traps, file);
63111
}
64112

65113
Ok(())

rust/extractor/src/rust_analyzer.rs

Lines changed: 37 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use crate::config::Config;
2-
use anyhow::Context;
31
use itertools::Itertools;
42
use log::info;
53
use ra_ap_base_db::SourceDatabase;
@@ -9,6 +7,7 @@ use ra_ap_ide_db::RootDatabase;
97
use ra_ap_load_cargo::{load_workspace_at, LoadCargoConfig, ProcMacroServerChoice};
108
use ra_ap_paths::Utf8PathBuf;
119
use ra_ap_project_model::CargoConfig;
10+
use ra_ap_project_model::ProjectManifest;
1211
use ra_ap_project_model::RustLibSource;
1312
use ra_ap_span::Edition;
1413
use ra_ap_span::EditionedFileId;
@@ -20,19 +19,18 @@ use ra_ap_vfs::AbsPathBuf;
2019
use ra_ap_vfs::Vfs;
2120
use ra_ap_vfs::VfsPath;
2221
use std::borrow::Cow;
23-
use std::collections::HashMap;
2422
use std::path::{Path, PathBuf};
2523
use triomphe::Arc;
26-
pub struct RustAnalyzer {
27-
workspace: HashMap<PathBuf, (Vfs, RootDatabase)>,
24+
pub enum RustAnalyzer {
25+
WithDatabase { db: RootDatabase, vfs: Vfs },
26+
WithoutDatabase(),
2827
}
2928

3029
impl RustAnalyzer {
31-
pub fn new(cfg: &Config) -> anyhow::Result<RustAnalyzer> {
32-
let mut workspace = HashMap::new();
30+
pub fn new(project: &ProjectManifest, scratch_dir: &Path) -> Self {
3331
let config = CargoConfig {
3432
sysroot: Some(RustLibSource::Discover),
35-
target_dir: ra_ap_paths::Utf8PathBuf::from_path_buf(cfg.scratch_dir.to_path_buf())
33+
target_dir: ra_ap_paths::Utf8PathBuf::from_path_buf(scratch_dir.to_path_buf())
3634
.map(|x| x.join("target"))
3735
.ok(),
3836
..Default::default()
@@ -43,25 +41,19 @@ impl RustAnalyzer {
4341
with_proc_macro_server: ProcMacroServerChoice::Sysroot,
4442
prefill_caches: false,
4543
};
46-
let projects = find_project_manifests(&cfg.inputs).context("loading inputs")?;
47-
for project in projects {
48-
let manifest = project.manifest_path();
44+
let manifest = project.manifest_path();
4945

50-
match load_workspace_at(manifest.as_ref(), &config, &load_config, &progress) {
51-
Ok((db, vfs, _macro_server)) => {
52-
let path: &Path = manifest.parent().as_ref();
53-
workspace.insert(path.to_path_buf(), (vfs, db));
54-
}
55-
Err(err) => {
56-
log::error!("failed to load workspace for {}: {}", manifest, err);
57-
}
46+
match load_workspace_at(manifest.as_ref(), &config, &load_config, &progress) {
47+
Ok((db, vfs, _macro_server)) => RustAnalyzer::WithDatabase { db, vfs },
48+
Err(err) => {
49+
log::error!("failed to load workspace for {}: {}", manifest, err);
50+
RustAnalyzer::WithoutDatabase()
5851
}
5952
}
60-
Ok(RustAnalyzer { workspace })
6153
}
6254
pub fn parse(
6355
&mut self,
64-
path: &PathBuf,
56+
path: &Path,
6557
) -> (
6658
SourceFile,
6759
Arc<str>,
@@ -82,37 +74,30 @@ impl RustAnalyzer {
8274
};
8375
let (input, err) = from_utf8_lossy(&input);
8476

85-
let mut p = path.as_path();
86-
while let Some(parent) = p.parent() {
87-
p = parent;
88-
if self.workspace.contains_key(parent) {
89-
let (vfs, db) = self.workspace.get_mut(parent).unwrap();
90-
if let Some(file_id) = Utf8PathBuf::from_path_buf(path.to_path_buf())
91-
.ok()
92-
.and_then(|x| AbsPathBuf::try_from(x).ok())
93-
.map(VfsPath::from)
94-
.and_then(|x| vfs.file_id(&x))
95-
{
96-
db.set_file_text(file_id, &input);
97-
let semi = Semantics::new(db);
77+
if let RustAnalyzer::WithDatabase { vfs, db } = self {
78+
if let Some(file_id) = Utf8PathBuf::from_path_buf(path.to_path_buf())
79+
.ok()
80+
.and_then(|x| AbsPathBuf::try_from(x).ok())
81+
.map(VfsPath::from)
82+
.and_then(|x| vfs.file_id(&x))
83+
{
84+
db.set_file_text(file_id, &input);
85+
let semi = Semantics::new(db);
9886

99-
let file_id = EditionedFileId::current_edition(file_id);
100-
let source_file = semi.parse(file_id);
101-
errors.extend(
102-
db.parse_errors(file_id)
103-
.into_iter()
104-
.flat_map(|x| x.to_vec()),
105-
);
106-
return (
107-
source_file,
108-
input.as_ref().into(),
109-
errors,
110-
Some(file_id),
111-
Some(semi),
112-
);
113-
} else {
114-
break;
115-
}
87+
let file_id = EditionedFileId::current_edition(file_id);
88+
let source_file = semi.parse(file_id);
89+
errors.extend(
90+
db.parse_errors(file_id)
91+
.into_iter()
92+
.flat_map(|x| x.to_vec()),
93+
);
94+
return (
95+
source_file,
96+
input.as_ref().into(),
97+
errors,
98+
Some(file_id),
99+
Some(semi),
100+
);
116101
}
117102
}
118103
let parse = ra_ap_syntax::ast::SourceFile::parse(&input, Edition::CURRENT);
@@ -122,7 +107,7 @@ impl RustAnalyzer {
122107
}
123108
}
124109

125-
fn find_project_manifests(
110+
pub fn find_project_manifests(
126111
files: &[PathBuf],
127112
) -> anyhow::Result<Vec<ra_ap_project_model::ProjectManifest>> {
128113
let current = std::env::current_dir()?;

0 commit comments

Comments
 (0)