Skip to content

Commit 93fd467

Browse files
yangdanny97 authored and meta-codesync[bot] committed
defer suggested stdlib imports to diagnostic display & use global cache
Summary: Profiling initial indexing on langchain/dify showed that `strsim::levenshtein` was using 12% of our CPU time. When we fail to find a module, we check the distance between the module name and all the stdlib modules, and compute a list of suggestions ("did you mean...?") for the error message. This computation is very expensive and uncached, and mostly unnecessary, since we don't emit diagnostics for most files during indexing. This diff makes it so that we only compute the suggestions when we display an error, and use a global cache for the suggestions. Reviewed By: rchen152 Differential Revision: D91750918 fbshipit-source-id: 1e88754c540ad79fcfc4f8bf84f5c13c8078cd8f
1 parent d2e29e3 commit 93fd467

File tree

2 files changed

+27
-11
lines changed

2 files changed

+27
-11
lines changed

pyrefly/lib/module/finder.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,23 @@
88
use std::iter;
99
use std::path::Path;
1010
use std::path::PathBuf;
11+
use std::sync::LazyLock;
1112

1213
use pyrefly_python::COMPILED_FILE_SUFFIXES;
1314
use pyrefly_python::module_name::ModuleName;
1415
use pyrefly_python::module_path::ModulePath;
1516
use pyrefly_python::module_path::ModuleStyle;
17+
use pyrefly_util::locked_map::LockedMap;
1618
use pyrefly_util::suggest::best_suggestion;
1719
use ruff_python_ast::name::Name;
1820
use starlark_map::small_map::SmallMap;
1921
use vec1::Vec1;
2022

23+
/// Global cache for stdlib import suggestions.
24+
/// Keyed by the missing module name, returns the suggested module name (if any).
25+
static STDLIB_SUGGESTION_CACHE: LazyLock<LockedMap<ModuleName, Option<ModuleName>>> =
26+
LazyLock::new(LockedMap::new);
27+
2128
use crate::config::config::ConfigFile;
2229
use crate::module::bundled::BundledStub;
2330
use crate::module::third_party::get_bundled_third_party;
@@ -641,7 +648,6 @@ pub fn find_import_filtered(
641648
config.structured_import_lookup_path(origin),
642649
module,
643650
&config.source,
644-
suggest_stdlib_import(module),
645651
))
646652
}
647653
}
@@ -698,7 +704,14 @@ fn recommended_stubs_package(module: ModuleName) -> Option<ModuleName> {
698704

699705
/// Suggest a similar stdlib module name for a mistyped import.
700706
/// Uses Levenshtein distance to find the closest match from typeshed's stdlib modules.
701-
fn suggest_stdlib_import(missing: ModuleName) -> Option<ModuleName> {
707+
/// Results are cached globally since typeshed doesn't change during a session.
708+
pub fn suggest_stdlib_import(missing: ModuleName) -> Option<ModuleName> {
709+
*STDLIB_SUGGESTION_CACHE
710+
.ensure(&missing, || suggest_stdlib_import_uncached(missing))
711+
.0
712+
}
713+
714+
fn suggest_stdlib_import_uncached(missing: ModuleName) -> Option<ModuleName> {
702715
let ts = typeshed().ok()?;
703716
let missing_str = missing.as_str();
704717

@@ -1054,7 +1067,6 @@ mod tests {
10541067
config.structured_import_lookup_path(None),
10551068
ModuleName::from_str("spp_priority.d"),
10561069
&config.source,
1057-
None,
10581070
)),
10591071
);
10601072
}

pyrefly/lib/state/loader.rs

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use crate::config::config::ImportLookupPathPart;
2424
use crate::error::context::ErrorContext;
2525
use crate::module::finder::find_import;
2626
use crate::module::finder::find_import_filtered;
27+
use crate::module::finder::suggest_stdlib_import;
2728

2829
#[derive(Debug, Clone, Dupe, PartialEq, Eq)]
2930
pub enum FindError {
@@ -51,7 +52,6 @@ impl FindError {
5152
path: Vec<ImportLookupPathPart>,
5253
module: ModuleName,
5354
config_source: &ConfigSource,
54-
suggestion: Option<ModuleName>,
5555
) -> FindError {
5656
let config_suffix = match config_source {
5757
ConfigSource::File(p) => format!(" (from config in `{}`)", p.display()),
@@ -79,18 +79,22 @@ impl FindError {
7979
format!("Looked in these locations{config_suffix}:")
8080
}];
8181
explanation.extend(nonempty_paths);
82-
if let Some(suggested) = suggestion {
83-
explanation.insert(0, format!("Did you mean `{suggested}`?"));
84-
}
8582
FindError::NotFound(module, Arc::new(explanation))
8683
}
8784

8885
pub fn display(&self) -> (Option<Box<dyn Fn() -> ErrorContext + '_>>, Vec1<String>) {
8986
match self {
90-
Self::NotFound(module, err) => (
91-
Some(Box::new(|| ErrorContext::ImportNotFound(*module))),
92-
(**err).clone(),
93-
),
87+
Self::NotFound(module, err) => {
88+
let mut lines = (**err).clone();
89+
// Compute suggestion lazily at display time, using global cache
90+
if let Some(suggested) = suggest_stdlib_import(*module) {
91+
lines.insert(0, format!("Did you mean `{suggested}`?"));
92+
}
93+
(
94+
Some(Box::new(|| ErrorContext::ImportNotFound(*module))),
95+
lines,
96+
)
97+
}
9498
Self::Ignored => (None, vec1!["Ignored import".to_owned()]),
9599
Self::NoSource(module) => (
96100
None,

0 commit comments

Comments
 (0)