Skip to content

Commit 893e720

Browse files
REPLCompletions: async cache PATH scan (#52833)
1 parent f14bf29 commit 893e720

File tree

2 files changed

+87
-47
lines changed

2 files changed

+87
-47
lines changed

stdlib/REPL/src/REPLCompletions.jl

Lines changed: 80 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,78 @@ function do_string_escape(s)
269269
return escape_string(s, ('\"','$'))
270270
end
271271

272+
# Lock held while `PATH_cache` is modified or iterated and while
# `cached_PATH_string` is read or written.
const PATH_cache_lock = Base.ReentrantLock()
273+
# Names of the files found in the directories listed in `ENV["PATH"]`; filled in
# by `cache_PATH` and read by `complete_path`.
const PATH_cache = Set{String}()
274+
# The value of `ENV["PATH"]` recorded by the most recent `cache_PATH` call;
# `nothing` before the first call or when PATH was not set at that time.
cached_PATH_string::Union{String,Nothing} = nothing
275+
# Report whether `ENV["PATH"]` currently differs from the value that was recorded
# when the PATH cache was last rebuilt. An unset PATH is represented as `nothing`
# on both sides of the comparison.
function cached_PATH_changed()
    global cached_PATH_string
    # Read the recorded value under the lock, since `cache_PATH` writes it from
    # another task.
    recorded = @lock PATH_cache_lock cached_PATH_string
    current = get(ENV, "PATH", nothing)
    return recorded !== current
end
279+
# Notified by `cache_PATH` when a scan of the PATH directories has completed.
# NOTE(review): `Base.Condition` is documented as not thread-safe, while
# `cache_PATH` is launched via `Threads.@spawn` — confirm `notify` cannot run on a
# different thread than the waiter, or switch to `Threads.Condition`.
const PATH_cache_finished = Base.Condition() # used for sync in tests
280+
281+
# caches all reachable files in PATH dirs
282+
"""
    cache_PATH()

Rebuild `PATH_cache` with the names of all regular files found in the directories
listed in `ENV["PATH"]`.

The cache is emptied and `cached_PATH_string` is updated up front, then the
directories are scanned one by one. Directories that do not exist or cannot be
read are skipped, as are entries for which `isfile` throws a `Base.IOError`.
Any other exception is rethrown.

`PATH_cache_finished` is notified when the function exits, whether the scan
completed, was abandoned, or failed, so that waiters are never left hanging.

Returns `PATH_cache`, or `nothing` when PATH is not set.
"""
function cache_PATH()
    global cached_PATH_string
    # Reset the cache and record the PATH value this scan belongs to in a single
    # critical section, so `cached_PATH_changed()` stops reporting a change as soon
    # as the scan has started.
    path = @lock PATH_cache_lock begin
        empty!(PATH_cache)
        cached_PATH_string = get(ENV, "PATH", nothing)
    end

    try
        path isa String || return

        @debug "caching PATH files" PATH=path
        pathdirs = split(path, @static Sys.iswindows() ? ";" : ":")

        t = @elapsed for pathdir in pathdirs
            actualpath = try
                realpath(pathdir)
            catch ex
                ex isa Base.IOError || rethrow()
                # Bash doesn't expect every folder in PATH to exist, so neither shall we
                continue
            end

            if actualpath != pathdir && in(actualpath, pathdirs)
                # Remove paths which (after resolving links) are in the env path twice.
                # Many distros eg. point /bin to /usr/bin but have both in the env path.
                continue
            end

            filesinpath = try
                readdir(pathdir)
            catch e
                # Bash allows dirs in PATH that can't be read, so we should as well.
                # We only handle IOError and ArgumentError here.
                (e isa Base.IOError || e isa Base.ArgumentError) || rethrow()
                continue
            end

            for file in filesinpath
                # In a perfect world, we would filter on whether the file is executable
                # here, or even on whether the current user can execute the file in question.
                try
                    if isfile(joinpath(pathdir, file))
                        # Only add the entry while this scan is still the most recent
                        # one. If another `cache_PATH` call has since reset the cache
                        # for a different PATH, pushing here would pollute it with
                        # stale names, so abandon this scan instead.
                        still_current = @lock PATH_cache_lock begin
                            cached_PATH_string === path && (push!(PATH_cache, file); true)
                        end
                        still_current || return PATH_cache
                    end
                catch e
                    # `isfile()` can throw in rare cases such as when probing a
                    # symlink that points to a file within a directory we do not
                    # have read access to.
                    e isa Base.IOError || rethrow()
                end
                yield() # so startup doesn't block when -t1
            end
        end

        @debug "caching PATH files took $t seconds" length(pathdirs) length(PATH_cache)
        return PATH_cache
    finally
        # Always wake waiters, including on early return and on error.
        # NOTE(review): `PATH_cache_finished` is a `Base.Condition`, which is
        # documented as not thread-safe — confirm this is safe under `Threads.@spawn`.
        notify(PATH_cache_finished)
    end
end
343+
272344
function complete_path(path::AbstractString;
273345
use_envpath=false,
274346
shell_escape=false,
@@ -308,54 +380,15 @@ function complete_path(path::AbstractString;
308380
end
309381

310382
if use_envpath && isempty(dir)
311-
# Look for files in PATH as well
312-
pathdirs = split(ENV["PATH"], @static Sys.iswindows() ? ";" : ":")
313-
314-
for pathdir in pathdirs
315-
actualpath = try
316-
realpath(pathdir)
317-
catch ex
318-
ex isa Base.IOError || rethrow()
319-
# Bash doesn't expect every folder in PATH to exist, so neither shall we
320-
continue
321-
end
322-
323-
if actualpath != pathdir && in(actualpath, pathdirs)
324-
# Remove paths which (after resolving links) are in the env path twice.
325-
# Many distros eg. point /bin to /usr/bin but have both in the env path.
326-
continue
327-
end
328-
329-
filesinpath = try
330-
readdir(pathdir)
331-
catch e
332-
# Bash allows dirs in PATH that can't be read, so we should as well.
333-
if isa(e, Base.IOError) || isa(e, Base.ArgumentError)
334-
continue
335-
else
336-
# We only handle IOError and ArgumentError here
337-
rethrow()
338-
end
339-
end
340-
341-
for file in filesinpath
342-
# In a perfect world, we would filter on whether the file is executable
343-
# here, or even on whether the current user can execute the file in question.
344-
try
345-
if startswith(file, prefix) && isfile(joinpath(pathdir, file))
346-
push!(matches, file)
347-
end
348-
catch e
349-
# `isfile()` can throw in rare cases such as when probing a
350-
# symlink that points to a file within a directory we do not
351-
# have read access to.
352-
if isa(e, Base.IOError)
353-
continue
354-
else
355-
rethrow()
356-
end
357-
end
383+
# Look for files in PATH as well. These are cached in `cache_PATH` in a separate task in REPL init.
384+
# If we cannot get the lock because it's still caching, just pass over this so that initial
385+
# typing isn't laggy. If the PATH string has changed since last cache re-cache it
386+
cached_PATH_changed() && Base.errormonitor(Threads.@spawn REPLCompletions.cache_PATH())
387+
if trylock(PATH_cache_lock)
388+
for file in PATH_cache
389+
startswith(file, prefix) && push!(matches, file)
358390
end
391+
unlock(PATH_cache_lock)
359392
end
360393
end
361394

stdlib/REPL/test/replcompletions.jl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1147,6 +1147,11 @@ let s, c, r
11471147
withenv("PATH" => string(path, ":", unreadable)) do
11481148
s = "tmp-execu"
11491149
c,r = test_scomplete(s)
1150+
# Files reachable by PATH are cached async when PATH is seen to have been changed by `complete_path`
1151+
# so changes are unlikely to appear in the first complete. For testing purposes we can wait for
1152+
# caching to finish
1153+
wait(REPL.REPLCompletions.PATH_cache_finished)
1154+
c,r = test_scomplete(s)
11501155
@test "tmp-executable" in c
11511156
@test r == 1:9
11521157
@test s[r] == "tmp-execu"
@@ -1175,6 +1180,8 @@ let s, c, r
11751180
withenv("PATH" => string(tempdir(), ":", dir)) do
11761181
s = string("repl-completio")
11771182
c,r = test_scomplete(s)
1183+
wait(REPL.REPLCompletions.PATH_cache_finished) # wait for caching to complete
1184+
c,r = test_scomplete(s)
11781185
@test ["repl-completion"] == c
11791186
@test s[r] == "repl-completio"
11801187
end

0 commit comments

Comments
 (0)