diff --git a/src/API.jl b/src/API.jl
index 03eadf982e..99081fd8a0 100644
--- a/src/API.jl
+++ b/src/API.jl
@@ -579,21 +579,7 @@ end
 const UsageDict = Dict{String, DateTime}
 const UsageByDepotDict = Dict{String, UsageDict}
 
-"""
-    gc(ctx::Context=Context(); verbose=false, force=false, kwargs...)
-
-Garbage-collect package and artifact installations by sweeping over all known
-`Manifest.toml` and `Artifacts.toml` files, noting those that have been deleted, and then
-finding artifacts and packages that are thereafter not used by any other projects.
-Unused packages, artifacts, repos, and scratch spaces are immediately deleted.
-
-Garbage collection is only applied to the "user depot", e.g. the first entry in the
-depot path. If you want to run `gc` on all depots set `force=true` (this might require
-admin privileges depending on the setup).
-
-Use verbose mode (`verbose=true`) for detailed output.
-"""
-function gc(ctx::Context = Context(); collect_delay::Union{Period, Nothing} = nothing, verbose = false, force = false, kwargs...)
+function gc(ctx::Context = Context(); collect_delay::Union{Period, Nothing} = nothing, verbose = false, force = false, collect_unused_for::Union{Period, Nothing} = nothing, kwargs...)
     Context!(ctx; kwargs...)
     if collect_delay !== nothing
         @warn "The `collect_delay` parameter is no longer used. Packages are now deleted immediately when they become unreachable."
@@ -690,6 +676,33 @@ function gc(ctx::Context = Context(); collect_delay::Union{Period, Nothing} = no
     all_scratch_dirs = Set(filter(Pkg.isdir_nothrow, all_scratch_dirs))
     all_scratch_parents = Set(filter(Pkg.isfile_nothrow, all_scratch_parents))
 
+    # Apply time-based filtering if collect_unused_for is specified
+    # This creates a separate filtered set for marking packages as active,
+    # but preserves the full manifest list for writing back to usage files
+    manifest_tomls_for_gc = all_manifest_tomls
+    if collect_unused_for !== nothing
+        # Create a unified usage dict to check timestamps across all depots
+        unified_manifest_usage = UsageDict()
+        for (depot, usage) in manifest_usage_by_depot
+            for (manifest, time) in usage
+                # Keep the most recent time if a manifest appears in multiple depots
+                unified_manifest_usage[manifest] = max(get(unified_manifest_usage, manifest, DateTime(0)), time)
+            end
+        end
+
+        cutoff_time = now() - collect_unused_for
+        # Filter out manifests that haven't been used since the cutoff time
+        # This only affects which packages are marked as active for this GC run
+        manifest_tomls_for_gc = Set(f for f in all_manifest_tomls if get(unified_manifest_usage, f, DateTime(0)) >= cutoff_time)
+
+        if verbose
+            n_filtered = length(all_manifest_tomls) - length(manifest_tomls_for_gc)
+            if n_filtered > 0
+                printpkgstyle(ctx.io, :Filtered, "$(n_filtered) manifest(s) older than $(collect_unused_for)")
+            end
+        end
+    end
+
     # Immediately write these back as condensed toml files
     function write_condensed_toml(f::Function, usage_by_depot, fname)
         for (depot, usage) in usage_by_depot
@@ -868,9 +881,10 @@ function gc(ctx::Context = Context(); collect_delay::Union{Period, Nothing} = no
 
 
     # Scan manifests, parse them, read in all UUIDs listed and mark those as active
+    # Use manifest_tomls_for_gc which excludes old manifests if collect_unused_for is set
     # printpkgstyle(ctx.io, :Active, "manifests:")
     packages_to_keep = mark(
-        process_manifest_pkgs, all_manifest_tomls, ctx,
+        process_manifest_pkgs, manifest_tomls_for_gc, ctx,
         verbose = verbose, file_str = "manifest files"
     )
 
@@ -881,7 +895,7 @@ function gc(ctx::Context = Context(); collect_delay::Union{Period, Nothing} = no
         x -> process_artifacts_toml(x, String[]), all_artifact_tomls, ctx;
         verbose = verbose, file_str = "artifact files"
     )
-    repos_to_keep = mark(process_manifest_repos, all_manifest_tomls, ctx; do_print = false)
+    repos_to_keep = mark(process_manifest_repos, manifest_tomls_for_gc, ctx; do_print = false)
     # printpkgstyle(ctx.io, :Active, "scratchspaces:")
     spaces_to_keep = mark(
         x -> process_scratchspace(x, String[]),
diff --git a/src/Pkg.jl b/src/Pkg.jl
index 1a992788c1..bfb02036ec 100644
--- a/src/Pkg.jl
+++ b/src/Pkg.jl
@@ -417,17 +417,21 @@ Pkg.test("foo"; test_args=["--extended"])
 const test = API.test
 
 """
-    Pkg.gc(; collect_delay::Period=Day(7), io::IO=stderr)
+    Pkg.gc(; verbose=false, force=false, collect_unused_for=nothing, kwargs...)
 
 Garbage-collect package and artifact installations by sweeping over all known
 `Manifest.toml` and `Artifacts.toml` files, noting those that have been deleted, and then
-finding artifacts and packages that are thereafter not used by any other projects,
-marking them as "orphaned". This method will only remove orphaned objects (package
-versions, artifacts, and scratch spaces) that have been continually un-used for a period
-of `collect_delay`; which defaults to seven days.
+finding artifacts and packages that are thereafter not used by any other projects.
+Unused packages, artifacts, repos, and scratch spaces are immediately deleted.
 
-To disable automatic garbage collection, you can set the environment variable
-`JULIA_PKG_GC_AUTO` to `"false"` before starting Julia or call `API.auto_gc(false)`.
+Garbage collection is only applied to the "user depot", i.e. the first entry in the
+depot path. If you want to run `gc` on all depots set `force=true` (this might require
+admin privileges depending on the setup).
+
+Use verbose mode (`verbose=true`) for detailed output.
+
+The `collect_unused_for` parameter can be set to a `Period` (e.g., `Day(30)`, `Week(2)`) to treat
+manifests that have not been used for longer than the specified time as obsolete.
 """
 const gc = API.gc
 
diff --git a/test/pkg.jl b/test/pkg.jl
index 2953d19f00..612ea4d5bb 100644
--- a/test/pkg.jl
+++ b/test/pkg.jl
@@ -694,6 +694,91 @@ end
     end
 end
 
+@testset "Pkg.gc with collect_unused_for" begin
+    temp_pkg_dir() do project_path
+        # Create a test environment with a package
+        mktempdir() do env_dir
+            cd(env_dir) do
+                # Create a simple Project.toml
+                write(
+                    "Project.toml", """
+                    name = "TestEnv"
+
+                    [deps]
+                    Example = "7876af07-990d-54b4-ab0e-23690620f79a"
+                    """
+                )
+
+                # Create a Manifest.toml
+                manifest_path = joinpath(env_dir, "Manifest.toml")
+                write(
+                    manifest_path, """
+                    # This file is machine-generated - editing it directly is not advised
+
+                    julia_version = "$(VERSION.major).$(VERSION.minor).$(VERSION.patch)"
+                    manifest_format = "2.0"
+
+                    [[deps.Example]]
+                    git-tree-sha1 = "46e44e869b4d90b96bd8ed1fdcf32244fddfb6cc"
+                    uuid = "7876af07-990d-54b4-ab0e-23690620f79a"
+                    version = "0.5.3"
+                    """
+                )
+
+                # Manually create a manifest usage entry with an old timestamp
+                usage_file = joinpath(Pkg.logdir(), "manifest_usage.toml")
+                mkpath(dirname(usage_file))
+
+                # Create usage data with a timestamp from 60 days ago
+                old_time = Dates.now() - Dates.Day(60)
+                usage_dict = Dict(
+                    manifest_path => [Dict("time" => old_time)]
+                )
+                open(usage_file, "w") do io
+                    TOML.print(io, usage_dict)
+                end
+
+                # Run gc with collect_unused_for=Day(30) - should filter this manifest
+                # We capture IO to check for filtering message
+                io_buf = IOBuffer()
+                Pkg.gc(verbose = true, collect_unused_for = Dates.Day(30), io = io_buf)
+                output = String(take!(io_buf))
+
+                # The manifest should not be in the active set when collect_unused_for is used
+                @test !occursin(manifest_path, output) || occursin("Filtered", output)
+
+                # Now update the timestamp to be recent
+                recent_time = Dates.now()
+                usage_dict = Dict(
+                    manifest_path => [Dict("time" => recent_time)]
+                )
+                open(usage_file, "w") do io
+                    TOML.print(io, usage_dict)
+                end
+
+                # Run gc with collect_unused_for=Day(30) - should NOT filter this manifest
+                io_buf = IOBuffer()
+                Pkg.gc(verbose = true, collect_unused_for = Dates.Day(30), io = io_buf)
+                output = String(take!(io_buf))
+
+                # The manifest should be in the active set (Regex needle: a plain String is matched literally)
+                @test occursin(manifest_path, output) || !occursin(r"Filtered.*1.*manifest", output)
+            end
+        end
+    end
+
+    # Test that gc accepts different Period types
+    temp_pkg_dir() do project_path
+        with_temp_env() do
+            # These should not error
+            Pkg.gc(collect_unused_for = Dates.Day(7))
+            Pkg.gc(collect_unused_for = Dates.Week(2))
+            Pkg.gc(collect_unused_for = Dates.Month(1))
+            @test true
+        end
+    end
+end
+
 if isdefined(Base.Filesystem, :delayed_delete_ref)
     @testset "Pkg.gc for delayed deletes" begin
         mktempdir() do root