Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 31 additions & 17 deletions src/API.jl
Original file line number Diff line number Diff line change
Expand Up @@ -579,21 +579,7 @@ end
const UsageDict = Dict{String, DateTime}
const UsageByDepotDict = Dict{String, UsageDict}

"""
gc(ctx::Context=Context(); verbose=false, force=false, kwargs...)

Garbage-collect package and artifact installations by sweeping over all known
`Manifest.toml` and `Artifacts.toml` files, noting those that have been deleted, and then
finding artifacts and packages that are thereafter not used by any other projects.
Unused packages, artifacts, repos, and scratch spaces are immediately deleted.

Garbage collection is only applied to the "user depot", e.g. the first entry in the
depot path. If you want to run `gc` on all depots set `force=true` (this might require
admin privileges depending on the setup).

Use verbose mode (`verbose=true`) for detailed output.
"""
function gc(ctx::Context = Context(); collect_delay::Union{Period, Nothing} = nothing, verbose = false, force = false, kwargs...)
function gc(ctx::Context = Context(); collect_delay::Union{Period, Nothing} = nothing, verbose = false, force = false, collect_unused_for::Union{Period, Nothing} = nothing, kwargs...)
Context!(ctx; kwargs...)
if collect_delay !== nothing
@warn "The `collect_delay` parameter is no longer used. Packages are now deleted immediately when they become unreachable."
Expand Down Expand Up @@ -690,6 +676,33 @@ function gc(ctx::Context = Context(); collect_delay::Union{Period, Nothing} = no
all_scratch_dirs = Set(filter(Pkg.isdir_nothrow, all_scratch_dirs))
all_scratch_parents = Set(filter(Pkg.isfile_nothrow, all_scratch_parents))

# Apply time-based filtering if collect_unused_for is specified
# This creates a separate filtered set for marking packages as active,
# but preserves the full manifest list for writing back to usage files
manifest_tomls_for_gc = all_manifest_tomls
if collect_unused_for !== nothing
# Create a unified usage dict to check timestamps across all depots
unified_manifest_usage = UsageDict()
for (depot, usage) in manifest_usage_by_depot
for (manifest, time) in usage
# Keep the most recent time if a manifest appears in multiple depots
unified_manifest_usage[manifest] = max(get(unified_manifest_usage, manifest, DateTime(0)), time)
end
end

cutoff_time = now() - collect_unused_for
# Filter out manifests that haven't been used since the cutoff time
# This only affects which packages are marked as active for this GC run
manifest_tomls_for_gc = Set(f for f in all_manifest_tomls if get(unified_manifest_usage, f, DateTime(0)) >= cutoff_time)

if verbose
n_filtered = length(all_manifest_tomls) - length(manifest_tomls_for_gc)
if n_filtered > 0
printpkgstyle(ctx.io, :Filtered, "$(n_filtered) manifest(s) older than $(collect_unused_for)")
end
end
end

# Immediately write these back as condensed toml files
function write_condensed_toml(f::Function, usage_by_depot, fname)
for (depot, usage) in usage_by_depot
Expand Down Expand Up @@ -868,9 +881,10 @@ function gc(ctx::Context = Context(); collect_delay::Union{Period, Nothing} = no


# Scan manifests, parse them, read in all UUIDs listed and mark those as active
# Use manifest_tomls_for_gc which excludes old manifests if collect_unused_for is set
# printpkgstyle(ctx.io, :Active, "manifests:")
packages_to_keep = mark(
process_manifest_pkgs, all_manifest_tomls, ctx,
process_manifest_pkgs, manifest_tomls_for_gc, ctx,
verbose = verbose, file_str = "manifest files"
)

Expand All @@ -881,7 +895,7 @@ function gc(ctx::Context = Context(); collect_delay::Union{Period, Nothing} = no
x -> process_artifacts_toml(x, String[]),
all_artifact_tomls, ctx; verbose = verbose, file_str = "artifact files"
)
repos_to_keep = mark(process_manifest_repos, all_manifest_tomls, ctx; do_print = false)
repos_to_keep = mark(process_manifest_repos, manifest_tomls_for_gc, ctx; do_print = false)
# printpkgstyle(ctx.io, :Active, "scratchspaces:")
spaces_to_keep = mark(
x -> process_scratchspace(x, String[]),
Expand Down
18 changes: 11 additions & 7 deletions src/Pkg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -417,17 +417,21 @@ Pkg.test("foo"; test_args=["--extended"])
const test = API.test

"""
Pkg.gc(; collect_delay::Period=Day(7), io::IO=stderr)
Pkg.gc(; verbose=false, force=false, collect_unused_for=nothing, kwargs...)

Garbage-collect package and artifact installations by sweeping over all known
`Manifest.toml` and `Artifacts.toml` files, noting those that have been deleted, and then
finding artifacts and packages that are thereafter not used by any other projects,
marking them as "orphaned". This method will only remove orphaned objects (package
versions, artifacts, and scratch spaces) that have been continually un-used for a period
of `collect_delay`; which defaults to seven days.
finding artifacts and packages that are thereafter not used by any other projects.
Unused packages, artifacts, repos, and scratch spaces are immediately deleted.

To disable automatic garbage collection, you can set the environment variable
`JULIA_PKG_GC_AUTO` to `"false"` before starting Julia or call `API.auto_gc(false)`.
Garbage collection is only applied to the "user depot", e.g. the first entry in the
depot path. If you want to run `gc` on all depots set `force=true` (this might require
admin privileges depending on the setup).

Use verbose mode (`verbose=true`) for detailed output.

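The `collect_unused_for` parameter can be set to a `Period` (e.g., `Day(30)`, `Week(2)`). Manifests
whose most recent recorded use is older than that period are then ignored when determining which
packages and repos are still in use, so anything referenced only by such manifests is treated as unused.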
"""
const gc = API.gc

Expand Down
85 changes: 85 additions & 0 deletions test/pkg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,91 @@ end
end
end

@testset "Pkg.gc with collect_unused_for" begin
    # Part 1: a manifest whose last recorded use is older than `collect_unused_for`
    # must be reported as filtered; once its usage timestamp is recent it must not be.
    temp_pkg_dir() do project_path
        mktempdir() do env_dir
            cd(env_dir) do
                # Minimal project that depends on Example.
                write(
                    "Project.toml", """
                    name = "TestEnv"

                    [deps]
                    Example = "7876af07-990d-54b4-ab0e-23690620f79a"
                    """
                )

                # Matching manifest; this is the file whose usage timestamp is manipulated.
                manifest_path = joinpath(env_dir, "Manifest.toml")
                write(
                    manifest_path, """
                    # This file is machine-generated - editing it directly is not advised

                    julia_version = "$(VERSION.major).$(VERSION.minor).$(VERSION.patch)"
                    manifest_format = "2.0"

                    [[deps.Example]]
                    git-tree-sha1 = "46e44e869b4d90b96bd8ed1fdcf32244fddfb6cc"
                    uuid = "7876af07-990d-54b4-ab0e-23690620f79a"
                    version = "0.5.3"
                    """
                )

                usage_file = joinpath(Pkg.logdir(), "manifest_usage.toml")
                mkpath(dirname(usage_file))

                # Overwrite the usage log so it contains exactly one entry:
                # `manifest_path` last used at `time`.
                record_usage = time -> open(usage_file, "w") do io
                    TOML.print(io, Dict(manifest_path => [Dict("time" => time)]))
                end

                # Run a verbose gc with a 30 day cutoff and return everything it printed.
                gc_output = () -> begin
                    io_buf = IOBuffer()
                    Pkg.gc(verbose = true, collect_unused_for = Dates.Day(30), io = io_buf)
                    String(take!(io_buf))
                end

                # Last used 60 days ago: older than the cutoff, so gc must report
                # that exactly this one manifest was filtered out.
                record_usage(Dates.now() - Dates.Day(60))
                output = gc_output()
                @test occursin("Filtered", output)
                @test occursin("1 manifest(s) older than", output)

                # Used just now: newer than the cutoff, so nothing may be filtered.
                # (Plain substring checks on purpose: `occursin` with a `String`
                # needle is a literal match, not a regex match.)
                record_usage(Dates.now())
                output = gc_output()
                @test !occursin("manifest(s) older than", output)
            end
        end
    end

    # Part 2: gc accepts different `Period` subtypes for `collect_unused_for`.
    temp_pkg_dir() do project_path
        with_temp_env() do
            # An exception from any of these calls is recorded as a test error.
            for period in (Dates.Day(7), Dates.Week(2), Dates.Month(1))
                Pkg.gc(collect_unused_for = period)
            end
            @test true
        end
    end
end

if isdefined(Base.Filesystem, :delayed_delete_ref)
@testset "Pkg.gc for delayed deletes" begin
mktempdir() do root
Expand Down
Loading