diff --git a/docs/src/reference/job-submission.md b/docs/src/reference/job-submission.md index a8c32fc62..40f3c2572 100644 --- a/docs/src/reference/job-submission.md +++ b/docs/src/reference/job-submission.md @@ -126,6 +126,30 @@ Batch jobs are Julia scripts with (optional) associated Julia package environmen See also: [`@script_str`](@ref), [`script`](@ref), [`appbundle`](@ref) for more details, and the [guide on submitting batch jobs](@ref jobs-guide-batch) for a tutorial. +### [`.juliabundleignore` file](@id jobs-batch-juliabundleignore) + +A `.juliabundleignore` file can be used to exclude certain files from the appbundle that gets submitted to JuliaHub. +This is useful if you have some temporary development or data files in your project directory, in particular if they are large. + +Generally, the file is similar to a `.gitignore` file (though not as feature-rich), containing a list of [glob](https://en.wikipedia.org/wiki/Glob_(programming)) patterns. +More precisely, it uses [Glob.jl](https://github.com/vtjnash/Glob.jl) to match the patterns in the file against file system paths. + +For example, a valid `.juliabundleignore` might contain the following entries to exclude a log file, all CSV files, and a particular directory: + +``` +output.log +*.csv +output-data/ +``` + +This `.juliabundleignore` will ignore: + +- The `output.log` file next to the `.juliabundleignore`. +- All `.csv` files next to the `.juliabundleignore` and in subdirectories. +- All the contents of the `output-data/` directory next to the `.juliabundleignore`. + +You can also have additional `.juliabundleignore` files in subdirectories and they will only apply to those directories and their subdirectories. + ### Specifying the job image JuliaHub batch jobs can run in various container images. 
diff --git a/src/jobsubmission.jl b/src/jobsubmission.jl index fa23a360e..9c95832d0 100644 --- a/src/jobsubmission.jl +++ b/src/jobsubmission.jl @@ -738,6 +738,9 @@ The following should be kept in mind about how appbundles are handled: Registered packages are installed via the package manager via the standard environment instantiation, and their source code is not included in the bundle directly. +* You can use `.juliabundleignore` files to omit some files from the appbundle (secrets, large data files etc). + See the [relevant section in the reference manual](@ref jobs-batch-juliabundleignore) for more details. + * When the JuliaHub job starts, the bundle is unpacked and the job's starting working directory is set to the root of the unpacked appbundle directory, and you can e.g. load the data from those files with just `read("my-data.txt", String)`. diff --git a/test/fixtures/bundle1/Manifest.toml b/test/fixtures/bundle1/Manifest.toml new file mode 100644 index 000000000..dfcf889d4 --- /dev/null +++ b/test/fixtures/bundle1/Manifest.toml @@ -0,0 +1,7 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.12.0-rc1" +manifest_format = "2.0" +project_hash = "71853c6197a6a7f222db0f1978c7cb232b87c5ee" + +[deps] diff --git a/test/fixtures/bundle1/Project.toml b/test/fixtures/bundle1/Project.toml new file mode 100644 index 000000000..81648c0b1 --- /dev/null +++ b/test/fixtures/bundle1/Project.toml @@ -0,0 +1 @@ +[deps] diff --git a/test/fixtures/bundle1/dir1/binary.exe b/test/fixtures/bundle1/dir1/binary.exe new file mode 100644 index 000000000..e69de29bb diff --git a/test/fixtures/bundle1/dir1/foo.csv b/test/fixtures/bundle1/dir1/foo.csv new file mode 100644 index 000000000..e69de29bb diff --git a/test/fixtures/bundle1/foo.csv b/test/fixtures/bundle1/foo.csv new file mode 100644 index 000000000..e69de29bb diff --git a/test/fixtures/bundle1/keys.private b/test/fixtures/bundle1/keys.private new file mode 100644 index 
000000000..e69de29bb
diff --git a/test/fixtures/bundle1/output-data/bar b/test/fixtures/bundle1/output-data/bar
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/fixtures/bundle1/output-data/foo b/test/fixtures/bundle1/output-data/foo
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/fixtures/bundle1/script.jl b/test/fixtures/bundle1/script.jl
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/test/fixtures/bundle1/script.jl
@@ -0,0 +1 @@
+
diff --git a/test/fixtures/bundle1/settings.json b/test/fixtures/bundle1/settings.json
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/fixtures/bundle1/subignore/keys.private b/test/fixtures/bundle1/subignore/keys.private
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/fixtures/bundle1/subignore/readme.txt b/test/fixtures/bundle1/subignore/readme.txt
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/fixtures/bundle1/xyz.exe b/test/fixtures/bundle1/xyz.exe
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/packagebundler.jl b/test/packagebundler.jl
index 87a445fdc..d9cf7ab7b 100644
--- a/test/packagebundler.jl
+++ b/test/packagebundler.jl
@@ -266,3 +266,251 @@ end
         @test pred(joinpath(dir, "test", "foo", "test"))
     end
 end
+
+# Test helper: bundle the directory at `bundle_root_path` into a temporary
+# tarball with `JuliaHub._PackageBundler.bundle`, then return the sorted paths
+# of the entries that `Tar.list` reports as `:file` (entries of any other type
+# are left out of the listing).
+function bundle_and_file_listing(bundle_root_path::AbstractString)
+    tarball = tempname()
+    JuliaHub._PackageBundler.bundle(bundle_root_path; output=tarball, verbose=false)
+    file_headers = filter(header -> header.type == :file, Tar.list(tarball))
+    return sort!(map(header -> header.path, file_headers))
+end
+
+# In this test set, we bundle up the same directory multiple times:
+# first without any `.juliabundleignore`, and then with different versions
+# of it, checking the effect each one has on the bundle.
+@testset "fixtures/bundle1" begin
+    # Work on a fresh copy of the fixture in a temporary directory, so that the
+    # `.juliabundleignore` files written below never touch the source tree.
+    # Each nested testset overwrites the root ignore file left by the previous one.
+    tmp = mktempdir()
+    bundle_root = joinpath(tmp, "bundle1")
+    cp(joinpath(@__DIR__, "fixtures", "bundle1"), bundle_root)
+    # NOTE(review): presumably needed because the copied fixture can be read-only.
+    chmod(tmp, 0o777; recursive=true)
+    @test isdir(bundle_root)
+
+    @testset "no ignore file" begin
+        files = bundle_and_file_listing(bundle_root)
+        @test files == [
+            "bundle1/Manifest.toml",
+            "bundle1/Project.toml",
+            "bundle1/dir1/binary.exe",
+            "bundle1/dir1/foo.csv",
+            "bundle1/foo.csv",
+            "bundle1/keys.private",
+            "bundle1/output-data/bar",
+            "bundle1/output-data/foo",
+            "bundle1/script.jl",
+            "bundle1/settings.json",
+            "bundle1/subignore/keys.private",
+            "bundle1/subignore/readme.txt",
+            "bundle1/xyz.exe",
+        ]
+    end
+
+    @testset "empty ignore file" begin
+        write(
+            joinpath(bundle_root, ".juliabundleignore"),
+            """
+            """,
+        )
+        files = bundle_and_file_listing(bundle_root)
+        @test files == [
+            "bundle1/.juliabundleignore",
+            "bundle1/Manifest.toml",
+            "bundle1/Project.toml",
+            "bundle1/dir1/binary.exe",
+            "bundle1/dir1/foo.csv",
+            "bundle1/foo.csv",
+            "bundle1/keys.private",
+            "bundle1/output-data/bar",
+            "bundle1/output-data/foo",
+            "bundle1/script.jl",
+            "bundle1/settings.json",
+            "bundle1/subignore/keys.private",
+            "bundle1/subignore/readme.txt",
+            "bundle1/xyz.exe",
+        ]
+    end
+
+    # Note: a plain file name only matches next to the ignore file, not in subdirectories
+    @testset "foo.csv" begin
+        write(
+            joinpath(bundle_root, ".juliabundleignore"),
+            """
+            foo.csv
+            """,
+        )
+        files = bundle_and_file_listing(bundle_root)
+        @test files == [
+            "bundle1/.juliabundleignore",
+            "bundle1/Manifest.toml",
+            "bundle1/Project.toml",
+            "bundle1/dir1/binary.exe",
+            "bundle1/dir1/foo.csv",
+            "bundle1/keys.private",
+            "bundle1/output-data/bar",
+            "bundle1/output-data/foo",
+            "bundle1/script.jl",
+            "bundle1/settings.json",
+            "bundle1/subignore/keys.private",
+            "bundle1/subignore/readme.txt",
+            "bundle1/xyz.exe",
+        ]
+    end
+
+    @testset "*/foo.csv" begin
+        write(
+            joinpath(bundle_root, ".juliabundleignore"),
+            """
+            */foo.csv
+            """,
+        )
+        files = bundle_and_file_listing(bundle_root)
+        @test files == [
+            "bundle1/.juliabundleignore",
+            "bundle1/Manifest.toml",
+            "bundle1/Project.toml",
+            "bundle1/dir1/binary.exe",
+            "bundle1/foo.csv",
+            "bundle1/keys.private",
+            "bundle1/output-data/bar",
+            "bundle1/output-data/foo",
+            "bundle1/script.jl",
+            "bundle1/settings.json",
+            "bundle1/subignore/keys.private",
+            "bundle1/subignore/readme.txt",
+            "bundle1/xyz.exe",
+        ]
+    end
+
+    @testset "*.exe" begin
+        write(
+            joinpath(bundle_root, ".juliabundleignore"),
+            """
+            *.exe
+            """,
+        )
+        files = bundle_and_file_listing(bundle_root)
+        @test files == [
+            "bundle1/.juliabundleignore",
+            "bundle1/Manifest.toml",
+            "bundle1/Project.toml",
+            "bundle1/dir1/foo.csv",
+            "bundle1/foo.csv",
+            "bundle1/keys.private",
+            "bundle1/output-data/bar",
+            "bundle1/output-data/foo",
+            "bundle1/script.jl",
+            "bundle1/settings.json",
+            "bundle1/subignore/keys.private",
+            "bundle1/subignore/readme.txt",
+        ]
+    end
+
+    @testset "directories" begin
+        write(
+            joinpath(bundle_root, ".juliabundleignore"),
+            """
+            output-data/
+            foo.csv/
+            """,
+        )
+        files = bundle_and_file_listing(bundle_root)
+        @test files == [
+            "bundle1/.juliabundleignore",
+            "bundle1/Manifest.toml",
+            "bundle1/Project.toml",
+            "bundle1/dir1/binary.exe",
+            "bundle1/dir1/foo.csv",
+            "bundle1/foo.csv",
+            "bundle1/keys.private",
+            "bundle1/script.jl",
+            "bundle1/settings.json",
+            "bundle1/subignore/keys.private",
+            "bundle1/subignore/readme.txt",
+            "bundle1/xyz.exe",
+        ]
+    end
+
+    @testset "directories (invert)" begin
+        write(
+            joinpath(bundle_root, ".juliabundleignore"),
+            """
+            foo.csv/
+            output-data/
+            """,
+        )
+        files = bundle_and_file_listing(bundle_root)
+        @test files == [
+            "bundle1/.juliabundleignore",
+            "bundle1/Manifest.toml",
+            "bundle1/Project.toml",
+            "bundle1/dir1/binary.exe",
+            "bundle1/dir1/foo.csv",
+            "bundle1/foo.csv",
+            "bundle1/keys.private",
+            "bundle1/script.jl",
+            "bundle1/settings.json",
+            "bundle1/subignore/keys.private",
+            "bundle1/subignore/readme.txt",
+            "bundle1/xyz.exe",
+        ]
+    end
+
+    @testset "realistic" begin
+        write(
+            joinpath(bundle_root, ".juliabundleignore"),
+            """
+            *.exe
+            foo.csv
+            output-data/
+            """,
+        )
+        files = bundle_and_file_listing(bundle_root)
+        @test files == [
+            "bundle1/.juliabundleignore",
+            "bundle1/Manifest.toml",
+            "bundle1/Project.toml",
+            "bundle1/dir1/foo.csv",
+            "bundle1/keys.private",
+            "bundle1/script.jl",
+            "bundle1/settings.json",
+            "bundle1/subignore/keys.private",
+            "bundle1/subignore/readme.txt",
+        ]
+    end
+
+    @testset "subignore" begin
+        write(
+            joinpath(bundle_root, ".juliabundleignore"),
+            """
+            *.exe
+            foo.csv
+            output-data/
+            """,
+        )
+        # Same root ignore file as in "realistic", plus a nested ignore file in
+        # `subignore/`. The nested file should only affect `subignore/`: the
+        # root-level `keys.private` stays in the bundle, while
+        # `subignore/keys.private` is dropped. The nested ignore file itself is
+        # bundled like any other file.
+        write(joinpath(bundle_root, "subignore", ".juliabundleignore"), "keys.private\n")
+        files = bundle_and_file_listing(bundle_root)
+        @test files == [
+            "bundle1/.juliabundleignore",
+            "bundle1/Manifest.toml",
+            "bundle1/Project.toml",
+            "bundle1/dir1/foo.csv",
+            "bundle1/keys.private",
+            "bundle1/script.jl",
+            "bundle1/settings.json",
+            "bundle1/subignore/.juliabundleignore",
+            "bundle1/subignore/readme.txt",
+        ]
+    end
+end
+|