diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 03bb729b..37910135 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -13,13 +13,8 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest] - java-version: [8, 17] - include: - - os: macos-latest - java-version: 17 - - os: macos-latest - java-version: 11 + os: [ubuntu-latest, windows-latest, macos-latest] + java-version: [11, 17] runs-on: ${{ matrix.os }} diff --git a/Readme.adoc b/Readme.adoc index 103afbea..f39e6f6d 100644 --- a/Readme.adoc +++ b/Readme.adoc @@ -985,9 +985,9 @@ os.remove(target: Path, checkExists: Boolean = false): Boolean ---- Remove the target file or folder. Folders need to be empty to be removed; if you -want to remove a folder tree recursively, use <>. +want to remove a folder tree recursively, use <>. Returns `true` if the file was present before. -It will fail with an exception when the file is missing but `checkExists` is `true`, +It will fail with an exception when the file is missing but `checkExists` is `true`, or when the directory to remove is not empty. [source,scala] @@ -1215,6 +1215,249 @@ os.write(tempDir / "file", "Hello") os.list(tempDir) ==> Seq(tempDir / "file") ---- +=== Zip & Unzip Files + +==== `os.zip` + +[source,scala] +---- +def apply(dest: os.Path, + sources: Seq[ZipSource] = List(), + excludePatterns: Seq[Regex] = List(), + includePatterns: Seq[Regex] = List(), + preserveMtimes: Boolean = false, + deletePatterns: Seq[Regex] = List(), + compressionLevel: Int = -1 /* 0-9 */): os.Path +---- + +The zip object provides functionality to create or modify zip archives. It supports: + +- Zipping Files and Directories: You can zip both individual files and entire directories. +- Appending to Existing Archives: Files can be appended to an existing zip archive. +- Exclude Patterns (-x): You can specify files or patterns to exclude while zipping. 
+- Include Patterns (-i): You can include specific files or patterns while zipping. +- Delete Patterns (-d): You can delete specific files from an existing zip archive. +- Configuring whether or not to preserve filesystem mtimes + +This will create a new zip archive at `dest` containing all the files and folders +listed in `sources`. If `dest` already exists as a zip, the files will be appended to the +existing zip, and any existing zip entries matching `deletePatterns` will be removed. + +Note that `os.zip` doesn't support creating/unpacking symlinks or filesystem permissions +in Zip files, because the underlying `java.util.zip.Zip*Stream` doesn't support them. + +===== Zipping Files and Folders + +The example below demonstrates the core workflows: creating a zip, appending to it, and +unzipping it: + +[source,scala] +---- +// Zipping files and folders in a new zip file +val zipFileName = "zip-file-test.zip" +val zipFile1: os.Path = os.zip( + dest = wd / zipFileName, + sources = Seq( + wd / "File.txt", + wd / "folder1" + ) +) + +// Adding files and folders to an existing zip file +os.zip( + dest = zipFile1, + sources = Seq( + wd / "folder2", + wd / "Multi Line.txt" + ) +) + +// Unzip file to a destination folder +val unzippedFolder = os.unzip( + source = wd / zipFileName, + dest = wd / "unzipped folder" +) + +val paths = os.walk(unzippedFolder) +val expected = Seq( + // Files get included in the zip root using their name + wd / "unzipped folder/File.txt", + wd / "unzipped folder/Multi Line.txt", + // Folder contents get included relative to the source root + wd / "unzipped folder/nestedA", + wd / "unzipped folder/nestedB", + wd / "unzipped folder/one.txt", + wd / "unzipped folder/nestedA/a.txt", + wd / "unzipped folder/nestedB/b.txt", +) +assert(paths.sorted == expected) +---- + +===== Renaming files in the zip + +You can also pass in a mapping to `os.zip` to specify exactly where in the zip each +input source file or
folder should go: + +```scala +val zipFileName = "zip-file-test.zip" +val zipFile1: os.Path = os.zip( + dest = wd / zipFileName, + sources = List( + // renaming files and folders + wd / "File.txt" -> os.sub / "renamed-file.txt", + wd / "folder1" -> os.sub / "renamed-folder" + ) +) + +val unzippedFolder = os.unzip( + source = zipFile1, + dest = wd / "unzipped folder" +) + +val paths = os.walk(unzippedFolder) +val expected = Seq( + wd / "unzipped folder/renamed-file.txt", + wd / "unzipped folder/renamed-folder", + wd / "unzipped folder/renamed-folder/one.txt", +) +assert(paths.sorted == expected) +``` + +===== Excluding/Including Files in Zip + +You can specify files or folders to be excluded or included when creating the zip: + +[source,scala] +---- +os.zip( + os.Path("/path/to/destination.zip"), + List(os.Path("/path/to/folder")), + excludePatterns = List(".*\\.log".r, "temp/.*".r), // Exclude log files and "temp" folder + includePatterns = List(".*\\.txt".r) // Include only .txt files +) + +---- + +This will include only `.txt` files, excluding any `.log` files and anything inside +the `temp` folder. + +==== `os.zip.stream` + +You can use `os.zip.stream` to write the final zip to an `OutputStream` rather than a +concrete `os.Path`. `os.zip.stream` returns a `geny.Writable`, which has a `writeBytesTo` +method: + +```scala +val zipFileName = "zipStreamFunction.zip" + +val stream = os.write.outputStream(wd / "zipStreamFunction.zip") + +val writable = os.zip.stream(sources = Seq(wd / "File.txt")) + +writable.writeBytesTo(stream) +stream.close() + +val unzippedFolder = os.unzip( + source = wd / zipFileName, + dest = wd / "zipStreamFunction" +) + +val paths = os.walk(unzippedFolder) +assert(paths == Seq(unzippedFolder / "File.txt")) +``` + +This can be useful for streaming the zipped data to places which are not files: +over the network, over a pipe, etc.
+ +==== `os.unzip` + +===== Unzipping Files +[source,scala] + +---- +os.unzip(os.Path("/path/to/archive.zip"), os.Path("/path/to/destination")) +---- + +This extracts the contents of `archive.zip` to the specified destination. + + +===== Excluding Files While Unzipping +You can exclude certain files from being extracted using patterns: + +[source,scala] +---- +os.unzip( + os.Path("/path/to/archive.zip"), + os.Path("/path/to/destination"), + excludePatterns = List(".*\\.log".r, "temp/.*".r) // Exclude log files and the "temp" folder +) +---- + +===== `os.unzip.list` +You can list the contents of the zip file without extracting them: + +[source,scala] +---- +os.unzip.list(os.Path("/path/to/archive.zip")) +---- + +This returns a generator of all the file paths contained in the zip archive. + +==== `os.unzip.stream` + +You can unzip a zip file from any arbitrary `java.io.InputStream` containing its binary data +using the `os.unzip.stream` method: + +```scala +val readableZipStream: java.io.InputStream = ??? + +// Unzipping the stream to the destination folder +os.unzip.stream( + source = readableZipStream, + dest = unzippedFolder +) +``` + +This can be useful if the zip file does not exist on disk, e.g. if it is received over the network +or produced in-memory by application logic. + +OS-Lib also provides the `os.unzip.streamRaw` API, which is a lower level API used internally +within `os.unzip.stream` but can also be used directly if lower-level control is necessary. + +==== `os.zip.open` + +```scala +os.zip.open(path: Path): ZipRoot +``` + +`os.zip.open` allows you to treat zip files as filesystems, using normal `os.*` operations +on them. This provides a more flexible way to manipulate the contents of the zip in a fine-grained +manner when the normal `os.zip` or `os.unzip` operations do not suffice.
+ +```scala +val zipFile = os.zip.open(wd / "zip-test.zip") +try { + os.copy(wd / "File.txt", zipFile / "File.txt") + os.copy(wd / "folder1", zipFile / "folder1") + os.copy(wd / "folder2", zipFile / "folder2") +}finally zipFile.close() + +val zipFile2 = os.zip.open(wd / "zip-test.zip") +try{ + os.list(zipFile2) ==> Vector(zipFile2 / "File.txt", zipFile2 / "folder1", zipFile2 / "folder2") + os.remove.all(zipFile2 / "folder2") + os.remove(zipFile2 / "File.txt") +}finally zipFile2.close() + +val zipFile3 = os.zip.open(wd / "zip-test.zip") +try os.list(zipFile3) ==> Vector(zipFile3 / "folder1") +finally zipFile3.close() +``` + +`os.zip.open` returns a `ZipRoot`, which is identical to `os.Path` except it references the root +of the zip file rather than a bare path on the filesystem. Note that you need to call `ZipRoot#close()` +when you are done with it to avoid leaking filesystem resources. + === Filesystem Metadata ==== `os.stat` @@ -1708,13 +1951,13 @@ val yes10 = os.proc("yes") ---- This feature is implemented inside the library and will terminate any process reading the -stdin of other process in pipeline on every IO error. This behavior can be disabled via the -`handleBrokenPipe` flag on `call` and `spawn` methods. Note that Windows does not support -broken pipe behaviour, so a command like`yes` would run forever. `handleBrokenPipe` is set +stdin of other process in pipeline on every IO error. This behavior can be disabled via the +`handleBrokenPipe` flag on `call` and `spawn` methods. Note that Windows does not support +broken pipe behaviour, so a command like`yes` would run forever. `handleBrokenPipe` is set to false by default on Windows. Both `call` and `spawn` correspond in their behavior to their counterparts in the `os.proc`, -but `spawn` returns the `os.ProcessPipeline` instance instead. It offers the same +but `spawn` returns the `os.ProcessPipeline` instance instead. 
It offers the same `API` as `SubProcess`, but will operate on the set of processes instead of a single one. `Pipefail` is enabled by default, so if any of the processes in the pipeline fails, the whole @@ -2105,14 +2348,14 @@ explicitly choose to convert relative paths to absolute using some base. ==== Roots and filesystems -If you are using a system that supports different roots of paths, e.g. Windows, -you can use the argument of `os.root` to specify which root you want to use. +If you are using a system that supports different roots of paths, e.g. Windows, +you can use the argument of `os.root` to specify which root you want to use. If not specified, the default root will be used (usually, C on Windows, / on Unix). [source,scala] ---- -val root = os.root('C:\') / "Users/me" -assert(root == os.Path("C:\Users\me")) +val root = os.root("C:\\") / "Users/me" +assert(root == os.Path("C:\\Users\\me")) ---- Additionally, custom filesystems can be specified by passing a `FileSystem` to @@ -2128,11 +2371,11 @@ val fs = FileSystems.newFileSystem(uri, env); val path = os.root("/", fs) / "dir" ---- -Note that the jar file system operations suchs as writing to a file are supported -only on JVM 11+. Depending on the filesystem, some operations may not be supported - -for example, running an `os.proc` with pwd in a jar file won't work. You may also -meet limitations imposed by the implementations - in jar file system, the files are -created only after the file system is closed. Until that, the ones created in your +Note that the jar file system operations suchs as writing to a file are supported +only on JVM 11+. Depending on the filesystem, some operations may not be supported - +for example, running an `os.proc` with pwd in a jar file won't work. You may also +meet limitations imposed by the implementations - in jar file system, the files are +created only after the file system is closed. Until that, the ones created in your program are kept in memory. 
==== `os.ResourcePath` @@ -2199,9 +2442,9 @@ By default, the following types of values can be used where-ever ``os.Source``s are required: * Any `geny.Writable` data type: - ** `Array[Byte]` - ** `java.lang.String` (these are treated as UTF-8) - ** `java.io.InputStream` +** `Array[Byte]` +** `java.lang.String` (these are treated as UTF-8) +** `java.io.InputStream` * `java.nio.channels.SeekableByteChannel` * Any `TraversableOnce[T]` of the above: e.g. `Seq[String]`, `List[Array[Byte]]`, etc. @@ -2266,9 +2509,9 @@ string, int or set representations of the `os.PermSet` via: === 0.10.7 * Allow multi-segment paths segments for literals https://github.com/com-lihaoyi/os-lib/pull/297: You - can now write `os.pwd / "foo/bar/qux"` rather than `os.pwd / "foo" / "bar" / "qux"`. Note that this - is only allowed for string literals, and non-literal path segments still need to be wrapped e.g. - `def myString = "foo/bar/qux"; os.pwd / os.SubPath(myString)` for security and safety purposes +can now write `os.pwd / "foo/bar/qux"` rather than `os.pwd / "foo" / "bar" / "qux"`. Note that this +is only allowed for string literals, and non-literal path segments still need to be wrapped e.g. 
+`def myString = "foo/bar/qux"; os.pwd / os.SubPath(myString)` for security and safety purposes [#0-10-6] === 0.10.6 @@ -2279,23 +2522,23 @@ string, int or set representations of the `os.PermSet` via: === 0.10.5 * Introduce `os.SubProcess.env` `DynamicVariable` to override default `env` - (https://github.com/com-lihaoyi/os-lib/pull/295) +(https://github.com/com-lihaoyi/os-lib/pull/295) [#0-10-4] === 0.10.4 * Add a lightweight syntax for `os.call()` and `os.spawn` APIs - (https://github.com/com-lihaoyi/os-lib/pull/292) +(https://github.com/com-lihaoyi/os-lib/pull/292) * Add a configurable grace period when subprocesses timeout and have to - be terminated to give a chance for shutdown logic to run - (https://github.com/com-lihaoyi/os-lib/pull/286) +be terminated to give a chance for shutdown logic to run +(https://github.com/com-lihaoyi/os-lib/pull/286) [#0-10-3] === 0.10.3 * `os.Inherit` now can be redirected on a threadlocal basis via `os.Inherit.in`, `.out`, or `.err`. - `os.InheritRaw` is available if you do not want the redirects to take effect +`os.InheritRaw` is available if you do not want the redirects to take effect [#0-10-2] diff --git a/os/src/Internals.scala b/os/src/Internals.scala index 3e8564bd..a0cebf6e 100644 --- a/os/src/Internals.scala +++ b/os/src/Internals.scala @@ -7,18 +7,23 @@ object Internals { val emptyStringArray = Array.empty[String] - def transfer0(src: InputStream, sink: (Array[Byte], Int) => Unit) = { + def transfer0(src: InputStream, sink: (Array[Byte], Int) => Unit): Unit = { + transfer0(src, sink, true) + } + def transfer0(src: InputStream, sink: (Array[Byte], Int) => Unit, close: Boolean = true): Unit = { val buffer = new Array[Byte](8192) var r = 0 while (r != -1) { r = src.read(buffer) if (r != -1) sink(buffer, r) } - src.close() + if (close) src.close() } - def transfer(src: InputStream, dest: OutputStream) = transfer0( + def transfer(src: InputStream, dest: OutputStream): Unit = transfer(src, dest, true) + def transfer(src: 
InputStream, dest: OutputStream, close: Boolean = true): Unit = transfer0( src, - dest.write(_, 0, _) + dest.write(_, 0, _), + close ) } diff --git a/os/src/ReadWriteOps.scala b/os/src/ReadWriteOps.scala index f1f999fb..ee70a157 100644 --- a/os/src/ReadWriteOps.scala +++ b/os/src/ReadWriteOps.scala @@ -34,6 +34,7 @@ object write { else Array(PosixFilePermissions.asFileAttribute(perms.toSet())) java.nio.file.Files.createFile(target.toNIO, permArray: _*) } + java.nio.file.Files.newOutputStream( target.toNIO, openOptions.toArray: _* diff --git a/os/src/ZipOps.scala b/os/src/ZipOps.scala new file mode 100644 index 00000000..ebe564b2 --- /dev/null +++ b/os/src/ZipOps.scala @@ -0,0 +1,316 @@ +package os + +import java.net.URI +import java.nio.file.{FileSystem, FileSystems, Files} +import java.nio.file.attribute.{BasicFileAttributeView, FileTime, PosixFilePermissions} +import java.util.zip.{ZipEntry, ZipFile, ZipInputStream, ZipOutputStream} +import scala.collection.JavaConverters._ +import scala.util.matching.Regex + +object zip { + + /** + * Opens a zip file as a filesystem root that you can operate on using `os.*` APIs. Note + * that you need to call `close()` on the returned `ZipRoot` when you are done with it, to + * avoid leaking filesystem resources + */ + def open(path: Path): ZipRoot = { + new ZipRoot(FileSystems.newFileSystem( + new URI("jar", path.wrapped.toUri.toString, null), + Map("create" -> "true").asJava + )) + } + + /** + * Zips the provided list of files and directories into a single ZIP archive. + * + * If `dest` already exists and is a zip, performs modifications to `dest` in place + * rather than creating a new zip. + * + * @param dest The path to the destination ZIP file. + * @param sources A list of paths to files and directories to be zipped. Defaults to an empty list. + * @param excludePatterns A list of regular expression patterns to exclude files from the ZIP archive. Defaults to an empty list. 
+ * @param includePatterns A list of regular expression patterns to include files in the ZIP archive. Defaults to an empty list (includes all files). + * @param preserveMtimes Whether to preserve modification times (mtimes) of the files. + * @param deletePatterns A list of regular expression patterns to delete files from an existing ZIP archive before appending new ones. + * @param compressionLevel number from 0-9, where 0 is no compression and 9 is best compression. Defaults to -1 (default compression) + * @return The path to the created ZIP archive. + */ + def apply( + dest: os.Path, + sources: Seq[ZipSource] = List(), + excludePatterns: Seq[Regex] = List(), + includePatterns: Seq[Regex] = List(), + preserveMtimes: Boolean = false, + deletePatterns: Seq[Regex] = List(), + compressionLevel: Int = java.util.zip.Deflater.DEFAULT_COMPRESSION + ): os.Path = { + + if (os.exists(dest)) { + val opened = open(dest) + try { + for { + openedPath <- os.walk(opened) + if anyPatternsMatch(openedPath.relativeTo(opened).toString, deletePatterns) + } os.remove.all(openedPath) + + createNewZip0( + sources, + excludePatterns, + includePatterns, + (path, sub) => { + os.copy(path, opened / sub, createFolders = true) + if (!preserveMtimes) { + os.mtime.set(opened / sub, 0) + // This is the only way we can properly zero out filesystem metadata within the + // Zip file filesystem; `os.mtime.set` is not enough + val view = + Files.getFileAttributeView((opened / sub).toNIO, classOf[BasicFileAttributeView]) + view.setTimes(FileTime.fromMillis(0), FileTime.fromMillis(0), FileTime.fromMillis(0)) + } + } + ) + } finally opened.close() + } else { + val f = new java.io.FileOutputStream(dest.toIO) + try createNewZip( + sources, + excludePatterns, + includePatterns, + preserveMtimes, + compressionLevel, + f + ) + finally f.close() + } + dest + } + + private def createNewZip0( + sources: Seq[ZipSource], + excludePatterns: Seq[Regex], + includePatterns: Seq[Regex], + makeZipEntry0: (os.Path, 
os.SubPath) => Unit + ): Unit = { + sources.foreach { source => + if (os.isDir(source.src)) { + for (path <- os.walk(source.src)) { + if (os.isFile(path) && shouldInclude(path.toString, excludePatterns, includePatterns)) { + makeZipEntry0(path, source.dest.getOrElse(os.sub) / path.subRelativeTo(source.src)) + } + } + } else if (shouldInclude(source.src.last, excludePatterns, includePatterns)) { + makeZipEntry0(source.src, source.dest.getOrElse(os.sub / source.src.last)) + } + } + } + private def createNewZip( + sources: Seq[ZipSource], + excludePatterns: Seq[Regex], + includePatterns: Seq[Regex], + preserveMtimes: Boolean, + compressionLevel: Int, + out: java.io.OutputStream + ): Unit = { + val zipOut = new ZipOutputStream(out) + zipOut.setLevel(compressionLevel) + + try { + createNewZip0( + sources, + excludePatterns, + includePatterns, + (path, sub) => makeZipEntry(path, sub, preserveMtimes, zipOut) + ) + } finally { + zipOut.close() + } + } + + private[os] def anyPatternsMatch(fileName: String, patterns: Seq[Regex]) = { + patterns.exists(_.findFirstIn(fileName).isDefined) + } + private[os] def shouldInclude( + fileName: String, + excludePatterns: Seq[Regex], + includePatterns: Seq[Regex] + ): Boolean = { + val isExcluded = anyPatternsMatch(fileName, excludePatterns) + val isIncluded = includePatterns.isEmpty || anyPatternsMatch(fileName, includePatterns) + !isExcluded && isIncluded + } + + private def makeZipEntry( + file: os.Path, + sub: os.SubPath, + preserveMtimes: Boolean, + zipOut: ZipOutputStream + ) = { + + val mtimeOpt = if (preserveMtimes) Some(os.mtime(file)) else None + + val fis = if (os.isFile(file)) Some(os.read.inputStream(file)) else None + try makeZipEntry0(sub, fis, mtimeOpt, zipOut) + finally fis.foreach(_.close()) + } + + private def makeZipEntry0( + sub: os.SubPath, + is: Option[java.io.InputStream], + preserveMtimes: Option[Long], + zipOut: ZipOutputStream + ) = { + val zipEntry = new ZipEntry(sub.toString) + + preserveMtimes match { + case 
Some(mtime) => zipEntry.setTime(mtime) + case None => zipEntry.setTime(0) + } + + zipOut.putNextEntry(zipEntry) + is.foreach(os.Internals.transfer(_, zipOut, close = false)) + } + + /** + * Zips the provided list of files and directories and returns a geny.Writable + * that produces the ZIP data on whatever output stream it is written to. + * + * @param sources A list of paths to files and directories to be zipped. + * @param excludePatterns A list of regular expression patterns to exclude files during zipping. Defaults to an empty list. + * @param includePatterns A list of regular expression patterns to include files in the ZIP archive. Defaults to an empty list (includes all files). + * @param preserveMtimes Whether to preserve modification times (mtimes) of the files. + * @param compressionLevel number from 0-9, where 0 is no compression and 9 is best compression. Defaults to -1 (default compression) + * @return A geny.Writable object for writing the ZIP data. + */ + def stream( + sources: Seq[ZipSource], + excludePatterns: Seq[Regex] = List(), + includePatterns: Seq[Regex] = List(), + preserveMtimes: Boolean = false, + compressionLevel: Int = java.util.zip.Deflater.DEFAULT_COMPRESSION + ): geny.Writable = { + (outputStream: java.io.OutputStream) => + { + createNewZip( + sources, + excludePatterns, + includePatterns, + preserveMtimes, + compressionLevel, + outputStream + ) + } + } + + /** + * A filesystem root representing a zip file + */ + class ZipRoot private[os] (fs: FileSystem) extends Path(fs.getRootDirectories.iterator().next()) + with AutoCloseable { + def close(): Unit = fs.close() + } + + /** + * A file or folder you want to include in a zip file.
+ */ + class ZipSource private[os] (val src: os.Path, val dest: Option[os.SubPath]) + object ZipSource { + implicit def fromPath(src: os.Path): ZipSource = new ZipSource(src, None) + implicit def fromPathTuple(tuple: (os.Path, os.SubPath)): ZipSource = + new ZipSource(tuple._1, Some(tuple._2)) + } + +} + +object unzip { + + /** + * Lists the contents of the given zip file without extracting it + */ + def list( + source: os.Path, + excludePatterns: Seq[Regex] = List(), + includePatterns: Seq[Regex] = List() + ): Generator[os.SubPath] = { + for { + (zipEntry, zipInputStream) <- + streamRaw(os.read.stream(source), excludePatterns, includePatterns) + } yield os.SubPath(zipEntry.getName) + } + + /** + * Extract the given zip file into the destination directory + * + * @param source An `os.Path` containing a zip file + * @param dest The path to the destination directory for extracted files. + * @param excludePatterns A list of regular expression patterns to exclude files during extraction. (Optional) + */ + def apply( + source: os.Path, + dest: os.Path, + excludePatterns: Seq[Regex] = List(), + includePatterns: Seq[Regex] = List() + ): os.Path = { + stream(os.read.stream(source), dest, excludePatterns, includePatterns) + dest + } + + /** + * Unzips a ZIP data stream represented by a geny.Readable and extracts it to a destination directory. + * + * @param source A geny.Readable object representing the ZIP data stream. + * @param dest The path to the destination directory for extracted files. + * @param excludePatterns A list of regular expression patterns to exclude files during extraction. 
(Optional) + */ + def stream( + source: geny.Readable, + dest: os.Path, + excludePatterns: Seq[Regex] = List(), + includePatterns: Seq[Regex] = List() + ): Unit = { + for ((zipEntry, zipInputStream) <- streamRaw(source, excludePatterns, includePatterns)) { + val newFile = dest / os.SubPath(zipEntry.getName) + if (zipEntry.isDirectory) os.makeDir.all(newFile) + else { + val outputStream = os.write.outputStream(newFile, createFolders = true) + try os.Internals.transfer(zipInputStream, outputStream, close = false) + finally outputStream.close() + } + } + } + + /** + * Low-level api that streams the contents of the given zip file: takes a `geny.Readable` + * providing the bytes of the zip file, and returns a `geny.Generator` of each included `ZipEntry` + * paired with the underlying `ZipInputStream`, positioned at that entry while it is handled. + */ + def streamRaw( + source: geny.Readable, + excludePatterns: Seq[Regex] = List(), + includePatterns: Seq[Regex] = List() + ): geny.Generator[(ZipEntry, java.io.InputStream)] = { + new Generator[(ZipEntry, java.io.InputStream)] { + override def generate(handleItem: ((ZipEntry, java.io.InputStream)) => Generator.Action) + : Generator.Action = { + var lastAction: Generator.Action = Generator.Continue + source.readBytesThrough { inputStream => + val zipInputStream = new ZipInputStream(inputStream) + try { + var zipEntry: ZipEntry = zipInputStream.getNextEntry + while (lastAction == Generator.Continue && zipEntry != null) { + // Skip files that match the exclusion patterns + if (os.zip.shouldInclude(zipEntry.getName, excludePatterns, includePatterns)) { + lastAction = handleItem((zipEntry, zipInputStream)) + } + zipEntry = zipInputStream.getNextEntry + } + } finally { + try zipInputStream.closeEntry() + finally zipInputStream.close() + } + } + lastAction + } + } + } +} diff --git a/os/test/src-jvm/ZipOpJvmTests.scala b/os/test/src-jvm/ZipOpJvmTests.scala new file mode 100644 index 00000000..dfe4bad2 --- /dev/null +++ b/os/test/src-jvm/ZipOpJvmTests.scala @@ -0,0 +1,195
@@ +package test.os +import TestUtil.prep +import utest._ + +import java.nio.file.attribute.FileTime +import java.nio.file.{Files, Paths} +import java.util.zip.ZipFile +import scala.collection.JavaConverters._ + +object ZipOpJvmTests extends TestSuite { + + def tests = Tests { + + test("zipAndUnzipFolder") - prep { wd => + // Zipping files and folders in a new zip file + val zipFileName = "zip-file-test.zip" + val zipFile1: os.Path = os.zip( + dest = wd / zipFileName, + sources = Seq( + wd / "File.txt", + wd / "folder1" + ) + ) + // Adding files and folders to an existing zip file + os.zip( + dest = zipFile1, + sources = Seq( + wd / "folder2", + wd / "Multi Line.txt" + ) + ) + + // Unzip file to a destination folder + val unzippedFolder = os.unzip( + source = wd / zipFileName, + dest = wd / "unzipped folder" + ) + + val paths = os.walk(unzippedFolder) + val expected = Seq( + // Files get included in the zip root using their name + wd / "unzipped folder/File.txt", + wd / "unzipped folder/Multi Line.txt", + // Folder contents get included relative to the source root + wd / "unzipped folder/nestedA", + wd / "unzipped folder/nestedB", + wd / "unzipped folder/one.txt", + wd / "unzipped folder/nestedA/a.txt", + wd / "unzipped folder/nestedB/b.txt" + ) + assert(paths.sorted == expected) + } + + test("zipAndUnzipPreserveMtimes") - prep { wd => + // Create a file and set its modification time + val testFile = wd / "FileWithMtime.txt" + os.write(testFile, "Test content") + + // Use basic System.currentTimeMillis() for modification time + val originalMtime = System.currentTimeMillis() - (1 * 60 * 1000) // 1 minute ago + val path = Paths.get(testFile.toString) + Files.setLastModifiedTime(path, FileTime.fromMillis(originalMtime)) + + // Zipping the file with preserveMtimes = true + val zipFile = os.zip( + dest = wd / "zipWithMtimePreservation.zip", + sources = List(testFile), + preserveMtimes = true + ) + + val existingZipFile = new ZipFile(zipFile.toNIO.toFile) + val 
actualMTime = existingZipFile.entries().asScala.toList.head.getTime + + // Compare the original and actual modification times (in minutes) + assert((originalMtime / (1000 * 60)) == (actualMTime / (1000 * 60))) + } + + def zipAndUnzipDontPreserveMtimes(wd: os.Path, exerciseAppend: Boolean) = { + + val testFile = wd / "FileWithMtime.txt" + os.write.over(testFile, "Test content") + val testFile2 = wd / "FileWithMtime2.txt" + + val mtime1 = os.mtime(testFile) + + val zipFile1 = os.zip( + dest = wd / "zipWithoutMtimes1.zip", + sources = List(testFile), + preserveMtimes = false + ) + + if (exerciseAppend) { + + os.write.over(testFile2, "Test content2") + os.zip( + dest = wd / "zipWithoutMtimes1.zip", + sources = List(testFile2), + preserveMtimes = false + ) + } + + // Sleep a bit to make sure the mtime has time to change, since zip files may + // have a very coarse granularity of up to two seconds + // https://stackoverflow.com/questions/64048499/zipfile-lib-weird-behaviour-with-seconds-in-modified-time + Thread.sleep(5000) + os.write.over(testFile, "Test content") + + val mtime2 = os.mtime(testFile) + + val zipFile2 = os.zip( + dest = wd / "zipWithoutMtimes2.zip", + sources = List(testFile), + preserveMtimes = false + ) + + if (exerciseAppend) { + os.write.over(testFile2, "Test content2") + os.zip( + dest = wd / "zipWithoutMtimes2.zip", + sources = List(testFile2), + preserveMtimes = false + ) + } + + // Even though the mtimes of the two included files are different, the two + // final zip files end up being byte-for-byte the same because the mtimes get wiped + assert(mtime1 != mtime2) + assert(java.util.Arrays.equals(os.read.bytes(zipFile1), os.read.bytes(zipFile2))) + } + + test("zipAndUnzipDontPreserveMtimes") { + test("noAppend") - prep { wd => zipAndUnzipDontPreserveMtimes(wd, false) } + test("append") - prep { wd => zipAndUnzipDontPreserveMtimes(wd, true) } + } + + test("deletePatterns") - prep { wd => + val amxFile = "File.amx" + os.copy(wd / "File.txt", wd /
amxFile) + + // Zipping files and folders in a new zip file + val zipFileName = "zipByDeletingCertainFiles.zip" + val zipFile1: os.Path = os.zip( + dest = wd / zipFileName, + sources = List( + wd / "File.txt", + wd / amxFile, + wd / "Multi Line.txt" + ) + ) + + os.zip( + dest = zipFile1, + deletePatterns = List(amxFile.r) + ) + + // Unzip file to check for contents + val outputZipFilePath = os.unzip( + zipFile1, + dest = wd / "zipByDeletingCertainFiles" + ) + val paths = os.walk(wd / "zipByDeletingCertainFiles").sorted + val expected = Seq( + outputZipFilePath / "File.txt", + outputZipFilePath / "Multi Line.txt" + ) + + assert(paths == expected) + } + + test("open") - prep { wd => + val zipFile = os.zip.open(wd / "zip-test.zip") + try { + os.copy(wd / "File.txt", zipFile / "File.txt") + os.copy(wd / "folder1", zipFile / "folder1") + os.copy(wd / "folder2", zipFile / "folder2") + } finally zipFile.close() + + val zipFile2 = os.zip.open(wd / "zip-test.zip") + try { + os.list(zipFile2) ==> Vector( + zipFile2 / "File.txt", + zipFile2 / "folder1", + zipFile2 / "folder2" + ) + os.remove.all(zipFile2 / "folder2") + os.remove(zipFile2 / "File.txt") + } finally zipFile2.close() + + val zipFile3 = os.zip.open(wd / "zip-test.zip") + try os.list(zipFile3) ==> Vector(zipFile3 / "folder1") + finally zipFile3.close() + + } + } +} diff --git a/os/test/src/ZipOpTests.scala b/os/test/src/ZipOpTests.scala new file mode 100644 index 00000000..3d41bdb6 --- /dev/null +++ b/os/test/src/ZipOpTests.scala @@ -0,0 +1,225 @@ +package test.os + +import os.zip +import test.os.TestUtil.prep +import utest._ + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, PrintStream} +import java.util.zip.{ZipEntry, ZipOutputStream} + +object ZipOpTests extends TestSuite { + + def tests = Tests { + test("level") - prep { wd => + val zipsForLevel = for (i <- Range.inclusive(0, 9)) yield { + os.write.over(wd / "File.txt", Range(0, 1000).map(x => x.toString * x)) + os.zip( + dest = wd / 
s"archive-$i.zip", + sources = Seq( + wd / "File.txt", + wd / "folder1" + ), + compressionLevel = i + ) + } + + // We can't compare every level because compression isn't fully monotonic, + // but we compare some arbitrary levels just to sanity check things + + // Uncompressed zip is definitely bigger than first level of compression + assert(os.size(zipsForLevel(0)) > os.size(zipsForLevel(1))) + // First level of compression is bigger than middle compression + assert(os.size(zipsForLevel(1)) > os.size(zipsForLevel(5))) + // Middle compression is bigger than best compression + assert(os.size(zipsForLevel(5)) > os.size(zipsForLevel(9))) + } + test("renaming") - prep { wd => + val zipFileName = "zip-file-test.zip" + val zipFile1: os.Path = os.zip( + dest = wd / zipFileName, + sources = Seq( + // renaming files and folders + wd / "File.txt" -> os.sub / "renamed-file.txt", + wd / "folder1" -> os.sub / "renamed-folder" + ) + ) + + val unzippedFolder = os.unzip( + source = zipFile1, + dest = wd / "unzipped folder" + ) + + val paths = os.walk(unzippedFolder) + val expected = Seq( + wd / "unzipped folder/renamed-file.txt", + wd / "unzipped folder/renamed-folder", + wd / "unzipped folder/renamed-folder/one.txt" + ) + assert(paths.sorted == expected) + } + + test("excludePatterns") - prep { wd => + val amxFile = "File.amx" + os.copy(wd / "File.txt", wd / amxFile) + + // Zipping files and folders in a new zip file + val zipFileName = "zipByExcludingCertainFiles.zip" + val zipFile1: os.Path = os.zip( + dest = wd / zipFileName, + sources = Seq( + wd / "File.txt", + wd / amxFile, + wd / "Multi Line.txt" + ), + excludePatterns = Seq(".*\\.txt".r) + ) + + // Unzip file to check for contents + val outputZipFilePath = os.unzip( + zipFile1, + dest = wd / "zipByExcludingCertainFiles" + ) + val paths = os.walk(outputZipFilePath).sorted + val expected = Seq(wd / "zipByExcludingCertainFiles/File.amx") + assert(paths == expected) + } + + test("includePatterns") - prep { wd => + val amxFile 
= "File.amx" + os.copy(wd / "File.txt", wd / amxFile) + + // Zipping files and folders in a new zip file + val zipFileName = "zipByIncludingCertainFiles.zip" + val zipFile1: os.Path = os.zip( + dest = wd / zipFileName, + sources = Seq( + wd / "File.txt", + wd / amxFile, + wd / "Multi Line.txt" + ), + includePatterns = Seq(".*\\.amx".r) + ) + + // Unzip file to check for contents + val outputZipFilePath = + os.unzip(zipFile1, dest = wd / "zipByIncludingCertainFiles") + val paths = os.walk(outputZipFilePath) + val expected = Seq(wd / "zipByIncludingCertainFiles" / amxFile) + assert(paths == expected) + } + + test("zipStream") - prep { wd => + val zipFileName = "zipStreamFunction.zip" + + val stream = os.write.outputStream(wd / "zipStreamFunction.zip") + + val writable = zip.stream(sources = Seq(wd / "File.txt")) + + writable.writeBytesTo(stream) + stream.close() + + val unzippedFolder = os.unzip( + source = wd / zipFileName, + dest = wd / "zipStreamFunction" + ) + + val paths = os.walk(unzippedFolder) + assert(paths == Seq(unzippedFolder / "File.txt")) + } + + test("list") - prep { wd => + // Zipping files and folders in a new zip file + val zipFileName = "listContentsOfZipFileWithoutExtracting.zip" + val zipFile: os.Path = os.zip( + dest = wd / zipFileName, + sources = Seq( + wd / "File.txt", + wd / "folder1" + ) + ) + + // List the entries of the zip file without extracting it + val listedContents = os.unzip.list(source = wd / zipFileName).toSeq + + val expected = Seq(os.sub / "File.txt", os.sub / "one.txt") + assert(listedContents == expected) + } + + test("unzipExcludePatterns") - prep { wd => + val amxFile = "File.amx" + os.copy(wd / "File.txt", wd / amxFile) + + val zipFileName = "unzipAllExceptExcludingCertainFiles.zip" + val zipFile: os.Path = os.zip( + dest = wd / zipFileName, + sources = Seq( + wd / "File.txt", + wd / amxFile, + wd / "folder1" + ) + ) + + // Unzip file to a destination folder + val unzippedFolder = os.unzip( + source = wd / zipFileName, + dest = wd /
"unzipAllExceptExcludingCertainFiles", + excludePatterns = Seq(amxFile.r) + ) + + val paths = os.walk(unzippedFolder) + val expected = Seq( + wd / "unzipAllExceptExcludingCertainFiles/File.txt", + wd / "unzipAllExceptExcludingCertainFiles/one.txt" + ) + + assert(paths == expected) + } + + test("unzipStream") - prep { wd => + // Step 1: Create an in-memory ZIP file as a stream + val zipStreamOutput = new ByteArrayOutputStream() + val zipOutputStream = new ZipOutputStream(zipStreamOutput) + + // Step 2: Add some files to the ZIP + val file1Name = "file1.txt" + val file2Name = "nested/folder/file2.txt" + + // Add first file + zipOutputStream.putNextEntry(new ZipEntry(file1Name)) + zipOutputStream.write("Content of file1".getBytes) + zipOutputStream.closeEntry() + + // Add second file inside a nested folder + zipOutputStream.putNextEntry(new ZipEntry(file2Name)) + zipOutputStream.write("Content of file2".getBytes) + zipOutputStream.closeEntry() + + // Close the ZIP output stream + zipOutputStream.close() + + // Step 3: Prepare the destination folder for unzipping + val unzippedFolder = wd / "unzipped-stream-folder" + val readableZipStream: java.io.InputStream = + new ByteArrayInputStream(zipStreamOutput.toByteArray) + + // Unzipping the stream to the destination folder + os.unzip.stream( + source = readableZipStream, + dest = unzippedFolder + ) + + // Step 5: Verify the unzipped files and contents + val paths = os.walk(unzippedFolder) + assert(paths.contains(unzippedFolder / file1Name)) + assert(paths.contains(unzippedFolder / "nested" / "folder" / "file2.txt")) + + // Check the contents of the files + val file1Content = os.read(unzippedFolder / file1Name) + val file2Content = os.read(unzippedFolder / "nested" / "folder" / "file2.txt") + + assert(file1Content == "Content of file1") + assert(file2Content == "Content of file2") + } + + } +}