Rogdham
diff --git a/‎.pylintrc‎
Lines changed: 4 additions & 0 deletions b/‎.pylintrc‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md‎
Lines changed: 62 additions & 28 deletions b/‎README.md‎
Lines changed: 62 additions & 28 deletions
diff --git a/‎src/xz/block.py‎
Lines changed: 139 additions & 36 deletions b/‎src/xz/block.py‎
Lines changed: 139 additions & 36 deletions
diff --git a/‎src/xz/common.py‎
Lines changed: 8 additions & 1 deletion b/‎src/xz/common.py‎
Lines changed: 8 additions & 1 deletion
@@ -6,4 +6,8 @@ disable =
     too-few-public-methods,
     too-many-arguments,
     too-many-branches,
+    too-many-instance-attributes,
     too-many-locals,
+
+[SIMILARITIES]
+ignore-imports=yes
@@ -11,6 +11,7 @@ adheres to [Semantic Versioning](https://semver.org/).
 
 ### :rocket: Added
 
+- Write modes (`w`, `x`, `r+`, `w+`, `x+`) :tada:
 - Allow to `seek` past the end of the fileobj
 - Calling `len` on a fileobj gives its length, and `bool` tells if it is empty
 - Export useful constants and functions from `lzma` for easy access: checks, filters,
 
@@ -4,6 +4,8 @@
 
 Pure Python implementation of the XZ file format with random access support
 
+_Leveraging the lzma module for fast (de)compression_
+
 [![GitHub build status](https://img.shields.io/github/workflow/status/rogdham/python-xz/build/master)](https://github.com/rogdham/python-xz/actions?query=branch:master)&nbsp;[![Release on PyPI](https://img.shields.io/pypi/v/python-xz)](https://pypi.org/project/python-xz/)&nbsp;[![Code coverage](https://img.shields.io/badge/coverage-100%25-brightgreen)](https://github.com/rogdham/python-xz/search?q=fail+under&type=Code)&nbsp;[![MIT License](https://img.shields.io/pypi/l/python-xz)](https://github.com/Rogdham/python-xz/blob/master/LICENSE.txt)
 
 ---
@@ -14,40 +16,42 @@ Pure Python implementation of the XZ file format with random access support
 
 ---
 
-A XZ file can be composed of several streams and blocks. This allows for random access
-when reading, but this is not supported by Python's builtin `lzma` module, which would
-read all previous blocks for nothing.
+An XZ file can be composed of several streams and blocks. This allows for fast random
+access when reading, but this is not supported by Python's builtin `lzma` module (which
+would read all previous blocks for nothing).
 
 <div align="center">
 
-|                 |      [lzma]       |      [lzmaffi]       |      python-xz       |
-| :-------------: | :---------------: | :------------------: | :------------------: |
-|   module type   |      builtin      |  cffi (C extension)  |     pure Python      |
-|   📄 **read**   |                   |                      |                      |
-|  random access  | ❌ no<sup>1</sup> |  ✔️ yes<sup>2</sup>  |  ✔️ yes<sup>2</sup>  |
-| several blocks  |      ✔️ yes       | ✔️✔️ yes<sup>3</sup> | ✔️✔️ yes<sup>3</sup> |
-| several streams |      ✔️ yes       |        ✔️ yes        | ✔️✔️ yes<sup>4</sup> |
-| stream padding  |       ❌ no       |        ✔️ yes        |        ✔️ yes        |
-|  📝 **write**   |                   |                      |                      |
-|    `w` mode     |      ✔️ yes       |        ✔️ yes        |      ⏳ planned      |
-|    `x` mode     |      ✔️ yes       |        ❌ no         |      ⏳ planned      |
-|    `a` mode     |   ✔️ new stream   |    ✔️ new stream     |      ⏳ planned      |
-|   `r+w` mode    |       ❌ no       |        ❌ no         |      ⏳ planned      |
-| several blocks  |       ❌ no       |        ❌ no         |      ⏳ planned      |
-| several streams | ❌ no<sup>5</sup> |  ❌ no<sup>5</sup>   |      ⏳ planned      |
-| stream padding  | ❌ no<sup>6</sup> |        ✔️ yes        |      ⏳ planned      |
+|                   |      [lzma]       |      [lzmaffi]       |      python-xz       |
+| :---------------: | :---------------: | :------------------: | :------------------: |
+|    module type    |      builtin      |  cffi (C extension)  |     pure Python      |
+|    📄 **read**    |                   |                      |                      |
+|   random access   | ❌ no<sup>1</sup> |  ✔️ yes<sup>2</sup>  |  ✔️ yes<sup>2</sup>  |
+|  several blocks   |      ✔️ yes       | ✔️✔️ yes<sup>3</sup> | ✔️✔️ yes<sup>3</sup> |
+|  several streams  |      ✔️ yes       |        ✔️ yes        | ✔️✔️ yes<sup>4</sup> |
+|  stream padding   | ❌ no<sup>5</sup> |        ✔️ yes        |        ✔️ yes        |
+|   📝 **write**    |                   |                      |                      |
+|     `w` mode      |      ✔️ yes       |        ✔️ yes        |        ✔️ yes        |
+|     `x` mode      |      ✔️ yes       |        ❌ no         |        ✔️ yes        |
+|     `a` mode      |   ✔️ new stream   |    ✔️ new stream     |      ⏳ planned      |
+| `r+`/`w+`/… modes |       ❌ no       |        ❌ no         |        ✔️ yes        |
+|  several blocks   |       ❌ no       |        ❌ no         |        ✔️ yes        |
+|  several streams  | ❌ no<sup>6</sup> |  ❌ no<sup>6</sup>   |        ✔️ yes        |
+|  stream padding   |       ❌ no       |        ❌ no         |      ⏳ planned      |
 
 </div>
-<sub>
+
+<details>
+<summary>Notes</summary>
 
 1. Reading from a position will read the file from the very beginning
 2. Reading from a position will read the file from the beginning of the block
 3. Block positions available with the `block_boundaries` attribute
 4. Stream positions available with the `stream_boundaries` attribute
-5. Possible by manually closing and re-opening in append mode
-6. Related [issue](https://bugs.python.org/issue44134)
+5. Related [issue](https://bugs.python.org/issue44134)
+6. Possible by manually closing and re-opening in append mode
 
-</sub>
+</details>
 
 [lzma]: https://docs.python.org/3/library/lzma.html
 [lzmaffi]: https://github.com/r3m0t/backports.lzma
@@ -56,10 +60,10 @@ read all previous blocks for nothing.
 
 ## Usage
 
-### Read mode
-
 The API is similar to [lzma]: you can use either `xz.open` or `xz.XZFile`.
 
+### Read mode
+
 ```python
 >>> with xz.open('example.xz') as fin:
 ...     fin.read(18)
@@ -95,7 +99,32 @@ are still in bytes (just like with `lzma.open`).
 
 ### Write mode
 
-_This mode is not available yet._
+Writing is only supported from the end of file. It is however possible to truncate the
+file first. Note that truncating is only supported on block boundaries.
+
+```python
+>>> with xz.open('test.xz', 'w') as fout:
+...     fout.write(b'Hello, world!\n')
+...     fout.write(b'This sentence is still in the previous block\n')
+...     fout.change_block()
+...     fout.write(b'But this one is in its own!\n')
+...
+14
+45
+28
+```
+
+Advanced usage:
+
+- Modes like `r+`/`w+`/`x+` allow opening for both reading and writing at the same
+  time; however, in the current implementation, a block with writing in progress is
+  automatically closed when reading data from it.
+- The `check`, `preset` and `filters` arguments to `xz.open` and `xz.XZFile` configure
+  the default values for new streams and blocks.
+- Change block with the `change_block` method (the `preset` and `filters` attributes can
+  be changed beforehand to apply to the new block).
+- Change stream with the `change_stream` method (the `check` attribute can be changed
+  beforehand to apply to the new stream).
 
 ---
 
@@ -121,15 +150,20 @@ compression ratio.
 
 ### How can I create XZ files optimized for random-access?
 
-[XZ Utils](https://tukaani.org/xz/) can create XZ files with several blocks:
+You can open the file for writing and use the `change_block` method to create several
+blocks.
+
+Other tools can create XZ files with several blocks as well:
+
+- [XZ Utils](https://tukaani.org/xz/) needs to be called with flags:
 
 ```sh
 $ xz -T0 file                          # threading mode
 $ xz --block-size 16M file             # same size for all blocks
 $ xz --block-list 16M,32M,8M,42M file  # specific size for each block
 ```
 
-[PIXZ](https://github.com/vasi/pixz) creates files with several blocks by default:
+- [PIXZ](https://github.com/vasi/pixz) creates files with several blocks by default:
 
 ```sh
 $ pixz file
 
@@ -1,66 +1,169 @@
 from io import DEFAULT_BUFFER_SIZE, SEEK_SET
-from lzma import FORMAT_XZ, LZMADecompressor, LZMAError
+from lzma import FORMAT_XZ, LZMACompressor, LZMADecompressor, LZMAError
 
-from xz.common import XZError, create_xz_header, create_xz_index_footer
+from xz.common import (
+    XZError,
+    create_xz_header,
+    create_xz_index_footer,
+    parse_xz_footer,
+    parse_xz_index,
+)
 from xz.io import IOAbstract, IOCombiner, IOStatic
 
 
-class XZBlock(IOAbstract):
-    compressed_read_size = DEFAULT_BUFFER_SIZE
+class BlockRead:
+    read_size = DEFAULT_BUFFER_SIZE
 
     def __init__(self, fileobj, check, unpadded_size, uncompressed_size):
-        super().__init__(uncompressed_size)
-        self.compressed_fileobj = IOCombiner(
+        self.length = uncompressed_size
+        self.fileobj = IOCombiner(
             IOStatic(create_xz_header(check)),
             fileobj,
             IOStatic(
                 create_xz_index_footer(check, [(unpadded_size, uncompressed_size)])
             ),
         )
-        self._decompressor_reset()
+        self.reset()
 
-    def _decompressor_reset(self):
-        self.compressed_fileobj.seek(0, SEEK_SET)
+    def reset(self):
+        self.fileobj.seek(0, SEEK_SET)
+        self.pos = 0
         self.decompressor = LZMADecompressor(format=FORMAT_XZ)
 
-    def _decompressor_read(self, size):
+    def decompress(self, pos, size):
+        if pos < self.pos:
+            self.reset()
+
+        skip_before = pos - self.pos
+
         # pylint: disable=using-constant-test
         if self.decompressor.eof:
             raise XZError("block: decompressor eof")
+
         if self.decompressor.needs_input:
-            data_input = self.compressed_fileobj.read(self.compressed_read_size)
+            data_input = self.fileobj.read(self.read_size)
             if not data_input:
                 raise XZError("block: data eof")
         else:
             data_input = b""
-        return self.decompressor.decompress(data_input, size)
-
-    def seek(self, *args):
-        old_pos = self._pos
-        super().seek(*args)
-        pos_diff = self._pos - old_pos
-        if pos_diff < 0:
-            self._decompressor_reset()
-            old_pos = 0
-            pos_diff = self._pos
-        if pos_diff > 0:
-            self._pos = old_pos
-            self.read(pos_diff)
 
-    def _read(self, size):
-        try:
-            data_output = self._decompressor_read(size)
+        data_output = self.decompressor.decompress(data_input, skip_before + size)
+        self.pos += len(data_output)
+
+        if self.pos == self.length:
+            # we reached the end of the block
+            # according to the XZ specification, we must check the
+            # remaining bytes of the block; this is mainly performed by the
+            # decompressor itself when we consume it
+            while not self.decompressor.eof:
+                if self.decompress(self.pos, 1):
+                    raise LZMAError("Corrupt input data")
+
+        return data_output[skip_before:]
+
+
+class BlockWrite:
+    def __init__(self, fileobj, check, preset, filters):
+        self.fileobj = fileobj
+        self.check = check
+        self.compressor = LZMACompressor(FORMAT_XZ, check, preset, filters)
+        self.pos = 0
+        if self.compressor.compress(b"") != create_xz_header(check):
+            raise XZError("block: compressor header")
+
+    def _write(self, data):
+        if data:
+            self.fileobj.seek(self.pos)
+            self.fileobj.write(data)
+            self.pos += len(data)
+
+    def compress(self, data):
+        self._write(self.compressor.compress(data))
+
+    def finish(self):
+        data = self.compressor.flush()
+
+        # footer
+        check, backward_size = parse_xz_footer(data[-12:])
+        if check != self.check:
+            raise XZError("block: compressor footer check")
 
-            if self._pos + len(data_output) == self._length:
-                # we reached the end of the block
-                # according to the XZ specification, we must check the
-                # remaining bytes of the block; this is mainly performed by the
-                # decompressor itself when we consume it
-                while not self.decompressor.eof:
-                    if self._decompressor_read(1):
-                        raise LZMAError("Corrupt input data")
+        # index
+        records = parse_xz_index(data[-12 - backward_size : -12])
+        if len(records) != 1:
+            raise XZError("block: compressor index records length")
 
-            return data_output
+        # remaining block data
+        self._write(data[: -12 - backward_size])
 
+        return records[0]  # (unpadded_size, uncompressed_size)
+
+
+class XZBlock(IOAbstract):
+    def __init__(
+        self,
+        fileobj,
+        check,
+        unpadded_size,
+        uncompressed_size,
+        preset=None,
+        filters=None,
+    ):
+        super().__init__(uncompressed_size)
+        self.fileobj = fileobj
+        self.check = check
+        self.preset = preset
+        self.filters = filters
+        self.unpadded_size = unpadded_size
+        self.operation = None
+
+    @property
+    def uncompressed_size(self):
+        return self._length
+
+    def _read(self, size):
+        # enforce read mode
+        if not isinstance(self.operation, BlockRead):
+            self._write_end()
+            self.operation = BlockRead(
+                self.fileobj,
+                self.check,
+                self.unpadded_size,
+                self.uncompressed_size,
+            )
+
+        # read data
+        try:
+            return self.operation.decompress(self._pos, size)
         except LZMAError as ex:
             raise XZError(f"block: error while decompressing: {ex}") from ex
+
+    def writable(self):
+        return isinstance(self.operation, BlockWrite) or not self._length
+
+    def _write(self, data):
+        # enforce write mode
+        if not isinstance(self.operation, BlockWrite):
+            self.operation = BlockWrite(
+                self.fileobj,
+                self.check,
+                self.preset,
+                self.filters,
+            )
+
+        # write data
+        self.operation.compress(data)
+        return len(data)
+
+    def _write_after(self):
+        if isinstance(self.operation, BlockWrite):
+            self.unpadded_size, uncompressed_size = self.operation.finish()
+            if uncompressed_size != self.uncompressed_size:
+                raise XZError("block: compressor uncompressed size")
+            self.operation = None
+
+    def _truncate(self, size):
+        # thanks to the writable method, we are sure that length is zero
+        # so we don't need to handle the case of truncating in the middle of the block
+        self.seek(size)
+        self.write(b"")
@@ -1,4 +1,5 @@
 from binascii import crc32 as crc32int
+import lzma
 from struct import pack, unpack
 
 HEADER_MAGIC = b"\xfd7zXZ\x00"
@@ -12,7 +13,7 @@ class XZError(Exception):
 def encode_mbi(value):
     data = bytearray()
     while value >= 0x80:
-        data.append((value | 0x80) & 0xFF)
+        data.append((value & 0x7F) | 0x80)
         value >>= 7
     data.append(value)
     return data
@@ -57,6 +58,8 @@ def create_xz_index_footer(check, records):
     index = b"\x00"
     index += encode_mbi(len(records))
     for unpadded_size, uncompressed_size in records:
+        if not unpadded_size:
+            raise XZError("index record unpadded size")
         index += encode_mbi(unpadded_size)
         index += encode_mbi(uncompressed_size)
     index += pad(len(index))
@@ -124,3 +127,7 @@ def parse_xz_footer(footer):
     if flag_first_byte or not 0 <= check <= 0xF:
         raise XZError("footer flags")
     return (check, backward_size)
+
+
+# find default value for check implicitly used by lzma
+DEFAULT_CHECK = parse_xz_header(lzma.compress(b"")[:12])