Commit c4f4826

feat(dcp): make DCPOptimizedS3Reader the default for S3StorageReader (#419)
This commit makes `S3ReaderConstructor.dcp_optimized()` the default reader constructor for DCP loading with `S3StorageReader`. It also adds default-constructor and zstandard tests, introduces a new troubleshooting doc for `DCPOptimizedS3Reader`, and references that doc from error messages, the README, and the CHANGELOG.

- Change the `S3StorageReader` default from `S3ReaderConstructor.default()` to `S3ReaderConstructor.dcp_optimized()`
- Adjust docs and error messages
- Add a new TROUBLESHOOTING.md doc with DCPOptimizedS3Reader troubleshooting notes
- Add FALLBACK_GUIDANCE to error messages, linking to the TROUBLESHOOTING.md doc with fallback instructions
- Update the README to point to TROUBLESHOOTING.md in the DCP section and simplify the DCP examples
- Update the CHANGELOG with soft breaking change documentation, also pointing to TROUBLESHOOTING.md
- Adjust tests
  - Add a unit test verifying the default constructor
  - Add zstandard tests (see the PR description's additional context for explanation)
    - Add zstandard to the dcp-test dependencies
    - Add an e2e test for ZStandard compression with all reader types
1 parent 4010356 commit c4f4826

File tree

9 files changed: +186 −27 lines changed

CHANGELOG.md

Lines changed: 3 additions & 3 deletions
```diff
@@ -1,7 +1,7 @@
-## v1.5.0 (February 17, 2026)
+## v1.5.0 (February 20, 2026)

 ### New features
-* Add DCPOptimizedS3Reader for faster and partial DCP loading (#378)
+* Add DCPOptimizedS3Reader as new default for faster and partial DCP loading (#378, #419)
 * Add support for Python 3.14 (#408)
 * Add weights_only parameter support for Lightning 2.6.0 compatibility (#388)
@@ -19,7 +19,7 @@
 * Add macOS x86_64 and Python 3.8 deprecation warnings (#400)

 ### Breaking changes
-* No breaking changes.
+* No breaking changes, but DCPOptimizedS3Reader as the new default reader for `S3StorageReader` might lead to behavioral changes. See [DCPOptimizedS3Reader Errors](https://github.com/awslabs/s3-connector-for-pytorch/blob/main/docs/TROUBLESHOOTING.md#dcpoptimizeds3reader-errors) for more details.

 ## v1.4.3 (July 25, 2025)
```

README.md

Lines changed: 9 additions & 9 deletions
````diff
@@ -132,7 +132,8 @@ Amazon S3 Connector for PyTorch provides robust support for PyTorch distributed
 - `S3StorageReader`: Implementation of PyTorch's StorageReader interface.
   - Supports configurable reading strategies via the `reader_constructor` parameter (see [Reader Configurations](#reader-configurations)).
-  - `S3ReaderConstructor.dcp_optimized()` is recommended for faster loading with partial checkpoint optimizations.
+  - Uses `DCPOptimizedS3Reader` by default for faster loading and partial checkpoint optimizations.
+  - Please refer to [DCPOptimizedS3Reader Errors](https://github.com/awslabs/s3-connector-for-pytorch/blob/main/docs/TROUBLESHOOTING.md#dcpoptimizeds3reader-errors) for troubleshooting.
 - `S3FileSystem`: An implementation of PyTorch's FileSystemBase.

 These tools enable seamless integration of Amazon S3 with
@@ -155,7 +156,6 @@ can be found in the [examples/dcp](https://github.com/awslabs/s3-connector-for-p
 ```py
 from s3torchconnector.dcp import S3StorageWriter, S3StorageReader
-from s3torchconnector import S3ReaderConstructor

 import torchvision
 import torch.distributed.checkpoint as DCP
@@ -178,14 +178,12 @@ DCP.save(
 )

 # Load distributed checkpoint from S3
+# S3StorageReader uses DCPOptimizedS3Reader by default for improved performance
 model = torchvision.models.resnet18()
 model_state_dict = model.state_dict()
-# Use DCP-optimized reader for faster loading
-reader_constructor = S3ReaderConstructor.dcp_optimized()
 s3_storage_reader = S3StorageReader(
     region=REGION,
     path=CHECKPOINT_URI,
-    reader_constructor=reader_constructor, # optional; constructor for S3Reader types
 )
 DCP.load(
     state_dict=model_state_dict,
@@ -424,8 +422,9 @@ Amazon S3 Connector for PyTorch supports three types of readers, configurable th
 ### Reader Types

-#### 1. Sequential Reader (Default)
+#### 1. Sequential Reader

+- Default for non-DCP use cases.
 - Downloads and buffers the entire S3 object in memory.
 - Prioritizes performance over memory usage by buffering entire objects.
@@ -437,9 +436,9 @@ Amazon S3 Connector for PyTorch supports three types of readers, configurable th
 - **Small reads** (< `buffer_size`): Use internal buffer to reduce S3 API calls.
 - **Large reads** (≥ `buffer_size`): Bypass buffer for direct transfer.

-#### 3. DCP-Optimized Reader (DCP only)
+#### 3. DCP-Optimized Reader

-- Specialized usage for PyTorch Distributed Checkpoint (DCP) loading.
+- Default for PyTorch Distributed Checkpoint (DCP) loading with `S3StorageReader`.
 - Provides performance improvements through per-item buffers and zero-copy buffer management.
 - Enables efficient partial checkpoint loading (e.g. model-only) through selective data fetching with range coalescing.
 - Automatically handles range metadata injection from DCP load plan.
@@ -449,7 +448,7 @@ Amazon S3 Connector for PyTorch supports three types of readers, configurable th
 - **Sequential Reader**: For processing entire objects, and when repeated access to the data is required. Best for most general use cases.
 - **Range-based Reader**: For larger objects (100MB+) that require sparse partial reads, and in memory-constrained environments.
-- **DCP-Optimized Reader**: For typical PyTorch Distributed Checkpoint loading scenarios for highest performance and memory-efficiency.
+- **DCP-Optimized Reader**: For typical PyTorch Distributed Checkpoint loading scenarios for highest performance and memory-efficiency. (Default for `S3StorageReader`)

 **Note**: S3Reader instances are not thread-safe and should not be shared across threads. For multiprocessing with DataLoader, each worker process creates its own S3Reader instance automatically.
@@ -484,6 +483,7 @@ DCP interface - `S3StorageReader` usage with dcp-optimized reader:
 from s3torchconnector.dcp import S3StorageReader
 from s3torchconnector import S3ReaderConstructor

+# dcp_optimized is already the default for S3StorageReader; demonstration purposes only.
 reader_constructor = S3ReaderConstructor.dcp_optimized()
 s3_storage_reader = S3StorageReader(
     region=REGION,
````
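As an aside on the reader types described in the README hunk above, the range-based reader's small-read/large-read buffering rule can be sketched in plain Python. Everything below (`FakeStore`, `RangeBasedSketch`) is an illustrative stand-in, not the connector's actual classes:

```python
class FakeStore:
    """Stands in for S3: counts how many byte-range requests are issued."""

    def __init__(self, data: bytes):
        self.data = data
        self.requests = 0

    def get_range(self, start: int, end: int) -> bytes:
        self.requests += 1
        return self.data[start:end]


class RangeBasedSketch:
    """Small reads go through a buffer; large reads bypass it entirely."""

    def __init__(self, store: FakeStore, buffer_size: int):
        self.store = store
        self.buffer_size = buffer_size
        self.buf_start = self.buf_end = 0
        self.buf = b""

    def read(self, start: int, size: int) -> bytes:
        end = start + size
        if size >= self.buffer_size:
            # Large read: direct transfer, no buffering.
            return self.store.get_range(start, end)
        if not (self.buf_start <= start and end <= self.buf_end):
            # Small read missing the buffer: refill one buffer-sized window.
            self.buf_start, self.buf_end = start, start + self.buffer_size
            self.buf = self.store.get_range(self.buf_start, self.buf_end)
        return self.buf[start - self.buf_start : end - self.buf_start]


store = FakeStore(bytes(range(256)))
reader = RangeBasedSketch(store, buffer_size=64)
reader.read(0, 8)    # small read: fills the buffer (one request)
reader.read(8, 8)    # small read served from the buffer (no new request)
reader.read(0, 128)  # large read: bypasses the buffer (one more request)
assert store.requests == 2
```

This is why the README recommends the range-based reader for sparse partial reads: adjacent small reads collapse into one S3 request per buffer window.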

docs/TROUBLESHOOTING.md

Lines changed: 24 additions & 0 deletions
````diff
@@ -0,0 +1,24 @@
+# Troubleshooting
+
+If `s3torchconnector` is not working as expected, please check [Github issues](https://github.com/awslabs/s3-connector-for-pytorch/issues) to see if your issue has already been addressed. If not, feel free to [create a GitHub issue](https://github.com/awslabs/s3-connector-for-pytorch/issues/new/choose) with all the details.
+
+For debug logging for mountpoint-s3-client and CRT logs, please refer to [Enabling Debug Logging](https://github.com/awslabs/s3-connector-for-pytorch/blob/main/DEVELOPMENT.md#enabling-debug-logging) section in the DEVELOPMENT doc.
+
+### DCPOptimizedS3Reader Errors
+
+`S3StorageReader` uses `DCPOptimizedS3Reader` (created with `S3ReaderConstructor.dcp_optimized()`) by default (v1.5.0+) for improved performance. See [PR #378](https://github.com/awslabs/s3-connector-for-pytorch/pull/378) for more details about the reader.
+
+If you encounter errors with the default reader, please [submit a GitHub issue](https://github.com/awslabs/s3-connector-for-pytorch/issues) describing your use case. We'd like to understand your scenario and potentially extend `DCPOptimizedS3Reader` to support it, so you can benefit from the performance improvements.
+
+For unsupported or non-DCP access patterns, use the generic reader:
+
+```py
+from s3torchconnector import S3ReaderConstructor
+from s3torchconnector.dcp import S3StorageReader
+
+storage_reader = S3StorageReader(
+    region=REGION,
+    path=CHECKPOINT_URI,
+    reader_constructor=S3ReaderConstructor.default()
+)
+```
````

s3torchconnector/pyproject.toml

Lines changed: 1 addition & 0 deletions
```diff
@@ -68,6 +68,7 @@ dcp = [
 dcp-test = [
     "s3torchconnector[dcp]",
     "pytest",
+    "zstandard",
 ]

 [tool.setuptools.packages]
```

s3torchconnector/src/s3torchconnector/dcp/s3_file_system.py

Lines changed: 15 additions & 4 deletions
```diff
@@ -325,7 +325,15 @@ def validate_checkpoint_id(cls, checkpoint_id: Union[str, os.PathLike]) -> bool:

 class S3StorageReader(FileSystemReader):
-    """S3 implementation of PyTorch's FileSystemReader with configurable reader strategies."""
+    """S3 implementation of PyTorch's FileSystemReader with configurable reader strategies.
+
+    By default, uses DCPOptimizedS3Reader for improved checkpoint loading performance.
+    For unsupported or non-DCP access patterns, please use the generic reader:
+        storage_reader = S3StorageReader(
+            region, path,
+            reader_constructor=S3ReaderConstructor.default()
+        )
+    """

     def __init__(
         self,
@@ -343,11 +351,14 @@ def __init__(
             region (str): The AWS region for S3.
             path (Union[str, os.PathLike]): The S3 path to read checkpoints from.
             s3client_config (Optional[S3ClientConfig]): Optional S3ClientConfig with parameters for S3 client.
-            reader_constructor (Optional[S3ReaderConstructorProtocol]): Optional partial(S3Reader) created using S3ReaderConstructor
-                e.g. S3ReaderConstructor.sequential() or S3ReaderConstructor.range_based()
+            reader_constructor (Optional[S3ReaderConstructorProtocol]): Reader constructor created using
+                S3ReaderConstructor. Defaults to ``S3ReaderConstructor.dcp_optimized()`` for best performance.
+                Use ``S3ReaderConstructor.sequential()`` for unsupported/non-DCP access patterns.
         """
         super().__init__(path)
-        self._reader_constructor = reader_constructor or S3ReaderConstructor.default()
+        self._reader_constructor = (
+            reader_constructor or S3ReaderConstructor.dcp_optimized()
+        )
         self.fs: S3FileSystem = S3FileSystem(  # type: ignore[assignment] # since we overrode self.fs: FileSystem
             region,
             s3client_config=s3client_config,
```
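The `reader_constructor or S3ReaderConstructor.dcp_optimized()` line in this hunk is a standard default-selection pattern: a falsy (None) argument falls back to a freshly built default constructor. A self-contained sketch with stand-in classes (none of these names are the real s3torchconnector types):

```python
from functools import partial


class SequentialReader:
    """Stand-in for the generic sequential reader."""

    def __init__(self, bucket, key):
        self.bucket, self.key = bucket, key


class DCPOptimizedReader:
    """Stand-in for the DCP-optimized reader."""

    def __init__(self, bucket, key):
        self.bucket, self.key = bucket, key


def default():
    return partial(SequentialReader)


def dcp_optimized():
    return partial(DCPOptimizedReader)


class StorageReaderSketch:
    def __init__(self, reader_constructor=None):
        # v1.5.0 behavior: with no explicit constructor, fall back to
        # the DCP-optimized one instead of the sequential default.
        self._reader_constructor = reader_constructor or dcp_optimized()

    def open(self, bucket, key):
        return self._reader_constructor(bucket, key)


assert isinstance(StorageReaderSketch().open("b", "k"), DCPOptimizedReader)
assert isinstance(StorageReaderSketch(default()).open("b", "k"), SequentialReader)
```

Passing `S3ReaderConstructor.default()` explicitly, as the new docstring advises, restores the pre-1.5.0 behavior in exactly this way.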

s3torchconnector/src/s3torchconnector/s3reader/constructor.py

Lines changed: 16 additions & 6 deletions
```diff
@@ -14,7 +14,12 @@
 )
 from .sequential import SequentialS3Reader
 from .ranged import RangedS3Reader
-from .dcp_optimized import DCPOptimizedS3Reader, ItemRange, DEFAULT_MAX_GAP_SIZE
+from .dcp_optimized import (
+    DCPOptimizedS3Reader,
+    ItemRange,
+    DEFAULT_MAX_GAP_SIZE,
+    FALLBACK_GUIDANCE,
+)

 if TYPE_CHECKING:
     from torch.distributed.checkpoint.planner import ReadItem
@@ -115,7 +120,8 @@ def __call__(self, bucket: str, key: str, get_object_info, get_stream) -> S3Read
         # Error for other files; warn users in case they override prepare_local_plan behavior
         raise ValueError(
-            f"No ranges found for {s3_uri}. Make sure range injection is used in S3StorageReader.prepare_local_plan."
+            f"No ranges found for {s3_uri}. Make sure range injection is used in "
+            f"'S3StorageReader.prepare_local_plan'.\n{FALLBACK_GUIDANCE}"
         )
@@ -135,7 +141,9 @@ class S3ReaderConstructor:
     @staticmethod
     def sequential() -> S3ReaderConstructorProtocol:
-        """Creates a constructor for sequential readers
+        """Creates a constructor for sequential (generic) readers.
+
+        This reader is the generic reader that supports all access patterns.

         Returns:
             S3ReaderConstructorProtocol: Partial constructor for SequentialS3Reader
@@ -158,8 +166,8 @@ def range_based(buffer_size: Optional[int] = None) -> S3ReaderConstructorProtoco
         Returns:
             S3ReaderConstructorProtocol: Partial constructor for RangedS3Reader

-        Range-based reader performs byte-range requests to read specific portions of S3 objects without
-        downloading the entire file.
+        Range-based reader performs byte-range requests for each read/readinto call
+        to read specific portions of S3 objects without downloading the entire file.

         Buffer size affects read performance:
@@ -233,7 +241,9 @@ def dcp_optimized(
     @staticmethod
     def default() -> S3ReaderConstructorProtocol:
-        """Creates default reader constructor (sequential)
+        """Creates the default generic reader constructor.
+
+        This creates a sequential (generic) reader that supports all access patterns.

         Returns:
             S3ReaderConstructorProtocol: Partial constructor for SequentialS3Reader
```

s3torchconnector/src/s3torchconnector/s3reader/dcp_optimized.py

Lines changed: 16 additions & 5 deletions
```diff
@@ -47,6 +47,13 @@
 FIND_ITEM_ERROR_PREFIX = (
     "DCPOptimizedS3Reader only supports sequentially accessing provided ranges: "
 )
+FALLBACK_GUIDANCE = (
+    "If this error is encountered with the default DCP reader (S3ReaderConstructor.dcp_optimized()) "
+    "added in s3torchconnector v1.5.0, please refer to the troubleshooting doc "
+    "(https://github.com/awslabs/s3-connector-for-pytorch/blob/main/docs/TROUBLESHOOTING.md#dcpoptimizeds3reader-errors)."
+    "\nFor unsupported or non-DCP access patterns, use the generic reader: "
+    "S3StorageReader(region, path, reader_constructor=S3ReaderConstructor.default())"
+)


 @dataclass
@@ -399,7 +406,7 @@ def _find_item_for_range(self, start: int, end: int) -> ItemRange:
         if start < item.end or self._current_item_buffer is None:
             raise ValueError(
                 f"{FIND_ITEM_ERROR_PREFIX}Range {start}-{end} not contained in "
-                f"current item {item.start}-{item.end}"
+                f"current item {item.start}-{item.end}.\n{FALLBACK_GUIDANCE}"
             )

         # Advance to next item
@@ -409,7 +416,7 @@ def _find_item_for_range(self, start: int, end: int) -> ItemRange:
         except StopIteration:
             raise ValueError(
                 f"{FIND_ITEM_ERROR_PREFIX}Range {start}-{end} not contained in last item "
-                f"with range {prev_item.start}-{prev_item.end}"
+                f"with range {prev_item.start}-{prev_item.end}.\n{FALLBACK_GUIDANCE}"
             )

         # Check if requested range is within next item
@@ -419,7 +426,7 @@ def _find_item_for_range(self, start: int, end: int) -> ItemRange:
             raise ValueError(
                 f"{FIND_ITEM_ERROR_PREFIX}Range {start}-{end} not contained in "
                 f"current item {prev_item.start}-{prev_item.end} nor the "
-                f"next item {item.start}-{item.end}."
+                f"next item {item.start}-{item.end}.\n{FALLBACK_GUIDANCE}"
             )

     def _get_stream_for_item(self, item: ItemRange) -> GetObjectStream:
@@ -647,11 +654,15 @@ def read(self, size: Optional[int] = None) -> bytes:
             S3Exception: An error occurred accessing S3.
         """
         if size is None:
-            raise ValueError("Size cannot be None; full read not supported")
+            raise ValueError(
+                f"Size cannot be None; full read not supported.\n{FALLBACK_GUIDANCE}"
+            )
         if not isinstance(size, int):
             raise TypeError(f"argument should be integer or None, not {type(size)!r}")
         if size < 0:
-            raise ValueError("Size cannot be negative; full read not supported")
+            raise ValueError(
+                f"Size cannot be negative; full read not supported.\n{FALLBACK_GUIDANCE}"
+            )
         if size == 0:
             return b""
```
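The `DEFAULT_MAX_GAP_SIZE` constant imported alongside `FALLBACK_GUIDANCE` hints at the range-coalescing step the DCP-optimized reader relies on: nearby byte ranges are merged so fewer, larger GET requests are issued, at the cost of fetching at most `max_gap` wasted bytes per merge. A minimal illustrative version (not the library's internals):

```python
def coalesce_ranges(ranges, max_gap):
    """Merge sorted (start, end) byte ranges whose gap is at most max_gap.

    Fewer, larger GET requests generally beat many small ones on S3,
    as long as the wasted bytes in each gap stay bounded by max_gap.
    """
    merged = []
    for start, end in sorted(ranges):
        if merged and start - merged[-1][1] <= max_gap:
            # Close enough to the previous range: extend it instead of
            # issuing a separate request.
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])
    return [tuple(r) for r in merged]


# Three tensor ranges with small gaps collapse into one request;
# the distant range stays separate.
assert coalesce_ranges([(0, 10), (12, 20), (22, 30), (1000, 1100)], max_gap=5) == [
    (0, 30),
    (1000, 1100),
]
```

Coalescing also explains the sequential-access errors above: once ranges are merged into forward-only streams, a read that jumps backward has no buffer left to serve it.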

s3torchconnector/tst/e2e/dcp/test_e2e_s3_storage_reader.py

Lines changed: 95 additions & 0 deletions
```diff
@@ -183,3 +183,98 @@ def track_get_object_stream(self, bucket, key, start=None, end=None):
     print(
         f"{filter_name} load, {coalesce}: {len(stream_calls)} streams, {len(filtered_keys)} tensors"
     )
+
+
+@pytest.mark.parametrize("model", [SIMPLE_MODEL, LARGER_MODEL])
+@pytest.mark.parametrize(
+    "reader_constructor_name,reader_constructor",
+    [
+        ("sequential", S3ReaderConstructor.sequential()),
+        ("range_based", S3ReaderConstructor.range_based()),
+        ("dcp_optimized", S3ReaderConstructor.dcp_optimized()),
+    ],
+)
+def test_zstd_compression_partial_load(
+    checkpoint_directory, model, reader_constructor_name, reader_constructor
+):
+    """Test ZStandard compression with partial load works for all readers.
+
+    Tests compatibility with PyTorch DCP's transform_from() which decompresses
+    incoming stream data when _extensions=[ZStandard()] is used on S3StorageWriter,
+    especially testing that it retains sequential access pattern for dcp_optimized reader.
+    """
+
+    # TODO Python 3.8 uses PyTorch 2.4 and does not have ZStandard; remove conditional import/skip after deprecating Python 3.8.
+    try:
+        from torch.distributed.checkpoint._extension import ZStandard
+    except ImportError:
+        pytest.skip("ZStandard extension not available in this PyTorch version")
+
+    region = checkpoint_directory.region
+    s3_uri = checkpoint_directory.s3_uri
+
+    state_dict = model.state_dict()
+    all_keys = list(state_dict.keys())
+
+    # Save with ZStandard compression
+    writer = S3StorageWriter(
+        region=region,
+        path=s3_uri,
+        overwrite=True,
+        _extensions=[ZStandard()],
+    )
+    dcp.save(state_dict, storage_writer=writer)
+
+    # Partial load - only weight tensors
+    keys_to_load = [k for k in all_keys if k.endswith(".weight")]
+    assert keys_to_load, "No weight keys found in model"
+    loaded = {k: torch.empty_like(state_dict[k]) for k in keys_to_load}
+
+    # Track read positions for dcp_optimized
+    read_calls = []
+    original_read = DCPOptimizedS3Reader.read
+    original_readinto = DCPOptimizedS3Reader.readinto
+
+    def track_reads(self, size=None):
+        if not self.key.endswith(".metadata"):
+            read_calls.append(("read", self._position, size, self.key))
+            print(f"read: pos={self._position}, size={size}, key={self.key}")
+        return original_read(self, size)
+
+    def track_readinto(self, buf):
+        if not self.key.endswith(".metadata"):
+            read_calls.append(("readinto", self._position, len(buf), self.key))
+            print(f"readinto: pos={self._position}, size={len(buf)}, key={self.key}")
+        return original_readinto(self, buf)
+
+    # Load with position tracking (only affects dcp_optimized)
+    with (
+        patch.object(DCPOptimizedS3Reader, "read", track_reads),
+        patch.object(DCPOptimizedS3Reader, "readinto", track_readinto),
+    ):
+        reader = S3StorageReader(
+            region=region,
+            path=s3_uri,
+            reader_constructor=reader_constructor,
+        )
+        dcp.load(loaded, storage_reader=reader)
+
+    # Verify loaded tensors match
+    for key in keys_to_load:
+        assert torch.equal(loaded[key], state_dict[key]), f"Mismatch for {key}"
+
+    # Print summary and verify sequential access for dcp_optimized
+    # This helps to manually verify sequential access is still enforced even with
+    # zstandard transform on each tensor for dcp_optimized reader to work.
+    if reader_constructor_name == "dcp_optimized" and read_calls:
+        read_positions = [call[1] for call in read_calls]
+        assert read_positions == sorted(
+            read_positions
+        ), "Read positions should be in ascending order"
+
+        print(f"\n{reader_constructor_name}: {len(keys_to_load)} tensors loaded")
+        print(f"  Total calls: {len(read_calls)}")
+        print(f"  read: {sum(1 for c in read_calls if c[0] == 'read')}")
+        print(f"  readinto: {sum(1 for c in read_calls if c[0] == 'readinto')}")
+    else:
+        print(f"{reader_constructor_name}: {len(keys_to_load)} tensors loaded")
```
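The read-tracking trick in the test above works because `unittest.mock.patch.object` replaces the *unbound* method on the class, so the replacement function receives `self` like the original and can delegate to a saved reference. A standalone miniature of that pattern:

```python
from unittest.mock import patch


class Reader:
    def read(self, size):
        return b"x" * size


calls = []
original_read = Reader.read  # keep the unbound original so the tracker can delegate


def track_reads(self, size):
    calls.append(size)  # record the argument before delegating
    return original_read(self, size)


with patch.object(Reader, "read", track_reads):
    data = Reader().read(4)  # routed through the tracker inside the context

assert data == b"xxxx"
assert calls == [4]
assert Reader().read(2) == b"xx"  # original method restored on context exit
assert calls == [4]               # tracker no longer records
```

Saving `original_read` before patching is essential; calling `self.read` inside the tracker would recurse into the patched method instead.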

s3torchconnector/tst/unit/dcp/test_s3_storage_reader.py

Lines changed: 7 additions & 0 deletions
```diff
@@ -10,6 +10,7 @@
 from s3torchconnector.dcp import S3StorageReader
 from s3torchconnector.s3reader import S3ReaderConstructor, ItemRange
+from s3torchconnector.s3reader.constructor import DCPOptimizedConstructor

 TEST_REGION = "eu-east-1"
 TEST_PATH = "s3://test-bucket/test-checkpoint/"
@@ -34,6 +35,12 @@ def load_plan_with_offsets(draw):
     return LoadPlan(items), storage_data


+def test_s3storage_reader_default_uses_dcp_optimized():
+    """Verify S3StorageReader without explicit constructor uses dcp_optimized."""
+    reader = S3StorageReader(region=TEST_REGION, path=TEST_PATH)
+    assert isinstance(reader._reader_constructor, DCPOptimizedConstructor)
+
+
 def test_s3storage_reader_prepare_local_plan_empty():
     """Test prepare_local_plan handles empty plans."""
     s3_storage_reader = S3StorageReader(TEST_REGION, TEST_PATH)
```
