fsspec · martindurant · Nov 24, 2025 · Nov 11, 2025 · Nov 11, 2025 · Nov 11, 2025
diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
@@ -145,5 +145,5 @@ jobs:
         shell: bash -l {0}
         run: |
           cd ${{ matrix.FRIEND }}
-          pytest -v
+          pytest -v -W ignore::pytest.PytestRemovedIn9Warning
           cd ..
diff --git a/fsspec/caching.py b/fsspec/caching.py
@@ -6,7 +6,6 @@
 import math
 import os
 import threading
-import warnings
 from collections import OrderedDict
 from concurrent.futures import Future, ThreadPoolExecutor
 from itertools import groupby
@@ -629,7 +628,7 @@ def __init__(
         fetcher: Fetcher,
         size: int,
         data: dict[tuple[int, int], bytes] | None = None,
-        strict: bool = True,
+        strict: bool = False,
         **_: Any,
     ):
         super().__init__(blocksize, fetcher, size)
@@ -653,50 +652,94 @@ def __init__(
         else:
             self.data = {}
 
+    @property
+    def size(self):
+        """Sum of the lengths of the byte ranges used as keys of ``self.data``.
+
+        Computed from the keys of ``self.data`` on every access.
+        """
+        return sum(stop - start for start, stop in self.data)
+
+    @size.setter
+    def size(self, value):
+        # Assignments are accepted and discarded, since the value is always
+        # derived from ``self.data``.
+        pass
+
+    @property
+    def nblocks(self):
+        """Number of known parts, i.e. the number of keys in ``self.data``."""
+        return len(self.data)
+
+    @nblocks.setter
+    def nblocks(self, value):
+        # Assignments are accepted and discarded; see the getter.
+        pass
+
     def _fetch(self, start: int | None, stop: int | None) -> bytes:
+        """Return bytes ``start`` to ``stop`` assembled from the known parts.
+
+        ``start`` defaults to 0 and ``stop`` to ``self.size``. When
+        ``self.strict`` is false, gaps between known parts and the stretch
+        after the last part used are filled with zero bytes.
+
+        Raises
+        ------
+        ValueError
+            If the first requested byte is not inside a known part, or if
+            ``self.strict`` is true and any other requested byte is not.
+        """
         if start is None:
             start = 0
         if stop is None:
             stop = self.size
+        self.total_requested_bytes += stop - start
 
         out = b""
-        for (loc0, loc1), data in self.data.items():
-            # If self.strict=False, use zero-padded data
-            # for reads beyond the end of a "known" buffer
+        started = False
+        loc_old = 0
+        for loc0, loc1 in sorted(self.data):
+            if (loc0 <= start < loc1) and (loc0 <= stop <= loc1):
+                # entirely within the block
+                off = start - loc0
+                self.hit_count += 1
+                return self.data[(loc0, loc1)][off : off + stop - start]
+            if stop <= loc0:
+                break
+            if not started and not loc0 <= start < loc1:
+                if start < loc0:
+                    # the request begins in bytes we do not hold; anything
+                    # appended from here on would sit at the wrong offset
+                    break
+                # this block ends before the request begins
+                continue
+            if started and loc0 > loc_old:
+                # a gap where we need data
+                self.miss_count += 1
+                if self.strict:
+                    raise ValueError(
+                        f"Read crosses a gap in the known file parts: {(start, stop)}"
+                    )
+                out += b"\x00" * (loc0 - loc_old)
             if loc0 <= start < loc1:
+                # found the start
+                self.hit_count += 1
                 off = start - loc0
-                out = data[off : off + stop - start]
-                if not self.strict or loc0 <= stop <= loc1:
-                    # The request is within a known range, or
-                    # it begins within a known range, and we
-                    # are allowed to pad reads beyond the
-                    # buffer with zero
-                    out += b"\x00" * (stop - start - len(out))
-                    self.hit_count += 1
-                    return out
-                else:
-                    # The request ends outside a known range,
-                    # and we are being "strict" about reads
-                    # beyond the buffer
-                    start = loc1
-                    break
-
-        # We only get here if there is a request outside the
-        # known parts of the file. In an ideal world, this
-        # should never happen
-        if self.fetcher is None:
-            # We cannot fetch the data, so raise an error
-            raise ValueError(f"Read is outside the known file parts: {(start, stop)}. ")
-        # We can fetch the data, but should warn the user
-        # that this may be slow
-        warnings.warn(
-            f"Read is outside the known file parts: {(start, stop)}. "
-            f"IO/caching performance may be poor!"
-        )
-        logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}")
-        self.total_requested_bytes += stop - start
+                out = self.data[(loc0, loc1)][off : off + stop - start]
+                started = True
+            elif start < loc0 and stop > loc1:
+                # the whole block
+                self.hit_count += 1
+                out += self.data[(loc0, loc1)]
+            elif loc0 <= stop <= loc1:
+                # end block
+                self.hit_count += 1
+                return out + self.data[(loc0, loc1)][: stop - loc0]
+            loc_old = loc1
         self.miss_count += 1
-        return out + super()._fetch(start, stop)
+        if started and not self.strict:
+            return out + b"\x00" * (stop - loc_old)
+        raise ValueError(f"Read is outside the known file parts: {(start, stop)}")
 
 
 class UpdatableLRU(Generic[P, T]):