Skip to content

Commit f3cca78

Browse files
laarohi (Luke Aarohi) and seberg
authored
BUG: allow empty memmaps in most situations (numpy#27723)
Allow empty memmap in more cases, by: * avoiding `bytes=0` when an offset is used (but it is a multiple of allocation granularity) * Use a size of 1 when a memmap is created fresh but it is empty. --- * BUG: error empty memmap offset is a multiple of allocation granularity * DOC: added a code comment explaining issue with `bytes==0` in memmap * TST: test for arr.offset being correct * ENH: allow empty memmap * TST: adjust test for empty memmap * STY: numpy/_core/tests/test_memmap.py Co-authored-by: Sebastian Berg <[email protected]> * STY: Update numpy/_core/tests/test_memmap.py Co-authored-by: Sebastian Berg <[email protected]> * STY: Update numpy/_core/tests/test_memmap.py Co-authored-by: Sebastian Berg <[email protected]> * TST: Update numpy/_core/tests/test_memmap.py Co-authored-by: Sebastian Berg <[email protected]> * DOC: added enhancement release note for numpygh-27723 --------- Co-authored-by: Luke Aarohi <[email protected]> Co-authored-by: Sebastian Berg <[email protected]>
1 parent fb91abc commit f3cca78

File tree

3 files changed

+29
-7
lines changed

3 files changed

+29
-7
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
* Improved support for empty `memmap`. Previously an empty `memmap` would fail
2+
unless a non-zero ``offset`` was set. Now a zero-size `memmap` is supported
3+
even if ``offset=0``. To achieve this, if a `memmap` is mapped to an empty
4+
file, that file is padded with a single byte.

numpy/_core/memmap.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -262,10 +262,14 @@ def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,
262262

263263
bytes = int(offset + size*_dbytes)
264264

265-
if mode in ('w+', 'r+') and flen < bytes:
266-
fid.seek(bytes - 1, 0)
267-
fid.write(b'\0')
268-
fid.flush()
265+
if mode in ('w+', 'r+'):
266+
# gh-27723
267+
# if bytes == 0, we write out 1 byte to allow empty memmap.
268+
bytes = max(bytes, 1)
269+
if flen < bytes:
270+
fid.seek(bytes - 1, 0)
271+
fid.write(b'\0')
272+
fid.flush()
269273

270274
if mode == 'c':
271275
acc = mmap.ACCESS_COPY
@@ -276,6 +280,11 @@ def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,
276280

277281
start = offset - offset % mmap.ALLOCATIONGRANULARITY
278282
bytes -= start
283+
# bytes == 0 is problematic because mmap treats length=0 as "map the full file".
284+
# See PR gh-27723 for a more detailed explanation.
285+
if bytes == 0 and start > 0:
286+
bytes += mmap.ALLOCATIONGRANULARITY
287+
start -= mmap.ALLOCATIONGRANULARITY
279288
array_offset = offset - start
280289
mm = mmap.mmap(fid.fileno(), bytes, access=acc, offset=start)
281290

numpy/_core/tests/test_memmap.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,13 @@ def test_mmap_offset_greater_than_allocation_granularity(self):
199199
fp = memmap(self.tmpfp, shape=size, mode='w+', offset=offset)
200200
assert_(fp.offset == offset)
201201

202+
def test_empty_array_with_offset_multiple_of_allocation_granularity(self):
203+
self.tmpfp.write(b'a'*mmap.ALLOCATIONGRANULARITY)
204+
size = 0
205+
offset = mmap.ALLOCATIONGRANULARITY
206+
fp = memmap(self.tmpfp, shape=size, mode='w+', offset=offset)
207+
assert_equal(fp.offset, offset)
208+
202209
def test_no_shape(self):
203210
self.tmpfp.write(b'a'*16)
204211
mm = memmap(self.tmpfp, dtype='float64')
@@ -207,12 +214,14 @@ def test_no_shape(self):
207214
def test_empty_array(self):
208215
# gh-12653
209216
with pytest.raises(ValueError, match='empty file'):
210-
memmap(self.tmpfp, shape=(0,4), mode='w+')
217+
memmap(self.tmpfp, shape=(0, 4), mode='r')
211218

212-
self.tmpfp.write(b'\0')
219+
# gh-27723
220+
# empty memmap works with mode in ('w+','r+')
221+
memmap(self.tmpfp, shape=(0, 4), mode='w+')
213222

214223
# ok now the file is not empty
215-
memmap(self.tmpfp, shape=(0,4), mode='w+')
224+
memmap(self.tmpfp, shape=(0, 4), mode='w+')
216225

217226
def test_shape_type(self):
218227
memmap(self.tmpfp, shape=3, mode='w+')

0 commit comments

Comments
 (0)