Skip to content

Commit 5f0b7b1

Browse files
committed
Fix several problems with filename handling
- Starting in 1.2.0, OpenSlide() and OpenSlide.detect_format() have failed to accept filename arguments formatted as bytes because str(b'abc') == "b'abc'". In addition, filename arguments with invalid types (such as None) have been stringified and passed to OpenSlide, rather than raising an exception during conversion; we even had tests for this (!). - lowlevel has always encoded filename arguments to UTF-8, but on non-Windows it should have used the Python filesystem encoding instead (usually UTF-8 but not always). On Windows, OpenSlide 4.0.0+ expects UTF-8 rather than arbitrary bytes. (OpenSlide < 4.0.0 expects the system codepage, which isn't very useful in practice because of its limited character set, so we ignore that case for now.) - Type hints did not allow filename arguments to be bytes, nor did they allow os.PathLike subclasses which were not pathlib.Path (such as pathlib.PurePath). Accept str, bytes, or os.PathLike for all filename arguments, and properly convert them to bytes for OpenSlide. Fixes: 98c11bd ("Add support for pathlib.Path instances (#123)") Fixes: 5644229 ("tests: test passing invalid types to OpenSlide constructor")
1 parent 6a75bbd commit 5f0b7b1

File tree

7 files changed

+73
-22
lines changed

7 files changed

+73
-22
lines changed

openslide/__init__.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
from __future__ import annotations
2727

2828
from io import BytesIO
29-
from pathlib import Path
3029
from types import TracebackType
3130
from typing import Iterator, Literal, Mapping, TypeVar
3231

@@ -82,7 +81,7 @@ def __exit__(
8281
return False
8382

8483
@classmethod
85-
def detect_format(cls, filename: str | Path) -> str | None:
84+
def detect_format(cls, filename: lowlevel.Filename) -> str | None:
8685
"""Return a string describing the format of the specified file.
8786
8887
If the file format is not recognized, return None."""
@@ -189,23 +188,23 @@ class OpenSlide(AbstractSlide):
189188
operations on the OpenSlide object, other than close(), will fail.
190189
"""
191190

192-
def __init__(self, filename: str | Path):
191+
def __init__(self, filename: lowlevel.Filename):
193192
"""Open a whole-slide image."""
194193
AbstractSlide.__init__(self)
195194
self._filename = filename
196-
self._osr = lowlevel.open(str(filename))
195+
self._osr = lowlevel.open(filename)
197196
if lowlevel.read_icc_profile.available:
198197
self._profile = lowlevel.read_icc_profile(self._osr)
199198

200199
def __repr__(self) -> str:
201200
return f'{self.__class__.__name__}({self._filename!r})'
202201

203202
@classmethod
204-
def detect_format(cls, filename: str | Path) -> str | None:
203+
def detect_format(cls, filename: lowlevel.Filename) -> str | None:
205204
"""Return a string describing the format vendor of the specified file.
206205
207206
If the file format is not recognized, return None."""
208-
return lowlevel.detect_vendor(str(filename))
207+
return lowlevel.detect_vendor(filename)
209208

210209
def close(self) -> None:
211210
"""Close the OpenSlide object."""
@@ -358,7 +357,7 @@ def __repr__(self) -> str:
358357
class ImageSlide(AbstractSlide):
359358
"""A wrapper for a PIL.Image that provides the OpenSlide interface."""
360359

361-
def __init__(self, file: str | Path | Image.Image):
360+
def __init__(self, file: lowlevel.Filename | Image.Image):
362361
"""Open an image file.
363362
364363
file can be a filename or a PIL.Image."""
@@ -376,7 +375,7 @@ def __repr__(self) -> str:
376375
return f'{self.__class__.__name__}({self._file_arg!r})'
377376

378377
@classmethod
379-
def detect_format(cls, filename: str | Path) -> str | None:
378+
def detect_format(cls, filename: lowlevel.Filename) -> str | None:
380379
"""Return a string describing the format of the specified file.
381380
382381
If the file format is not recognized, return None."""
@@ -484,7 +483,7 @@ def set_cache(self, cache: OpenSlideCache) -> None:
484483
pass
485484

486485

487-
def open_slide(filename: str | Path) -> OpenSlide | ImageSlide:
486+
def open_slide(filename: lowlevel.Filename) -> OpenSlide | ImageSlide:
488487
"""Open a whole-slide or regular image.
489488
490489
Return an OpenSlide object for whole-slide images and an ImageSlide

openslide/lowlevel.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# openslide-python - Python bindings for the OpenSlide library
33
#
44
# Copyright (c) 2010-2013 Carnegie Mellon University
5-
# Copyright (c) 2016-2023 Benjamin Gilbert
5+
# Copyright (c) 2016-2024 Benjamin Gilbert
66
#
77
# This library is free software; you can redistribute it and/or modify it
88
# under the terms of version 2.1 of the GNU Lesser General Public License
@@ -48,16 +48,17 @@
4848
cdll,
4949
)
5050
from itertools import count
51+
import os
5152
import platform
52-
from typing import TYPE_CHECKING, Any, Callable, Protocol, TypeVar, cast
53+
from typing import TYPE_CHECKING, Any, Callable, Protocol, TypeAlias, TypeVar, cast
5354

5455
from PIL import Image
5556

5657
from . import _convert
5758

5859
if TYPE_CHECKING:
59-
# Python 3.10+ for ParamSpec
60-
from typing import ParamSpec, TypeAlias
60+
# Python 3.10+
61+
from typing import ParamSpec
6162

6263
from _convert import _Buffer
6364

@@ -196,6 +197,26 @@ def from_param(cls, obj: _OpenSlideCache) -> _OpenSlideCache:
196197
return obj
197198

198199

200+
Filename: TypeAlias = str | bytes | os.PathLike[Any]
201+
202+
203+
class _filename_p:
204+
"""Wrapper class to convert filename arguments to bytes."""
205+
206+
@classmethod
207+
def from_param(cls, obj: Filename) -> bytes:
208+
# fspath and fsencode throw TypeError on unexpected types
209+
if platform.system() == 'Windows':
210+
# OpenSlide 4.0.0+ requires UTF-8 on Windows
211+
obj = os.fspath(obj)
212+
if isinstance(obj, str):
213+
return obj.encode('UTF-8')
214+
else:
215+
return obj
216+
else:
217+
return os.fsencode(obj)
218+
219+
199220
class _utf8_p:
200221
"""Wrapper class to convert string arguments to bytes."""
201222

@@ -350,14 +371,14 @@ def decorator(fn: Callable[_P, _T]) -> _Func[_P, _T]:
350371

351372

352373
try:
353-
detect_vendor: _Func[[str], str] = _func(
354-
'openslide_detect_vendor', c_char_p, [_utf8_p], _check_string
374+
detect_vendor: _Func[[Filename], str] = _func(
375+
'openslide_detect_vendor', c_char_p, [_filename_p], _check_string
355376
)
356377
except AttributeError:
357378
raise OpenSlideVersionError('3.4.0')
358379

359-
open: _Func[[str], _OpenSlide] = _func(
360-
'openslide_open', c_void_p, [_utf8_p], _check_open
380+
open: _Func[[Filename], _OpenSlide] = _func(
381+
'openslide_open', c_void_p, [_filename_p], _check_open
361382
)
362383

363384
close: _Func[[_OpenSlide], None] = _func(

tests/fixtures/😐.png

589 Bytes
Loading

tests/fixtures/😐.svs

2.59 KB
Binary file not shown.

tests/test_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def test_lowlevel_available(self):
4545
if getattr(attr, '__module__', None) == '__future__':
4646
continue
4747
# ignore random imports
48-
if hasattr(ctypes, name) or name in ('count', 'platform'):
48+
if hasattr(ctypes, name) or name in ('count', 'os', 'platform'):
4949
continue
5050
self.assertTrue(
5151
hasattr(attr, 'available'),

tests/test_imageslide.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#
22
# openslide-python - Python bindings for the OpenSlide library
33
#
4-
# Copyright (c) 2016-2023 Benjamin Gilbert
4+
# Copyright (c) 2016-2024 Benjamin Gilbert
55
#
66
# This library is free software; you can redistribute it and/or modify it
77
# under the terms of version 2.1 of the GNU Lesser General Public License
@@ -19,6 +19,7 @@
1919

2020
from __future__ import annotations
2121

22+
import sys
2223
import unittest
2324

2425
from PIL import Image
@@ -44,6 +45,21 @@ def test_open_image(self):
4445
self.assertEqual(osr.dimensions, (300, 250))
4546
self.assertEqual(repr(osr), 'ImageSlide(%r)' % img)
4647

48+
@unittest.skipUnless(
49+
sys.getfilesystemencoding() == 'utf-8',
50+
'Python filesystem encoding is not UTF-8',
51+
)
52+
def test_unicode_path(self):
53+
path = file_path('😐.png')
54+
for arg in path, str(path):
55+
self.assertEqual(ImageSlide.detect_format(arg), 'PNG')
56+
self.assertEqual(ImageSlide(arg).dimensions, (300, 250))
57+
58+
def test_unicode_path_bytes(self):
59+
arg = str(file_path('😐.png')).encode('UTF-8')
60+
self.assertEqual(ImageSlide.detect_format(arg), 'PNG')
61+
self.assertEqual(ImageSlide(arg).dimensions, (300, 250))
62+
4763
def test_operations_on_closed_handle(self):
4864
with Image.open(file_path('boxes.png')) as img:
4965
osr = ImageSlide(img)

tests/test_openslide.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#
22
# openslide-python - Python bindings for the OpenSlide library
33
#
4-
# Copyright (c) 2016-2023 Benjamin Gilbert
4+
# Copyright (c) 2016-2024 Benjamin Gilbert
55
#
66
# This library is free software; you can redistribute it and/or modify it
77
# under the terms of version 2.1 of the GNU Lesser General Public License
@@ -61,12 +61,27 @@ def test_open(self):
6161
self.assertRaises(
6262
OpenSlideUnsupportedFormatError, lambda: OpenSlide('setup.py')
6363
)
64-
self.assertRaises(OpenSlideUnsupportedFormatError, lambda: OpenSlide(None))
65-
self.assertRaises(OpenSlideUnsupportedFormatError, lambda: OpenSlide(3))
64+
self.assertRaises(ArgumentError, lambda: OpenSlide(None))
65+
self.assertRaises(ArgumentError, lambda: OpenSlide(3))
6666
self.assertRaises(
6767
OpenSlideUnsupportedFormatError, lambda: OpenSlide('unopenable.tiff')
6868
)
6969

70+
@unittest.skipUnless(
71+
sys.getfilesystemencoding() == 'utf-8',
72+
'Python filesystem encoding is not UTF-8',
73+
)
74+
def test_unicode_path(self):
75+
path = file_path('😐.svs')
76+
for arg in path, str(path):
77+
self.assertEqual(OpenSlide.detect_format(arg), 'aperio')
78+
self.assertEqual(OpenSlide(arg).dimensions, (16, 16))
79+
80+
def test_unicode_path_bytes(self):
81+
arg = str(file_path('😐.svs')).encode('UTF-8')
82+
self.assertEqual(OpenSlide.detect_format(arg), 'aperio')
83+
self.assertEqual(OpenSlide(arg).dimensions, (16, 16))
84+
7085
def test_operations_on_closed_handle(self):
7186
osr = OpenSlide(file_path('boxes.tiff'))
7287
props = osr.properties

0 commit comments

Comments
 (0)