From a65540ee6d022ccc7f851e2b7e4df89188c61cdd Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Sun, 20 Oct 2024 00:59:41 -0700 Subject: [PATCH 1/2] Fix several problems with filename handling - Starting in 1.2.0, OpenSlide() and OpenSlide.detect_format() have failed to accept filename arguments formatted as bytes because str(b'abc') == "b'abc'". In addition, filename arguments with invalid types (such as None) have been stringified and passed to OpenSlide, rather than raising an exception during conversion; we even had tests for this (!). - lowlevel has always encoded filename arguments to UTF-8, but on non-Windows it should have used the Python filesystem encoding instead (usually UTF-8 but not always). On Windows, OpenSlide 4.0.0+ expects UTF-8 rather than arbitrary bytes. (OpenSlide < 4.0.0 expects the system codepage, which isn't very useful in practice because of its limited character set, so we ignore that case for now.) - Type hints did not allow filename arguments to be bytes, nor did they allow os.PathLike subclasses which were not pathlib.Path (such as pathlib.PurePath). Accept str, bytes, or os.PathLike for all filename arguments, and properly convert them to bytes for OpenSlide. Fixes: 98c11bd4ca2b ("Add support for pathlib.Path instances (#123)") Fixes: 564422975d9b ("tests: test passing invalid types to OpenSlide constructor") Signed-off-by: Benjamin Gilbert --- openslide/__init__.py | 17 ++++++------- openslide/lowlevel.py | 35 +++++++++++++++++++++----- "tests/fixtures/\360\237\230\220.png" | Bin 0 -> 589 bytes "tests/fixtures/\360\237\230\220.svs" | Bin 0 -> 2651 bytes tests/test_base.py | 2 +- tests/test_imageslide.py | 18 ++++++++++++- tests/test_openslide.py | 21 +++++++++++++--- 7 files changed, 73 insertions(+), 20 deletions(-) create mode 100644 "tests/fixtures/\360\237\230\220.png" create mode 100644 "tests/fixtures/\360\237\230\220.svs" diff --git a/openslide/__init__.py b/openslide/__init__.py index 53c70756..65022b41 100644 --- a/openslide/__init__.py +++ b/openslide/__init__.py @@ -26,7 +26,6 @@ from __future__ import annotations from io import BytesIO -from pathlib import Path from types import TracebackType from typing import Iterator, Literal, Mapping, TypeVar @@ -82,7 +81,7 @@ def __exit__( return False @classmethod - def detect_format(cls, filename: str | Path) -> str | None: + def detect_format(cls, filename: lowlevel.Filename) -> str | None: """Return a string describing the format of the specified file. If the file format is not recognized, return None.""" @@ -189,11 +188,11 @@ class OpenSlide(AbstractSlide): operations on the OpenSlide object, other than close(), will fail. """ - def __init__(self, filename: str | Path): + def __init__(self, filename: lowlevel.Filename): """Open a whole-slide image.""" AbstractSlide.__init__(self) self._filename = filename - self._osr = lowlevel.open(str(filename)) + self._osr = lowlevel.open(filename) if lowlevel.read_icc_profile.available: self._profile = lowlevel.read_icc_profile(self._osr) @@ -201,11 +200,11 @@ def __repr__(self) -> str: return f'{self.__class__.__name__}({self._filename!r})' @classmethod - def detect_format(cls, filename: str | Path) -> str | None: + def detect_format(cls, filename: lowlevel.Filename) -> str | None: """Return a string describing the format vendor of the specified file. If the file format is not recognized, return None.""" - return lowlevel.detect_vendor(str(filename)) + return lowlevel.detect_vendor(filename) def close(self) -> None: """Close the OpenSlide object.""" @@ -358,7 +357,7 @@ def __repr__(self) -> str: class ImageSlide(AbstractSlide): """A wrapper for a PIL.Image that provides the OpenSlide interface.""" - def __init__(self, file: str | Path | Image.Image): + def __init__(self, file: lowlevel.Filename | Image.Image): """Open an image file. file can be a filename or a PIL.Image.""" @@ -376,7 +375,7 @@ def __repr__(self) -> str: return f'{self.__class__.__name__}({self._file_arg!r})' @classmethod - def detect_format(cls, filename: str | Path) -> str | None: + def detect_format(cls, filename: lowlevel.Filename) -> str | None: """Return a string describing the format of the specified file. If the file format is not recognized, return None.""" @@ -484,7 +483,7 @@ def set_cache(self, cache: OpenSlideCache) -> None: pass -def open_slide(filename: str | Path) -> OpenSlide | ImageSlide: +def open_slide(filename: lowlevel.Filename) -> OpenSlide | ImageSlide: """Open a whole-slide or regular image. Return an OpenSlide object for whole-slide images and an ImageSlide diff --git a/openslide/lowlevel.py b/openslide/lowlevel.py index 2042a9a5..5efa5fa9 100644 --- a/openslide/lowlevel.py +++ b/openslide/lowlevel.py @@ -2,7 +2,7 @@ # openslide-python - Python bindings for the OpenSlide library # # Copyright (c) 2010-2013 Carnegie Mellon University -# Copyright (c) 2016-2023 Benjamin Gilbert +# Copyright (c) 2016-2024 Benjamin Gilbert # # This library is free software; you can redistribute it and/or modify it # under the terms of version 2.1 of the GNU Lesser General Public License @@ -48,6 +48,7 @@ cdll, ) from itertools import count +import os import platform from typing import TYPE_CHECKING, Any, Callable, Protocol, TypeVar, cast @@ -56,7 +57,7 @@ from . import _convert if TYPE_CHECKING: - # Python 3.10+ for ParamSpec + # Python 3.10+ from typing import ParamSpec, TypeAlias from _convert import _Buffer @@ -196,6 +197,28 @@ def from_param(cls, obj: _OpenSlideCache) -> _OpenSlideCache: return obj +if TYPE_CHECKING: + # Python 3.10+ + Filename: TypeAlias = str | bytes | os.PathLike[Any] + + +class _filename_p: + """Wrapper class to convert filename arguments to bytes.""" + + @classmethod + def from_param(cls, obj: Filename) -> bytes: + # fspath and fsencode raise TypeError on unexpected types + if platform.system() == 'Windows': + # OpenSlide 4.0.0+ requires UTF-8 on Windows + obj = os.fspath(obj) + if isinstance(obj, str): + return obj.encode('UTF-8') + else: + return obj + else: + return os.fsencode(obj) + + class _utf8_p: """Wrapper class to convert string arguments to bytes.""" @@ -350,14 +373,14 @@ def decorator(fn: Callable[_P, _T]) -> _Func[_P, _T]: try: - detect_vendor: _Func[[str], str] = _func( - 'openslide_detect_vendor', c_char_p, [_utf8_p], _check_string + detect_vendor: _Func[[Filename], str] = _func( + 'openslide_detect_vendor', c_char_p, [_filename_p], _check_string ) except AttributeError: raise OpenSlideVersionError('3.4.0') -open: _Func[[str], _OpenSlide] = _func( - 'openslide_open', c_void_p, [_utf8_p], _check_open +open: _Func[[Filename], _OpenSlide] = _func( + 'openslide_open', c_void_p, [_filename_p], _check_open ) close: _Func[[_OpenSlide], None] = _func( diff --git "a/tests/fixtures/\360\237\230\220.png" "b/tests/fixtures/\360\237\230\220.png" new file mode 100644 index 0000000000000000000000000000000000000000..fccc4fe5095c5fb963f5328980dbff091e754bfc GIT binary patch literal 589 zcmeAS@N?(olHy`uVBq!ia0y~yVAKJ!e=#uw$u#Ck2|$6;OlRi+PiJR^fTH}g%$!sP zhKf0*6K%Z@2Z*%EuU@)HlwHTZ6Z zu-Fn-R-y0NLe}5aJt^JHlu_}j@wAl*Yn#5VnJoTJ_}#hZ zT)*zyyz*YuW@@gyHMmS%PgBiPz{_xMLHJQ}~8(C%h%Wip2Us82b@p8*{9atuA2Vd{gDIjyCb{0XT-iE5@N{Fprru`A<3Z;(R9+&9+yXp#6L2ivk?9M?~zM3L*!i}H@ zBmVzQ1_6eD3@kt&LjfZ*m@NS0G9s~=fNWsEFx&$wWQK~f1Nm%FHZZ&x_!yZOGJuRH zKsBOJagZJvC>vyl6O`=$WIH3VT^J#50NLvbWwQe{L^HB8C<19876t|(CT38)05KE8 z(*}rokeOy|3=AA#=va_il$o#KnVXoNs^F8ERFqg$sZeHUqz8lwyj&(`1_lNd#zvNg zCJNDpmKGL07nQ;UG|NhXFS1_s6{x|XKNX1XR8Nd~%>#%7kf76yg} zsVSz0=1Ga>H35l5xy7j^K;@yqz6wCAN>Ynzd;%qpBv939)bwQa(@#jDq@PH)(A za@VrsGgqu$^8W~fJTRNFGJ*jxC4&JY6Eh1d8#@Ol7dKGBRsjZJ-eYEBVP<7z0cHZm zTA(}wiy*6zqM;+3a9|?4QlW@Z(iUwW$pkka<)WpdpCN3cY31zV>gMj@=@lFj8WtWA8I_!pnwFlCnN?g;T2@|BS=HRq+ScCD*)?hMl&RCE z&zL!D(c&dbmn~nha@D5ITefc7zGLUELx+zXJ$C%W$y1juU%7hi`i+~n9zJ^f}jBP9rNV4wdIB1yvyfKnd+if z_=3&1>gbhcx9&xHuqfQ^53stdlOG(gweOi>qgsXS#=uOgrmR^x<`a7=o0l(SpOxBr z>fWt2uMenC4Bq1PrnOViQR0o`*Okv2&Tm(L!WFVUjeT8KcyG#qItLcPSG-2I&gSwo z8E%z%qW`^as_fJ`Wx56}f-?Pk!#7PbD0~}u>-O61GdeijERQe0wEzE2ev(SZ55STL zU+L%!EWW-0)d)gMxLj7$l2Qy>BCY}^1aYXElR!3d2?{F_tvGR)h@f;tM2W}+F8364 z?H~o9g08}7VK-XX<)9UIu+ol;ot=Z7jf;bWgPV(sn@>W3kC&HER#Z$-LRnr_MM+*s mQBB)iUrp0QQ&Gvl(a^-w+Rn~SRnNuK*~Y`%*3NqPmUaN}3g}V* literal 0 HcmV?d00001 diff --git a/tests/test_base.py b/tests/test_base.py index bcced6f8..d03ce7cf 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -45,7 +45,7 @@ def test_lowlevel_available(self): if getattr(attr, '__module__', None) == '__future__': continue # ignore random imports - if hasattr(ctypes, name) or name in ('count', 'platform'): + if hasattr(ctypes, name) or name in ('count', 'os', 'platform'): continue self.assertTrue( hasattr(attr, 'available'), diff --git a/tests/test_imageslide.py b/tests/test_imageslide.py index dd00ad66..e577851f 100644 --- a/tests/test_imageslide.py +++ b/tests/test_imageslide.py @@ -1,7 +1,7 @@ # # openslide-python - Python bindings for the OpenSlide library # -# Copyright (c) 2016-2023 Benjamin Gilbert +# Copyright (c) 2016-2024 Benjamin Gilbert # # This library is free software; you can redistribute it and/or modify it # under the terms of version 2.1 of the GNU Lesser General Public License @@ -19,6 +19,7 @@ from __future__ import annotations +import sys import unittest from PIL import Image @@ -44,6 +45,21 @@ def test_open_image(self): self.assertEqual(osr.dimensions, (300, 250)) self.assertEqual(repr(osr), 'ImageSlide(%r)' % img) + @unittest.skipUnless( + sys.getfilesystemencoding() == 'utf-8', + 'Python filesystem encoding is not UTF-8', + ) + def test_unicode_path(self): + path = file_path('😐.png') + for arg in path, str(path): + self.assertEqual(ImageSlide.detect_format(arg), 'PNG') + self.assertEqual(ImageSlide(arg).dimensions, (300, 250)) + + def test_unicode_path_bytes(self): + arg = str(file_path('😐.png')).encode('UTF-8') + self.assertEqual(ImageSlide.detect_format(arg), 'PNG') + self.assertEqual(ImageSlide(arg).dimensions, (300, 250)) + def test_operations_on_closed_handle(self): with Image.open(file_path('boxes.png')) as img: osr = ImageSlide(img) diff --git a/tests/test_openslide.py b/tests/test_openslide.py index ab134f33..b863fb97 100644 --- a/tests/test_openslide.py +++ b/tests/test_openslide.py @@ -1,7 +1,7 @@ # # openslide-python - Python bindings for the OpenSlide library # -# Copyright (c) 2016-2023 Benjamin Gilbert +# Copyright (c) 2016-2024 Benjamin Gilbert # # This library is free software; you can redistribute it and/or modify it # under the terms of version 2.1 of the GNU Lesser General Public License @@ -60,12 +60,27 @@ def test_open(self): self.assertRaises( OpenSlideUnsupportedFormatError, lambda: OpenSlide('setup.py') ) - self.assertRaises(OpenSlideUnsupportedFormatError, lambda: OpenSlide(None)) - self.assertRaises(OpenSlideUnsupportedFormatError, lambda: OpenSlide(3)) + self.assertRaises(ArgumentError, lambda: OpenSlide(None)) + self.assertRaises(ArgumentError, lambda: OpenSlide(3)) self.assertRaises( OpenSlideUnsupportedFormatError, lambda: OpenSlide('unopenable.tiff') ) + @unittest.skipUnless( + sys.getfilesystemencoding() == 'utf-8', + 'Python filesystem encoding is not UTF-8', + ) + def test_unicode_path(self): + path = file_path('😐.svs') + for arg in path, str(path): + self.assertEqual(OpenSlide.detect_format(arg), 'aperio') + self.assertEqual(OpenSlide(arg).dimensions, (16, 16)) + + def test_unicode_path_bytes(self): + arg = str(file_path('😐.svs')).encode('UTF-8') + self.assertEqual(OpenSlide.detect_format(arg), 'aperio') + self.assertEqual(OpenSlide(arg).dimensions, (16, 16)) + def test_operations_on_closed_handle(self): osr = OpenSlide(file_path('boxes.tiff')) props = osr.properties From cf628e0bbed9cd398870513c968e8170b5c9aaa6 Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Sun, 20 Oct 2024 01:54:58 -0700 Subject: [PATCH 2/2] lowlevel: accept bytes in type hints for _utf8_p arguments The high-level API doesn't accept bytes for any of the affected functionality, but lowlevel does, so encode that in the type signatures. Signed-off-by: Benjamin Gilbert --- openslide/lowlevel.py | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/openslide/lowlevel.py b/openslide/lowlevel.py index 5efa5fa9..48963057 100644 --- a/openslide/lowlevel.py +++ b/openslide/lowlevel.py @@ -475,7 +475,7 @@ def read_icc_profile(slide: _OpenSlide) -> bytes | None: 'openslide_get_property_names', POINTER(c_char_p), [_OpenSlide], _check_name_list ) -get_property_value: _Func[[_OpenSlide, str], str] = _func( +get_property_value: _Func[[_OpenSlide, str | bytes], str] = _func( 'openslide_get_property_value', c_char_p, [_OpenSlide, _utf8_p] ) @@ -487,7 +487,7 @@ def read_icc_profile(slide: _OpenSlide) -> bytes | None: ) _get_associated_image_dimensions: _Func[ - [_OpenSlide, str, _Pointer[c_int64], _Pointer[c_int64]], None + [_OpenSlide, str | bytes, _Pointer[c_int64], _Pointer[c_int64]], None ] = _func( 'openslide_get_associated_image_dimensions', None, @@ -496,46 +496,54 @@ def read_icc_profile(slide: _OpenSlide) -> bytes | None: @_wraps_funcs([_get_associated_image_dimensions]) -def get_associated_image_dimensions(slide: _OpenSlide, name: str) -> tuple[int, int]: +def get_associated_image_dimensions( + slide: _OpenSlide, name: str | bytes +) -> tuple[int, int]: w, h = c_int64(), c_int64() _get_associated_image_dimensions(slide, name, byref(w), byref(h)) return w.value, h.value -_read_associated_image: _Func[[_OpenSlide, str, _Pointer[c_uint32]], None] = _func( - 'openslide_read_associated_image', None, [_OpenSlide, _utf8_p, POINTER(c_uint32)] +_read_associated_image: _Func[[_OpenSlide, str | bytes, _Pointer[c_uint32]], None] = ( + _func( + 'openslide_read_associated_image', + None, + [_OpenSlide, _utf8_p, POINTER(c_uint32)], + ) ) @_wraps_funcs([get_associated_image_dimensions, _read_associated_image]) -def read_associated_image(slide: _OpenSlide, name: str) -> Image.Image: +def read_associated_image(slide: _OpenSlide, name: str | bytes) -> Image.Image: w, h = get_associated_image_dimensions(slide, name) buf = (w * h * c_uint32)() _read_associated_image(slide, name, buf) return _load_image(buf, (w, h)) -get_associated_image_icc_profile_size: _Func[[_OpenSlide, str], int] = _func( +get_associated_image_icc_profile_size: _Func[[_OpenSlide, str | bytes], int] = _func( 'openslide_get_associated_image_icc_profile_size', c_int64, [_OpenSlide, _utf8_p], minimum_version='4.0.0', ) -_read_associated_image_icc_profile: _Func[[_OpenSlide, str, _Pointer[c_char]], None] = ( - _func( - 'openslide_read_associated_image_icc_profile', - None, - [_OpenSlide, _utf8_p, POINTER(c_char)], - minimum_version='4.0.0', - ) +_read_associated_image_icc_profile: _Func[ + [_OpenSlide, str | bytes, _Pointer[c_char]], None +] = _func( + 'openslide_read_associated_image_icc_profile', + None, + [_OpenSlide, _utf8_p, POINTER(c_char)], + minimum_version='4.0.0', ) @_wraps_funcs( [get_associated_image_icc_profile_size, _read_associated_image_icc_profile] ) -def read_associated_image_icc_profile(slide: _OpenSlide, name: str) -> bytes | None: +def read_associated_image_icc_profile( + slide: _OpenSlide, name: str | bytes +) -> bytes | None: size = get_associated_image_icc_profile_size(slide, name) if size == 0: return None