Skip to content

Commit 5d010e9

Browse files
authored
Use os.fsencode to encode bytes (#5662)
# Remove Python 2 compatibility code for string encoding/decoding This PR simplifies the codebase by removing Python 2 compatibility code related to string encoding and decoding operations. Key changes: - Remove custom `_convert_args`, `arg_encoding()`, `_fsencoding()` and `convert_command_args()` functions - Replace with standard library's `os.fsencode()` and `os.fsdecode()` for file system path handling - Simplify bytestring/string conversions throughout the codebase - Update test code to use modern string handling These changes reduce code complexity and maintenance burden since Python 2 support is no longer needed.
2 parents 43301c4 + 9acfa3c commit 5d010e9

File tree

14 files changed

+44
-143
lines changed

14 files changed

+44
-143
lines changed

beets/test/helper.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -117,20 +117,9 @@ def capture_stdout():
117117
print(capture.getvalue())
118118

119119

120-
def _convert_args(args):
121-
"""Convert args to bytestrings for Python 2 and convert them to strings
122-
on Python 3.
123-
"""
124-
for i, elem in enumerate(args):
125-
if isinstance(elem, bytes):
126-
args[i] = elem.decode(util.arg_encoding())
127-
128-
return args
129-
130-
131120
def has_program(cmd, args=["--version"]):
132121
"""Returns `True` if `cmd` can be executed."""
133-
full_cmd = _convert_args([cmd] + args)
122+
full_cmd = [cmd] + args
134123
try:
135124
with open(os.devnull, "wb") as devnull:
136125
subprocess.check_call(
@@ -385,7 +374,7 @@ def run_command(self, *args, **kwargs):
385374
if hasattr(self, "lib"):
386375
lib = self.lib
387376
lib = kwargs.get("lib", lib)
388-
beets.ui._raw_main(_convert_args(list(args)), lib)
377+
beets.ui._raw_main(list(args), lib)
389378

390379
def run_with_output(self, *args):
391380
with capture_stdout() as out:

beets/ui/commands.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1359,13 +1359,8 @@ def import_func(lib, opts, args):
13591359
# what we need. On Python 3, we need to undo the "helpful"
13601360
# conversion to Unicode strings to get the real bytestring
13611361
# filename.
1362-
paths = [
1363-
p.encode(util.arg_encoding(), "surrogateescape") for p in paths
1364-
]
1365-
paths_from_logfiles = [
1366-
p.encode(util.arg_encoding(), "surrogateescape")
1367-
for p in paths_from_logfiles
1368-
]
1362+
paths = [os.fsencode(p) for p in paths]
1363+
paths_from_logfiles = [os.fsencode(p) for p in paths_from_logfiles]
13691364

13701365
# Check the user-specified directories.
13711366
for path in paths:

beets/util/__init__.py

Lines changed: 3 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -369,28 +369,6 @@ def components(path: AnyStr) -> list[AnyStr]:
369369
return comps
370370

371371

372-
def arg_encoding() -> str:
373-
"""Get the encoding for command-line arguments (and other OS
374-
locale-sensitive strings).
375-
"""
376-
return sys.getfilesystemencoding()
377-
378-
379-
def _fsencoding() -> str:
380-
"""Get the system's filesystem encoding. On Windows, this is always
381-
UTF-8 (not MBCS).
382-
"""
383-
encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
384-
if encoding == "mbcs":
385-
# On Windows, a broken encoding known to Python as "MBCS" is
386-
# used for the filesystem. However, we only use the Unicode API
387-
# for Windows paths, so the encoding is actually immaterial so
388-
# we can avoid dealing with this nastiness. We arbitrarily
389-
# choose UTF-8.
390-
encoding = "utf-8"
391-
return encoding
392-
393-
394372
def bytestring_path(path: PathLike) -> bytes:
395373
"""Given a path, which is either a bytes or a unicode, returns a str
396374
path (ensuring that we never deal with Unicode pathnames). Path should be
@@ -410,11 +388,7 @@ def bytestring_path(path: PathLike) -> bytes:
410388
):
411389
str_path = str_path[len(WINDOWS_MAGIC_PREFIX) :]
412390

413-
# Try to encode with default encodings, but fall back to utf-8.
414-
try:
415-
return str_path.encode(_fsencoding())
416-
except (UnicodeError, LookupError):
417-
return str_path.encode("utf-8")
391+
return os.fsencode(str_path)
418392

419393

420394
PATH_SEP: bytes = bytestring_path(os.sep)
@@ -436,10 +410,7 @@ def displayable_path(
436410
# A non-string object: just get its unicode representation.
437411
return str(path)
438412

439-
try:
440-
return path.decode(_fsencoding(), "ignore")
441-
except (UnicodeError, LookupError):
442-
return path.decode("utf-8", "ignore")
413+
return os.fsdecode(path)
443414

444415

445416
def syspath(path: PathLike, prefix: bool = True) -> str:
@@ -854,19 +825,6 @@ def plurality(objs: Sequence[T]) -> tuple[T, int]:
854825
return c.most_common(1)[0]
855826

856827

857-
def convert_command_args(args: list[BytesOrStr]) -> list[str]:
858-
"""Convert command arguments, which may either be `bytes` or `str`
859-
objects, to uniformly surrogate-escaped strings."""
860-
assert isinstance(args, list)
861-
862-
def convert(arg) -> str:
863-
if isinstance(arg, bytes):
864-
return os.fsdecode(arg)
865-
return arg
866-
867-
return [convert(a) for a in args]
868-
869-
870828
# stdout and stderr as bytes
871829
class CommandOutput(NamedTuple):
872830
stdout: bytes
@@ -891,7 +849,7 @@ def command_output(cmd: list[BytesOrStr], shell: bool = False) -> CommandOutput:
891849
This replaces `subprocess.check_output` which can have problems if lots of
892850
output is sent to stderr.
893851
"""
894-
converted_cmd = convert_command_args(cmd)
852+
converted_cmd = [os.fsdecode(a) for a in cmd]
895853

896854
devnull = subprocess.DEVNULL
897855

beetsplug/convert.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from beets import art, config, plugins, ui, util
2929
from beets.library import Item, parse_query_string
3030
from beets.plugins import BeetsPlugin
31-
from beets.util import arg_encoding, par_map
31+
from beets.util import par_map
3232
from beets.util.artresizer import ArtResizer
3333
from beets.util.m3u import M3UFile
3434

@@ -284,7 +284,7 @@ def encode(self, command, source, dest, pretend=False):
284284
if not quiet and not pretend:
285285
self._log.info("Encoding {0}", util.displayable_path(source))
286286

287-
command = command.decode(arg_encoding(), "surrogateescape")
287+
command = os.fsdecode(command)
288288
source = os.fsdecode(source)
289289
dest = os.fsdecode(dest)
290290

@@ -298,7 +298,7 @@ def encode(self, command, source, dest, pretend=False):
298298
"dest": dest,
299299
}
300300
)
301-
encode_cmd.append(args[i].encode(util.arg_encoding()))
301+
encode_cmd.append(os.fsdecode(args[i]))
302302

303303
if pretend:
304304
self._log.info("{0}", " ".join(ui.decargs(args)))

beetsplug/hook.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717
import shlex
1818
import string
1919
import subprocess
20+
import sys
2021

2122
from beets.plugins import BeetsPlugin
22-
from beets.util import arg_encoding
2323

2424

2525
class CodingFormatter(string.Formatter):
@@ -73,7 +73,7 @@ def hook_function(**kwargs):
7373

7474
# For backwards compatibility, use a string formatter that decodes
7575
# bytes (in particular, paths) to unicode strings.
76-
formatter = CodingFormatter(arg_encoding())
76+
formatter = CodingFormatter(sys.getfilesystemencoding())
7777
command_pieces = [
7878
formatter.format(piece, event=event, **kwargs)
7979
for piece in shlex.split(command)

beetsplug/ipfs.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121
from beets import config, library, ui, util
2222
from beets.plugins import BeetsPlugin
23-
from beets.util import syspath
2423

2524

2625
class IPFSPlugin(BeetsPlugin):
@@ -193,7 +192,7 @@ def ipfs_get_from_hash(self, lib, _hash):
193192
# This uses a relative path, hence we cannot use util.syspath(_hash,
194193
# prefix=True). However, that should be fine since the hash will not
195194
# exceed MAX_PATH.
196-
shutil.rmtree(syspath(_hash, prefix=False))
195+
shutil.rmtree(util.syspath(_hash, prefix=False))
197196

198197
def ipfs_publish(self, lib):
199198
with tempfile.NamedTemporaryFile() as tmp:
@@ -299,9 +298,7 @@ def create_new_album(self, album, tmplib):
299298
break
300299
except AttributeError:
301300
pass
302-
item_path = os.path.basename(item.path).decode(
303-
util._fsencoding(), "ignore"
304-
)
301+
item_path = os.fsdecode(os.path.basename(item.path))
305302
# Clear current path from item
306303
item.path = f"/ipfs/{album.ipfs}/{item_path}"
307304

beetsplug/thumbnails.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727

2828
from xdg import BaseDirectory
2929

30-
from beets import util
3130
from beets.plugins import BeetsPlugin
3231
from beets.ui import Subcommand, decargs
3332
from beets.util import bytestring_path, displayable_path, syspath
@@ -288,6 +287,6 @@ def uri(self, path):
288287
self.libgio.g_free(uri_ptr)
289288

290289
try:
291-
return uri.decode(util._fsencoding())
290+
return os.fsdecode(uri)
292291
except UnicodeDecodeError:
293292
raise RuntimeError(f"Could not decode filename from GIO: {uri!r}")

beetsplug/web/__init__.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -315,13 +315,8 @@ def item_file(item_id):
315315
item_path = os.fsdecode(item.path)
316316

317317
base_filename = os.path.basename(item_path)
318-
# FIXME: Arguably, this should just use `displayable_path`: The latter
319-
# tries `_fsencoding()` first, but then falls back to `utf-8`, too.
320318
if isinstance(base_filename, bytes):
321-
try:
322-
unicode_base_filename = base_filename.decode("utf-8")
323-
except UnicodeError:
324-
unicode_base_filename = util.displayable_path(base_filename)
319+
unicode_base_filename = util.displayable_path(base_filename)
325320
else:
326321
unicode_base_filename = base_filename
327322

test/plugins/test_convert.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -143,18 +143,13 @@ class ConvertCommand:
143143
in tests.
144144
"""
145145

146-
def run_convert_path(self, path, *args):
146+
def run_convert_path(self, item, *args):
147147
"""Run the `convert` command on a given path."""
148-
# The path is currently a filesystem bytestring. Convert it to
149-
# an argument bytestring.
150-
path = path.decode(util._fsencoding()).encode(util.arg_encoding())
151-
152-
args = args + (b"path:" + path,)
153-
return self.run_command("convert", *args)
148+
return self.run_command("convert", *args, f"path:{item.filepath}")
154149

155150
def run_convert(self, *args):
156151
"""Run the `convert` command on `self.item`."""
157-
return self.run_convert_path(self.item.path, *args)
152+
return self.run_convert_path(self.item, *args)
158153

159154

160155
@_common.slow_test()
@@ -320,22 +315,22 @@ def setUp(self):
320315
def test_transcode_from_lossless(self):
321316
[item] = self.add_item_fixtures(ext="flac")
322317
with control_stdin("y"):
323-
self.run_convert_path(item.path)
318+
self.run_convert_path(item)
324319
converted = os.path.join(self.convert_dest, b"converted.mp3")
325320
self.assertFileTag(converted, "mp3")
326321

327322
def test_transcode_from_lossy(self):
328323
self.config["convert"]["never_convert_lossy_files"] = False
329324
[item] = self.add_item_fixtures(ext="ogg")
330325
with control_stdin("y"):
331-
self.run_convert_path(item.path)
326+
self.run_convert_path(item)
332327
converted = os.path.join(self.convert_dest, b"converted.mp3")
333328
self.assertFileTag(converted, "mp3")
334329

335330
def test_transcode_from_lossy_prevented(self):
336331
[item] = self.add_item_fixtures(ext="ogg")
337332
with control_stdin("y"):
338-
self.run_convert_path(item.path)
333+
self.run_convert_path(item)
339334
converted = os.path.join(self.convert_dest, b"converted.ogg")
340335
self.assertNoFileTag(converted, "mp3")
341336

test/plugins/test_ipfs.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
from beets.test import _common
1919
from beets.test.helper import PluginTestCase
20-
from beets.util import _fsencoding, bytestring_path
20+
from beets.util import bytestring_path
2121
from beetsplug.ipfs import IPFSPlugin
2222

2323

@@ -36,9 +36,7 @@ def test_stored_hashes(self):
3636
for check_item in added_album.items():
3737
try:
3838
if check_item.get("ipfs", with_album=False):
39-
ipfs_item = os.path.basename(want_item.path).decode(
40-
_fsencoding(),
41-
)
39+
ipfs_item = os.fsdecode(os.path.basename(want_item.path))
4240
want_path = "/ipfs/{}/{}".format(test_album.ipfs, ipfs_item)
4341
want_path = bytestring_path(want_path)
4442
assert check_item.path == want_path

0 commit comments

Comments
 (0)