Skip to content

Commit 9756a5d

Browse files
authored
Fix _legalize_path types and clean up path legalisation logic (#5224)
**Refactor: Simplify Path Generation and Legalization** This PR refactors the way destination paths for library items are generated and made filesystem-safe. The goal is to simplify the process, make it more robust, and centralize most of the path manipulation logic. **Key Changes:** * **`Item.destination` Simplified:** * The method now has a clearer interface, primarily controlled by the `relative_to_libdir` flag (replacing the old `fragment` flag). * It consistently returns the path as `bytes`, ready for filesystem operations. * Path legalization logic (sanitization, truncation, replacements) is now delegated to `util.legalize_path`. * **`util.legalize_path` Enhanced:** * Takes responsibility for the full legalization process, including truncation based on filesystem limits. * Uses new helper functions (`util.truncate_path`, `util.truncate_str`) for robust truncation of path components while preserving extensions and handling multi-byte characters correctly. * Includes logic to handle potential conflicts where sanitization/replacements might interfere with truncation, falling back to default rules if necessary. * **Centralized Max Length:** * A new cached function `util.get_max_filename_length` determines the maximum filename length, checking the config first and then querying the filesystem. This refactoring leads to cleaner code by separating concerns: `Item.destination` focuses on generating the *intended* path based on metadata and formats, while `util.legalize_path` and its helpers handle the complexities of making that path valid for the target filesystem.
2 parents ecdff78 + 921b7ed commit 9756a5d

File tree

8 files changed

+147
-201
lines changed

8 files changed

+147
-201
lines changed

beets/library.py

Lines changed: 15 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,26 +1075,19 @@ def move(
10751075

10761076
def destination(
10771077
self,
1078-
fragment=False,
1078+
relative_to_libdir=False,
10791079
basedir=None,
1080-
platform=None,
10811080
path_formats=None,
1082-
replacements=None,
1083-
):
1084-
"""Return the path in the library directory designated for the
1085-
item (i.e., where the file ought to be).
1081+
) -> bytes:
1082+
"""Return the path in the library directory designated for the item
1083+
(i.e., where the file ought to be).
10861084
1087-
fragment makes this method return just the path fragment underneath
1088-
the root library directory; the path is also returned as Unicode
1089-
instead of encoded as a bytestring. basedir can override the library's
1090-
base directory for the destination.
1085+
The path is returned as a bytestring. ``basedir`` can override the
1086+
library's base directory for the destination.
10911087
"""
10921088
db = self._check_db()
1093-
platform = platform or sys.platform
10941089
basedir = basedir or db.directory
10951090
path_formats = path_formats or db.path_formats
1096-
if replacements is None:
1097-
replacements = self._db.replacements
10981091

10991092
# Use a path format based on a query, falling back on the
11001093
# default.
@@ -1122,7 +1115,7 @@ def destination(
11221115
subpath = self.evaluate_template(subpath_tmpl, True)
11231116

11241117
# Prepare path for output: normalize Unicode characters.
1125-
if platform == "darwin":
1118+
if sys.platform == "darwin":
11261119
subpath = unicodedata.normalize("NFD", subpath)
11271120
else:
11281121
subpath = unicodedata.normalize("NFC", subpath)
@@ -1132,19 +1125,10 @@ def destination(
11321125
subpath, beets.config["path_sep_replace"].as_str()
11331126
)
11341127

1135-
maxlen = beets.config["max_filename_length"].get(int)
1136-
if not maxlen:
1137-
# When zero, try to determine from filesystem.
1138-
maxlen = util.max_filename_length(db.directory)
1139-
1140-
subpath, fellback = util.legalize_path(
1141-
subpath,
1142-
replacements,
1143-
maxlen,
1144-
os.path.splitext(self.path)[1],
1145-
fragment,
1128+
lib_path_str, fallback = util.legalize_path(
1129+
subpath, db.replacements, os.path.splitext(self.path)[1]
11461130
)
1147-
if fellback:
1131+
if fallback:
11481132
# Print an error message if legalization fell back to
11491133
# default replacements because of the maximum length.
11501134
log.warning(
@@ -1153,11 +1137,12 @@ def destination(
11531137
"the filename.",
11541138
subpath,
11551139
)
1140+
lib_path_bytes = util.bytestring_path(lib_path_str)
11561141

1157-
if fragment:
1158-
return util.as_string(subpath)
1159-
else:
1160-
return normpath(os.path.join(basedir, subpath))
1142+
if relative_to_libdir:
1143+
return lib_path_bytes
1144+
1145+
return normpath(os.path.join(basedir, lib_path_bytes))
11611146

11621147

11631148
class Album(LibModel):

beets/util/__init__.py

Lines changed: 62 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from collections import Counter
3131
from contextlib import suppress
3232
from enum import Enum
33+
from functools import cache
3334
from importlib import import_module
3435
from multiprocessing.pool import ThreadPool
3536
from pathlib import Path
@@ -47,6 +48,7 @@
4748

4849
from unidecode import unidecode
4950

51+
import beets
5052
from beets.util import hidden
5153

5254
if TYPE_CHECKING:
@@ -694,105 +696,87 @@ def sanitize_path(path: str, replacements: Replacements | None = None) -> str:
694696
return os.path.join(*comps)
695697

696698

697-
def truncate_path(path: AnyStr, length: int = MAX_FILENAME_LENGTH) -> AnyStr:
698-
"""Given a bytestring path or a Unicode path fragment, truncate the
699-
components to a legal length. In the last component, the extension
700-
is preserved.
699+
def truncate_str(s: str, length: int) -> str:
700+
"""Truncate the string to the given byte length.
701+
702+
If we end up truncating a unicode character in the middle (rendering it invalid),
703+
it is removed:
704+
705+
>>> s = "🎹🎶" # 8 bytes
706+
>>> truncate_str(s, 6)
707+
'🎹'
701708
"""
702-
comps = components(path)
709+
return os.fsencode(s)[:length].decode(sys.getfilesystemencoding(), "ignore")
703710

704-
out = [c[:length] for c in comps]
705-
base, ext = os.path.splitext(comps[-1])
706-
if ext:
707-
# Last component has an extension.
708-
base = base[: length - len(ext)]
709-
out[-1] = base + ext
710711

711-
return os.path.join(*out)
712+
def truncate_path(str_path: str) -> str:
713+
"""Truncate each path part to a legal length preserving the extension."""
714+
max_length = get_max_filename_length()
715+
path = Path(str_path)
716+
parent_parts = [truncate_str(p, max_length) for p in path.parts[:-1]]
717+
stem = truncate_str(path.stem, max_length - len(path.suffix))
718+
return str(Path(*parent_parts, stem).with_suffix(path.suffix))
712719

713720

714721
def _legalize_stage(
715-
path: str,
716-
replacements: Replacements | None,
717-
length: int,
718-
extension: str,
719-
fragment: bool,
720-
) -> tuple[BytesOrStr, bool]:
722+
path: str, replacements: Replacements | None, extension: str
723+
) -> tuple[str, bool]:
721724
"""Perform a single round of path legalization steps
722-
(sanitation/replacement, encoding from Unicode to bytes,
723-
extension-appending, and truncation). Return the path (Unicode if
724-
`fragment` is set, `bytes` otherwise) and whether truncation was
725-
required.
725+
1. sanitation/replacement
726+
2. appending the extension
727+
3. truncation.
728+
729+
Return the path and whether truncation was required.
726730
"""
727731
# Perform an initial sanitization including user replacements.
728732
path = sanitize_path(path, replacements)
729733

730-
# Encode for the filesystem.
731-
if not fragment:
732-
path = bytestring_path(path) # type: ignore
733-
734734
# Preserve extension.
735735
path += extension.lower()
736736

737737
# Truncate too-long components.
738738
pre_truncate_path = path
739-
path = truncate_path(path, length)
739+
path = truncate_path(path)
740740

741741
return path, path != pre_truncate_path
742742

743743

744744
def legalize_path(
745-
path: str,
746-
replacements: Replacements | None,
747-
length: int,
748-
extension: bytes,
749-
fragment: bool,
750-
) -> tuple[BytesOrStr, bool]:
751-
"""Given a path-like Unicode string, produce a legal path. Return
752-
the path and a flag indicating whether some replacements had to be
753-
ignored (see below).
754-
755-
The legalization process (see `_legalize_stage`) consists of
756-
applying the sanitation rules in `replacements`, encoding the string
757-
to bytes (unless `fragment` is set), truncating components to
758-
`length`, appending the `extension`.
759-
760-
This function performs up to three calls to `_legalize_stage` in
761-
case truncation conflicts with replacements (as can happen when
762-
truncation creates whitespace at the end of the string, for
763-
example). The limited number of iterations iterations avoids the
764-
possibility of an infinite loop of sanitation and truncation
765-
operations, which could be caused by replacement rules that make the
766-
string longer. The flag returned from this function indicates that
767-
the path has to be truncated twice (indicating that replacements
768-
made the string longer again after it was truncated); the
769-
application should probably log some sort of warning.
745+
path: str, replacements: Replacements | None, extension: str
746+
) -> tuple[str, bool]:
747+
"""Given a path-like Unicode string, produce a legal path. Return the path
748+
and a flag indicating whether some replacements had to be ignored (see
749+
below).
750+
751+
This function uses `_legalize_stage` function to legalize the path, see its
752+
documentation for the details of what this involves. It is called up to
753+
three times in case truncation conflicts with replacements (as can happen
754+
when truncation creates whitespace at the end of the string, for example).
755+
756+
The limited number of iterations avoids the possibility of an infinite loop
757+
of sanitation and truncation operations, which could be caused by
758+
replacement rules that make the string longer.
759+
760+
The flag returned from this function indicates that the path has to be
761+
truncated twice (indicating that replacements made the string longer again
762+
after it was truncated); the application should probably log some sort of
763+
warning.
770764
"""
765+
suffix = as_string(extension)
771766

772-
if fragment:
773-
# Outputting Unicode.
774-
extension = extension.decode("utf-8", "ignore")
775-
776-
first_stage_path, _ = _legalize_stage(
777-
path, replacements, length, extension, fragment
767+
first_stage, _ = os.path.splitext(
768+
_legalize_stage(path, replacements, suffix)[0]
778769
)
779770

780-
# Convert back to Unicode with extension removed.
781-
first_stage_path, _ = os.path.splitext(displayable_path(first_stage_path))
782-
783771
# Re-sanitize following truncation (including user replacements).
784-
second_stage_path, retruncated = _legalize_stage(
785-
first_stage_path, replacements, length, extension, fragment
786-
)
772+
second_stage, truncated = _legalize_stage(first_stage, replacements, suffix)
787773

788-
# If the path was once again truncated, discard user replacements
789-
# and run through one last legalization stage.
790-
if retruncated:
791-
second_stage_path, _ = _legalize_stage(
792-
first_stage_path, None, length, extension, fragment
793-
)
774+
if not truncated:
775+
return second_stage, False
794776

795-
return second_stage_path, retruncated
777+
# If the path was truncated, discard user replacements
778+
# and run through one last legalization stage.
779+
return _legalize_stage(first_stage, None, suffix)[0], True
796780

797781

798782
def str2bool(value: str) -> bool:
@@ -871,16 +855,21 @@ def command_output(cmd: list[BytesOrStr], shell: bool = False) -> CommandOutput:
871855
return CommandOutput(stdout, stderr)
872856

873857

874-
def max_filename_length(path: BytesOrStr, limit=MAX_FILENAME_LENGTH) -> int:
858+
@cache
859+
def get_max_filename_length() -> int:
875860
"""Attempt to determine the maximum filename length for the
876861
filesystem containing `path`. If the value is greater than `limit`,
877862
then `limit` is used instead (to prevent errors when a filesystem
878863
misreports its capacity). If it cannot be determined (e.g., on
879864
Windows), return `limit`.
880865
"""
866+
if length := beets.config["max_filename_length"].get(int):
867+
return length
868+
869+
limit = MAX_FILENAME_LENGTH
881870
if hasattr(os, "statvfs"):
882871
try:
883-
res = os.statvfs(path)
872+
res = os.statvfs(beets.config["directory"].as_str())
884873
except OSError:
885874
return limit
886875
return min(res[9], limit)

beets/vfs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def libtree(lib):
4949
"""
5050
root = Node({}, {})
5151
for item in lib.items():
52-
dest = item.destination(fragment=True)
53-
parts = util.components(dest)
52+
dest = item.destination(relative_to_libdir=True)
53+
parts = util.components(util.as_string(dest))
5454
_insert(root, parts, item.id)
5555
return root

beetsplug/bpd/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
from beets import dbcore, vfs
3434
from beets.library import Item
3535
from beets.plugins import BeetsPlugin
36-
from beets.util import bluelet
36+
from beets.util import as_string, bluelet
3737

3838
if TYPE_CHECKING:
3939
from beets.dbcore.query import Query
@@ -1130,7 +1130,7 @@ def play_finished(self):
11301130

11311131
def _item_info(self, item):
11321132
info_lines = [
1133-
"file: " + item.destination(fragment=True),
1133+
"file: " + as_string(item.destination(relative_to_libdir=True)),
11341134
"Time: " + str(int(item.length)),
11351135
"duration: " + f"{item.length:.3f}",
11361136
"Id: " + str(item.id),

beetsplug/convert.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -624,13 +624,7 @@ def convert_func(self, lib, opts, args):
624624
# strings we get from item.destination to bytes.
625625
items_paths = [
626626
os.path.relpath(
627-
util.bytestring_path(
628-
item.destination(
629-
basedir=dest,
630-
path_formats=path_formats,
631-
fragment=False,
632-
)
633-
),
627+
item.destination(basedir=dest, path_formats=path_formats),
634628
pl_dir,
635629
)
636630
for item in items

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ select = [
263263
]
264264
[tool.ruff.lint.per-file-ignores]
265265
"beets/**" = ["PT"]
266+
"test/test_util.py" = ["E501"]
266267

267268
[tool.ruff.lint.isort]
268269
split-on-trailing-comma = false

0 commit comments

Comments
 (0)