@@ -696,22 +696,26 @@ def sanitize_path(path: str, replacements: Replacements | None = None) -> str:
696
696
return os .path .join (* comps )
697
697
698
698
699
- def truncate_path (path : AnyStr ) -> AnyStr :
700
- """Given a bytestring path or a Unicode path fragment, truncate the
701
- components to a legal length. In the last component, the extension
702
- is preserved.
699
def truncate_str(s: str, length: int) -> str:
    """Truncate the string to at most the given byte length.

    The string is encoded with the filesystem encoding, cut to ``length``
    bytes and decoded again. If the cut lands in the middle of a multi-byte
    character (rendering it invalid), that character is removed entirely.
    With a UTF-8 filesystem encoding:

    >>> s = "🎹🎶"  # 8 bytes
    >>> truncate_str(s, 6)
    '🎹'

    A negative ``length`` is treated as zero, so the result never exceeds
    the requested size:

    >>> truncate_str("abcdef", -2)
    ''
    """
    # Clamp first: a negative slice bound would trim bytes from the *end*
    # of the string instead of limiting its total size.
    limit = max(length, 0)
    encoded = os.fsencode(s)[:limit]
    # "ignore" drops the dangling bytes of a character split by the cut.
    return encoded.decode(sys.getfilesystemencoding(), "ignore")
706
710
707
- out = [c [:length ] for c in comps ]
708
- base , ext = os .path .splitext (comps [- 1 ])
709
- if ext :
710
- # Last component has an extension.
711
- base = base [: max_length - len (ext )]
712
- out [- 1 ] = base + ext
713
711
714
- return os .path .join (* out )
712
def truncate_path(str_path: str) -> str:
    """Truncate each path part to a legal length preserving the extension.

    Every component is limited to ``get_max_filename_length()`` bytes. In the
    last component the extension is kept whole and only the stem is
    shortened to make room for it. A path without a final name (empty, or
    consisting only of a root) is returned unchanged.
    """
    max_length = get_max_filename_length()
    path = Path(str_path)
    if not path.name:
        # No filename component to shorten; rebuilding the path from its
        # parts would turn a bare root or an empty path into ".".
        return str_path

    suffix = path.suffix
    parent_parts = [truncate_str(p, max_length) for p in path.parts[:-1]]

    # truncate_str counts bytes, so the room reserved for the extension must
    # be measured in bytes too (a non-ASCII extension is longer in bytes than
    # in characters). Clamp at zero so an over-long extension cannot produce
    # a negative length.
    stem_budget = max(max_length - len(os.fsencode(suffix)), 0)
    stem = truncate_str(path.stem, stem_budget)

    # Append the extension by concatenation. Path.with_suffix() would first
    # strip anything after the last dot in the stem ("01. Song" -> "01") and
    # would rewrite the parent directory's name if the stem became empty.
    return str(Path(*parent_parts, stem + suffix))
715
719
716
720
717
721
def _legalize_stage (
0 commit comments