1- from os . path import dirname , join
1+ import unicodedata , re
22from os import rename
3+ from os .path import dirname , join
4+ from os .path import splitext
35from .scantree import ScanTree
4- import unicodedata
56
67
def asciify(text: str):
    """
    Strip accent marks from a Unicode string.

    The text is decomposed with NFD normalization and every combining
    mark (Unicode category "Mn") is dropped, so "é" becomes "e".
    Characters that have no such decomposition (for example "ß") are
    kept unchanged, and the result is left in its NFD form.
    """
    decomposed = unicodedata.normalize("NFD", text)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != "Mn"]
    return "".join(kept)
1314
1415
def slugify(value):
    """
    Return ``value`` as text restricted to ``a-z A-Z 0-9 _ . + -``.

    The value is converted to ``str``, accents are stripped with
    ``asciify``, and every run of characters outside the allowed set is
    replaced by a single underscore.
    """
    ascii_text = asciify(str(value))
    return re.sub(r"[^a-zA-Z0-9_.+-]+", "_", ascii_text)
21+
22+
def clean(value):
    """
    Collapse repeated separators and trim them from both ends.

    Runs of "-" are reduced to one dash and leading/trailing dashes are
    removed; the same is then done for "_". The order matters: dashes
    are handled first, so a dash exposed by trimming underscores stays.
    """
    text = str(value)
    for pattern, sep in ((r"\-+", "-"), (r"_+", "_")):
        text = re.sub(pattern, sep, text).strip(sep)
    return text
28+
29+
def urlsafe(name, parent=None):
    """
    Return a URL-friendly version of a file name.

    The name is slugified; if that changed it, or if the slug contains
    any "_" or "-", the part before the extension is passed through
    ``clean`` and the extension is re-attached. Otherwise ``name`` is
    returned untouched. ``parent`` is accepted so the function can be
    used as a ``(name, parent)`` substitution callback; it is not read.
    """
    s = slugify(name)
    if s == name and not re.search(r"[_-]\.", s) and not re.search(r"[_-]+", s):
        return name
    # Slugifying must be idempotent; a failure here means slugify is broken.
    assert slugify(s) == s
    stem, ext = splitext(s)
    return clean(stem) + ext
37+
38+
# Flags that replace each match with a transformed copy of itself. They are
# only accepted when the replacement text is empty.
_TRANSFORM_FLAGS = frozenset(
    {
        "upper",
        "lower",
        "title",
        "swapcase",
        "expandtabs",
        "casefold",
        "capitalize",
        "asciify",
        "slugify",
        "urlsafe",
    }
)
# Flags that restrict the substitution to one part of the file name.
_SCOPE_FLAGS = frozenset({"ext", "stem"})


def split_subs(s: str):
    """
    Parse a substitution spec into ``(search, replace, extra)``.

    The first character of ``s`` is the delimiter, e.g.
    ``/search/replace/flag/flag``. Tokens after the replacement are
    either known option names (stored as ``extra[name] = True``) or
    regex inline flags such as ``i``, which are prepended to the search
    pattern as ``(?flags)``. If several regex-flag tokens are given, the
    last one wins. Empty tokens (e.g. from a trailing delimiter) are
    ignored.

    Raises:
        RuntimeError: if the spec is empty, has no replacement part, has
            an empty search pattern, or combines a transformation option
            with a non-empty replacement.
    """
    if not s:
        # s[0] would raise IndexError; report it like any other bad spec.
        raise RuntimeError(f"Invalid pattern {s!r}")

    # No maxsplit: every trailing token must be split out, otherwise the
    # remainder is fused into one token and mistaken for regex flags.
    parts = s[1:].split(s[0])
    if len(parts) < 2:
        raise RuntimeError(f"Invalid pattern {s!r}")

    search, replace, *tokens = parts
    if not search:
        raise RuntimeError(f"Empty search pattern {s!r}")

    extra = {}
    flags = None
    for token in tokens:
        if not token:
            # A trailing delimiter yields "", which must not wipe the flags.
            continue
        if token in _TRANSFORM_FLAGS:
            if replace:
                # Explicit error instead of assert so it survives `python -O`.
                raise RuntimeError(
                    f"Option {token!r} requires an empty replacement in {s!r}"
                )
            extra[token] = True
        elif token in _SCOPE_FLAGS:
            extra[token] = True
        else:
            flags = token

    if flags:
        search = f"(?{flags}){search}"
    return search, replace, extra
2874
2975
@@ -61,37 +107,65 @@ def start(self):
61107 _subs .append ((lambda name , parent : name .upper ()))
62108
63109 if self .urlsafe :
64- from os .path import splitext
65-
66- def slugify (value ):
67- value = str (value )
68- value = text_to_ascii (value )
69- value = re .sub (r"[^a-zA-Z0-9_.+-]+" , "_" , value )
70- return value
71-
72- def clean (value ):
73- value = str (value )
74- value = re .sub (r"\-+" , "-" , value ).strip ("-" )
75- value = re .sub (r"_+" , "_" , value ).strip ("_" )
76- return value
77-
78- def urlsafe (name , parent ):
79- s = slugify (name )
80- if s != name or re .search (r"[_-]\." , s ) or re .search (r"[_-]+" , s ):
81- assert slugify (s ) == s
82- stem , ext = splitext (s )
83- return clean (stem ) + ext
84- return name
85-
86110 _subs .append (urlsafe )
87111
def _append(rex, rep: str, extra):
    """
    Build a ``(name, parent)`` substitution callback and add it to ``_subs``.

    Args:
        rex: compiled pattern; its ``sub`` method performs the replacement.
        rep: replacement text, used when no transformation option is set.
        extra: option dict from ``split_subs``. "stem"/"ext" limit the
            substitution to the part before/after the extension split;
            a transformation option ("lower", "slugify", ...) replaces
            each match with the transformed match text instead of ``rep``.
    """
    options = extra or {}

    # Ordered: if several transformation options are set, the first
    # one listed here is applied.
    transforms = (
        ("lower", str.lower),
        ("upper", str.upper),
        ("title", str.title),
        ("swapcase", str.swapcase),
        ("casefold", str.casefold),
        ("capitalize", str.capitalize),
        # Previously unhandled: fell through to the empty replacement
        # and deleted every match.
        ("expandtabs", str.expandtabs),
        ("asciify", asciify),
        ("urlsafe", urlsafe),
        # Previously dispatched to urlsafe by mistake.
        ("slugify", slugify),
    )
    transform = next((func for key, func in transforms if options.get(key)), None)

    if transform is None:
        repl = rep
    else:

        def repl(m):
            return transform(m.group(0))

    if options.get("stem"):

        def fn(name: str, parent):
            stem, ext = splitext(name)
            return rex.sub(repl, stem) + ext

    elif options.get("ext"):

        def fn(name: str, parent):
            stem, ext = splitext(name)
            return stem + rex.sub(repl, ext)

    else:

        def fn(name, parent):
            return rex.sub(repl, name)

    # Kept on the callback so the pattern can be inspected while debugging.
    fn.regx = rex
    _subs.append(fn)
91162
92163 for s in self .subs :
93164 search , replace , extra = split_subs (s )
94- rex = regex (search )
165+ try :
166+ rex = regex (search )
167+ except Exception as e :
168+ raise RuntimeError (f"Bad regexp { search !r} : { e } " )
95169 _append (rex , replace , extra )
96170
97171 self ._subs = _subs
@@ -107,6 +181,7 @@ def process_entry(self, de):
107181 for fn in self ._subs :
108182 v = fn (name2 , parent )
109183 # print("PE_subs", de.path, name2, v)
184+ # print("fn", getattr(fn, "regx", "?"))
110185 if v :
111186 name2 = v
112187 # print("PE", de.path, [name1, name2])
0 commit comments