1 file changed: 11 additions and 10 deletions.
@@ -1265,25 +1265,26 @@ def proofread_canonicals(
12651265 purge (http , * paths_to_purge )
12661266
12671267
# Python 3.12 onwards doesn't use self-closing tags for <link rel="canonical">,
# so the trailing " /" is optional and both forms of the element are matched.
# Group 1 captures the path that follows "https://docs.python.org/".
# The dots in the host name are escaped so they only match a literal ".".
_canonical_re = re.compile(
    rb"""<link rel="canonical" href="https://docs\.python\.org/([^"]*)"(?: /)?>"""
)


def _check_canonical_rel(file: Path, www_root: Path) -> "Path | None":
    """Remove a broken canonical link element from an HTML file.

    The file is searched for the first ``<link rel="canonical">`` element
    pointing at ``https://docs.python.org/``.  If the path it names does not
    exist under *www_root*, the whole element is cut out of the file.

    Args:
        file: HTML file to inspect; it is rewritten in place when the
            canonical link is removed.
        www_root: Directory against which the canonical target path is
            resolved.

    Returns:
        *file* if it was modified, otherwise ``None`` (no canonical link was
        found, or its target exists).
    """
    html = file.read_bytes()
    canonical = _canonical_re.search(html)
    if canonical is None:
        return None
    # surrogateescape keeps undecodable bytes round-trippable so that odd
    # href values still map onto a filesystem path instead of raising.
    target = canonical[1].decode(encoding="UTF-8", errors="surrogateescape")
    if (www_root / target).exists():
        return None
    logging.info("Removing broken canonical from %s to %s", file, target)
    # Drop exactly the matched element, whichever closing form it used.
    start, end = canonical.span()
    file.write_bytes(html[:start] + html[end:])
    return file
12881289
12891290
You can’t perform that action at this time.
0 commit comments