1
1
#!/usr/bin/env python3
2
2
#
3
3
# This is a link checker for the Kubernetes documentation website.
4
- # - We cover the following cases for the language you provide via `-l`, which
5
- # defaults to 'en'.
6
- # - If the language specified is not English (`en`), we check if you are
7
- # actually using the localized links. For example, if you specify `zh` as
8
- # the language, and for link target `/docs/foo/bar`, we check if the English
9
- # version exists AND if the Chinese version exists as well. A checking record
10
- # is produced if the link can use the localized version.
4
+ #
5
+ # If the language to check is not English (`en`), we check if you are actually
6
+ # using the localized links. For example, if you are checking
7
+ # `content/zh/docs/foo/bar`, we check if the English version exists AND if the
8
+ # Chinese version exists as well. A checking record is produced if the link
9
+ # can use the localized version.
11
10
#
12
11
# Usage: linkchecker.py -h
13
12
#
64
63
C_RED = "\033 [31m"
65
64
C_GREEN = "\033 [32m"
66
65
C_YELLOW = "\033 [33m"
67
- C_GRAY = "\033 [90m"
66
+ C_GRAY = "\033 [90m"
68
67
C_CYAN = "\033 [36m"
69
68
C_END = "\033 [0m"
70
69
71
70
# Command line arguments shared across functions
72
71
ARGS = None
72
+ # Command line parser
73
+ PARSER = None
74
+ # Language as parsed from the file path
75
+ LANG = None
73
76
# Global result dictionary keyed by page examined
74
77
RESULT = {}
75
78
# Cached redirect entries
76
79
REDIRECTS = {}
77
80
# Cached anchors in target pages
78
81
ANCHORS = {}
79
82
83
+
80
84
def new_record (level , message , target ):
81
85
"""Create new checking record.
82
86
@@ -89,7 +93,7 @@ def new_record(level, message, target):
89
93
global ARGS
90
94
91
95
# Skip INFO records unless verbose output is enabled
92
- if ARGS .verbose == False and level == "INFO" :
96
+ if ARGS .verbose is False and level == "INFO" :
93
97
return None
94
98
95
99
result = None
@@ -98,9 +102,9 @@ def new_record(level, message, target):
98
102
else :
99
103
target = C_GRAY + target + C_END
100
104
if level == "INFO" :
101
- result = target + ": " + C_GREEN + message + C_END
105
+ result = target + ": " + C_GREEN + message + C_END
102
106
elif level == "WARNING" :
103
- result = target + ": " + C_YELLOW + message + C_END
107
+ result = target + ": " + C_YELLOW + message + C_END
104
108
else : # default to error
105
109
result = target + ": " + C_RED + message + C_END
106
110
@@ -286,7 +290,7 @@ def check_target(page, anchor, target):
286
290
287
291
# link to English or localized page
288
292
if (target .startswith ("/docs/" ) or
289
- target .startswith ("/" + ARGS . lang + "/docs/" )):
293
+ target .startswith ("/" + LANG + "/docs/" )):
290
294
291
295
# target is a shared reference (kubectl or kubernetes-api)?
292
296
if (target .find ("/docs/reference/generated/kubectl/" ) >= 0 or
@@ -305,22 +309,22 @@ def check_target(page, anchor, target):
305
309
if ok :
306
310
# We don't do additional checks for the English site even if it has
307
311
# links to a non-English page
308
- if ARGS . lang == "en" :
312
+ if LANG == "en" :
309
313
return None
310
314
311
315
# If we are already checking localized link, fine
312
- if target .startswith ("/" + ARGS . lang + "/docs/" ):
316
+ if target .startswith ("/" + LANG + "/docs/" ):
313
317
return None
314
318
315
319
# additional check for localization even if English target exists
316
- base = os .path .join (ROOT , "content" , ARGS . lang )
320
+ base = os .path .join (ROOT , "content" , LANG )
317
321
found = check_file_exists (base , target )
318
322
if not found :
319
323
# Still to be translated
320
324
return None
321
325
msg = ("Localized page detected, please append '/%s' to the target"
322
- % ARGS . lang )
323
- return new_record ("ERROR" , "Link not using localized page" , target )
326
+ % LANG )
327
+ return new_record ("ERROR" , msg , target )
324
328
325
329
# target might be a redirect entry
326
330
real_target = get_redirect (target )
@@ -333,15 +337,16 @@ def check_target(page, anchor, target):
333
337
msg = "Link may be wrong for the anchor [%s]" % anchor
334
338
return new_record ("WARNING" , msg , target )
335
339
336
- def check_anchor (target_page , anchor ):
340
+
341
+ def check_anchor (target , anchor ):
337
342
"""Check if an anchor is defined in the target page
338
343
339
- :param target_page : The target page to check
344
+ :param target : The target page to check
340
345
:param anchor: Anchor string to find in the target page
341
346
"""
342
- if target_page not in ANCHORS :
347
+ if target not in ANCHORS :
343
348
try :
344
- with open (target_page , "r" ) as f :
349
+ with open (target , "r" ) as f :
345
350
data = f .readlines ()
346
351
except Exception as ex :
347
352
print ("[Error] failed in reading markdown file: " + str (ex ))
@@ -351,16 +356,18 @@ def check_anchor(target_page, anchor):
351
356
regex1 = re .compile (anchor_pattern1 )
352
357
anchor_pattern2 = r"{#(.*?)}"
353
358
regex2 = re .compile (anchor_pattern2 )
354
- ANCHORS [target_page ] = regex1 .findall (content ) + regex2 .findall (content )
355
- return anchor in ANCHORS [target_page ]
359
+ ANCHORS [target ] = regex1 .findall (content ) + regex2 .findall (content )
360
+ return anchor in ANCHORS [target ]
361
+
356
362
357
363
def check_apiref_target (target , anchor ):
358
364
"""Check a link to an API reference page.
359
365
360
366
:param target: The link target string to check
361
367
:param anchor: Anchor string from the content page
362
368
"""
363
- base = os .path .join (ROOT , "content" , "en" , "docs" , "reference" , "kubernetes-api" )
369
+ base = os .path .join (ROOT , "content" , "en" , "docs" , "reference" ,
370
+ "kubernetes-api" )
364
371
ok = check_file_exists (base + "/" , target )
365
372
if not ok :
366
373
return new_record ("ERROR" , "API reference page not found" , target )
@@ -370,7 +377,9 @@ def check_apiref_target(target, anchor):
370
377
371
378
target_page = os .path .join (base , target )+ ".md"
372
379
if not check_anchor (target_page , anchor ):
373
- return new_record ("ERROR" , "Anchor not found in API reference page" , target + "#" + anchor )
380
+ return new_record ("ERROR" , "Anchor not found in API reference page" ,
381
+ target + "#" + anchor )
382
+
374
383
375
384
def validate_links (page ):
376
385
"""Find and validate links on a content page.
@@ -398,8 +407,8 @@ def validate_links(page):
398
407
records .append (r )
399
408
400
409
# searches for pattern: {{< api-reference page="" anchor=""
401
- apiref_pattern = r"{{ *< *api-reference page=\"([^\"]*?)\" *anchor=\"(.*?)\""
402
- regex = re .compile (apiref_pattern )
410
+ apiref_re = r"{{ *< *api-reference page=\"([^\"]*?)\" *anchor=\"(.*?)\""
411
+ regex = re .compile (apiref_re )
403
412
404
413
matches = regex .findall (content )
405
414
for m in matches :
@@ -408,8 +417,8 @@ def validate_links(page):
408
417
records .append (r )
409
418
410
419
# searches for pattern: {{< api-reference page=""
411
- apiref_pattern = r"{{ *< *api-reference page=\"([^\"]*?)\""
412
- regex = re .compile (apiref_pattern )
420
+ apiref_re = r"{{ *< *api-reference page=\"([^\"]*?)\""
421
+ regex = re .compile (apiref_re )
413
422
414
423
matches = regex .findall (content )
415
424
for m in matches :
@@ -426,31 +435,38 @@ def parse_arguments():
426
435
427
436
Result is returned and saved into global variable ARGS.
428
437
"""
429
- parser = argparse .ArgumentParser (description = "Links checker for docs." )
430
- parser .add_argument ("-l" , dest = "lang" , default = "en" , metavar = "<LANG>" ,
431
- help = ("two letter language code, e.g. 'zh'. "
432
- "(default='en')" ))
433
- parser .add_argument ("-v" , dest = "verbose" , action = "store_true" ,
438
+ global PARSER
439
+
440
+ PARSER = argparse .ArgumentParser (description = "Links checker for docs." )
441
+ PARSER .add_argument ("-v" , dest = "verbose" , action = "store_true" ,
434
442
help = "switch on verbose level" )
435
- parser .add_argument ("-f" , dest = "filter" , default = "/docs/**/*.md" ,
436
- metavar = "<FILTER>" ,
437
- help = ("File pattern to scan, e.g. '/docs/foo.md'. "
438
- "(default='/docs/**/*.md')" ))
439
- parser .add_argument ("-n" , "--no-color" , action = "store_true" ,
443
+ PARSER .add_argument ("-n" , "--no-color" , action = "store_true" ,
440
444
help = "Suppress colored printing." )
445
+ PARSER .add_argument ("-f" , dest = "filter" , default = "content/en/docs/**/*.md" ,
446
+ metavar = "<FILTER>" ,
447
+ help = ("File pattern to scan. "
448
+ "(default='content/en/docs/**/*.md')" ))
441
449
442
- return parser .parse_args ()
450
+ return PARSER .parse_args ()
443
451
444
452
445
453
def main ():
446
454
"""The main entry of the program."""
447
- global ARGS , ROOT , REDIRECTS
455
+ global ARGS , ROOT , REDIRECTS , PARSER , LANG
448
456
449
457
ARGS = parse_arguments ()
450
- print ("Language: " + ARGS .lang )
451
458
ROOT = os .path .join (os .path .dirname (__file__ ), '..' )
452
- content_dir = os .path .join (ROOT , 'content' )
453
- lang_dir = os .path .join (content_dir , ARGS .lang )
459
+
460
+ print (ARGS .filter )
461
+ parts = ARGS .filter .split ("/" , 2 )
462
+ if len (parts ) != 3 or parts [0 ] != "content" :
463
+ print ("ERROR:\n Please specify file pattern in the format "
464
+ "'content/<lang>/<path-pattern>', for example:\n "
465
+ "'content/zh/docs/concepts/**/*.md'\n " )
466
+ PARSER .print_help ()
467
+ sys .exit (- 1 )
468
+
469
+ LANG = parts [1 ]
454
470
455
471
# read redirects data
456
472
redirects_fn = os .path .join (ROOT , "static" , "_redirects" )
@@ -473,7 +489,7 @@ def main():
473
489
print ("[Error] failed in reading redirects file: " + str (ex ))
474
490
return
475
491
476
- folders = [f for f in glob .glob (lang_dir + ARGS .filter , recursive = True )]
492
+ folders = [f for f in glob .glob (ARGS .filter , recursive = True )]
477
493
for page in folders :
478
494
validate_links (page )
479
495
0 commit comments