2323 PROJECT_CONFIG ,
2424 Config ,
2525)
26+ import tempfile
27+ import json
2628
2729
2830def _build_docs (session : nox .Session , config : Config ) -> None :
@@ -49,37 +51,6 @@ def _build_multiversion_docs(session: nox.Session, config: Config) -> None:
4951 session .run ("touch" , f"{ DOCS_OUTPUT_DIR } /.nojekyll" )
5052
5153
52- def _doc_files (root : Path ) -> Iterable [Path ]:
53- """Returns an iterator over all documentation files of the project"""
54- docs = Path (root ).glob ("**/*.rst" )
55-
56- def _deny_filter (path : Path ) -> bool :
57- return not ("venv" in path .parts )
58-
59- return filter (lambda path : _deny_filter (path ), docs )
60-
61-
62- def _doc_urls (files : Iterable [Path ]) -> Iterable [tuple [Path , str ]]:
63- """Returns an iterable over all urls contained in the provided files"""
64- def should_filter (url : str ) -> bool :
65- _filtered : Container [str ] = []
66- return url .startswith ("mailto" ) or url in _filtered
67-
68- for file in files :
69- urls = re .findall ( r"http[s]?://[^\s<>'\"\,\)\]]+[^\s<>'\"\,\.\)\]]" , file .open ().read ())
70- yield from zip (repeat (file ), filter (lambda url : not should_filter (url ), urls ))
71-
72-
def _doc_links_check(url: str) -> Tuple[Optional[int], str]:
    """Checks if a url is still working (can be accessed).

    Returns:
        A ``(status, details)`` tuple: the HTTP status code and reason on a
        completed request, or ``(None, <error text>)`` when the request
        failed entirely (timeout, DNS failure, connection refused, ...).
    """
    try:
        # NOTE(review): the original comment claimed a faked User-Agent is
        # required to avoid 403s on some sites, but no headers are actually
        # sent here — confirm whether custom headers are still needed.
        result = requests.get(url, timeout=5)
        return result.status_code, f"{result.reason}"
    except requests.exceptions.RequestException as ex:
        print("error:", ex)
        # Bug fix: previously this branch fell through and implicitly
        # returned None, which crashed callers unpacking the declared
        # (status, details) tuple.
        return None, f"{ex}"
82-
8354def _git_diff_changes_main () -> int :
8455 """
8556 Check if doc/changes is changed and return the exit code of command git diff.
@@ -139,22 +110,52 @@ def docs_list_links(session: Session) -> None:
@nox.session(name="docs:links:check", python=False)
def docs_links_check(session: Session) -> None:
    """Checks whether all links in the documentation are accessible.

    Runs sphinx's ``linkcheck`` builder, then re-verifies every link sphinx
    reported as ``ignored`` with a direct HTTP request, rewriting the
    builder's ``output.json`` with the updated statuses. The session fails
    if any link ends up ``broken`` or timed out.
    """
    with tempfile.TemporaryDirectory() as tmp:
        output_dir = Path(tmp)
        sp = subprocess.run(
            [
                "poetry", "run", "--", "sphinx-build",
                "-b", "linkcheck",
                PROJECT_CONFIG.root / "doc",
                output_dir,
            ],
            capture_output=True,
            text=True,
        )
        # linkcheck exits with >= 2 only on hard build errors; plain broken
        # links yield exit code 1, which we evaluate ourselves below.
        if sp.returncode >= 2:
            print(sp.stderr)
            # Bug fix: session.error(2) passed an int where a message is
            # expected; pass a descriptive string instead.
            session.error("sphinx-build -b linkcheck failed")

        # One JSON document per non-empty line (empty lines dropped; the
        # original only special-cased a single trailing empty line).
        results = [
            raw
            for raw in (output_dir / "output.json").read_text().splitlines()
            if raw
        ]
        if not results:
            return

        total = len(results)
        for index, raw in enumerate(results):
            entry = json.loads(raw)
            # Only re-check http(s) links that sphinx skipped as "ignored".
            if entry["status"] != "ignored" or not entry["uri"].startswith("http"):
                continue
            # Strip any trailing anchor/garbage sphinx kept on the uri.
            match = re.search(r"https?://[^\s\"\'<>]+", entry["uri"])
            if match:
                entry["uri"] = match.group()
            print(f"{index}/{total}")
            try:
                response = requests.head(entry["uri"], timeout=5)
                if response.status_code != 200:
                    # Some servers reject HEAD; retry with a streamed GET so
                    # the body is not downloaded.
                    response = requests.get(entry["uri"], timeout=5, stream=True)
                    response.close()
                entry["status"] = "broken" if response.status_code >= 400 else "working"
                entry["code"] = response.status_code
            except requests.exceptions.Timeout:
                entry["status"] = "timeout"
            except requests.exceptions.RequestException as ex:
                # Bug fix: connection errors previously propagated and
                # aborted the whole session instead of being recorded.
                entry["status"] = "broken"
                entry["info"] = str(ex)
            results[index] = json.dumps(entry)

        (output_dir / "output.json").write_text("\n".join(results))

        errors = [
            raw
            for raw in results
            if json.loads(raw)["status"] in ("broken", "timeout")
        ]
        if errors:
            # Bug fix: '"Error" + "s" if cond else ""' printed nothing for a
            # single error due to operator precedence.
            print("Errors" if len(errors) > 1 else "Error")
            print("\n".join(errors))
            session.error("documentation contains inaccessible links")
158159
159160
160161@nox .session (name = "changelog:updated" , python = False )
0 commit comments