Skip to content

Commit 12f7735

Browse files
committed
Speed up purges by re-using a requests session
1 parent 2135304 commit 12f7735

File tree

1 file changed

+91
-32
lines changed

1 file changed

+91
-32
lines changed

build_docs.py

Lines changed: 91 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -434,7 +434,8 @@ def build_robots_txt(
434434
www_root: Path,
435435
group,
436436
skip_cache_invalidation,
437-
):
437+
session: requests.Session,
438+
) -> None:
438439
"""Disallow crawl of EOL versions in robots.txt."""
439440
if not www_root.exists():
440441
logging.info("Skipping robots.txt generation (www root does not even exists).")
@@ -449,7 +450,7 @@ def build_robots_txt(
449450
robots_file.chmod(0o775)
450451
run(["chgrp", group, robots_file])
451452
if not skip_cache_invalidation:
452-
purge("robots.txt")
453+
purge(session, "robots.txt")
453454

454455

455456
def build_sitemap(
@@ -642,7 +643,7 @@ def full_build(self):
642643
"""
643644
return not self.quick and not self.language.html_only
644645

645-
def run(self) -> bool:
646+
def run(self, session: requests.Session) -> bool:
646647
"""Build and publish a Python doc, for a language, and a version."""
647648
start_time = perf_counter()
648649
logging.info("Running.")
@@ -653,7 +654,7 @@ def run(self) -> bool:
653654
if self.should_rebuild():
654655
self.build_venv()
655656
self.build()
656-
self.copy_build_to_webroot()
657+
self.copy_build_to_webroot(session)
657658
self.save_state(build_duration=perf_counter() - start_time)
658659
except Exception as err:
659660
logging.exception("Badly handled exception, human, please help.")
@@ -797,7 +798,7 @@ def build_venv(self):
797798
run([venv_path / "bin" / "python", "-m", "pip", "freeze", "--all"])
798799
self.venv = venv_path
799800

800-
def copy_build_to_webroot(self):
801+
def copy_build_to_webroot(self, session: requests.Session) -> None:
801802
"""Copy a given build to the appropriate webroot with appropriate rights."""
802803
logging.info("Publishing start.")
803804
self.www_root.mkdir(parents=True, exist_ok=True)
@@ -909,9 +910,9 @@ def copy_build_to_webroot(self):
909910
prefixes = run(["find", "-L", targets_dir, "-samefile", target]).stdout
910911
prefixes = prefixes.replace(targets_dir + "/", "")
911912
prefixes = [prefix + "/" for prefix in prefixes.split("\n") if prefix]
912-
purge(*prefixes)
913+
purge(session, *prefixes)
913914
for prefix in prefixes:
914-
purge(*[prefix + p for p in changed])
915+
purge(session, *[prefix + p for p in changed])
915916
logging.info("Publishing done")
916917

917918
def should_rebuild(self):
@@ -977,7 +978,15 @@ def save_state(self, build_duration: float):
977978
state_file.write_text(tomlkit.dumps(states), encoding="UTF-8")
978979

979980

980-
def symlink(www_root: Path, language: Language, directory: str, name: str, group: str, skip_cache_invalidation: bool):
981+
def symlink(
982+
www_root: Path,
983+
language: Language,
984+
directory: str,
985+
name: str,
986+
group: str,
987+
skip_cache_invalidation: bool,
988+
session: requests.Session,
989+
) -> None:
981990
"""Used by major_symlinks and dev_symlink to maintain symlinks."""
982991
if language.tag == "en": # english is rooted on /, no /en/
983992
path = www_root
@@ -994,12 +1003,17 @@ def symlink(www_root: Path, language: Language, directory: str, name: str, group
9941003
link.symlink_to(directory)
9951004
run(["chown", "-h", ":" + group, str(link)])
9961005
if not skip_cache_invalidation:
997-
purge_path(www_root, link)
1006+
purge_path(session, www_root, link)
9981007

9991008

10001009
def major_symlinks(
1001-
www_root: Path, group, versions: Iterable[Version], languages: Iterable[Language], skip_cache_invalidation: bool
1002-
):
1010+
www_root: Path,
1011+
group: str,
1012+
versions: Iterable[Version],
1013+
languages: Iterable[Language],
1014+
skip_cache_invalidation: bool,
1015+
session: requests.Session,
1016+
) -> None:
10031017
"""Maintains the /2/ and /3/ symlinks for each languages.
10041018
10051019
Like:
@@ -1009,11 +1023,26 @@ def major_symlinks(
10091023
"""
10101024
current_stable = Version.current_stable(versions).name
10111025
for language in languages:
1012-
symlink(www_root, language, current_stable, "3", group, skip_cache_invalidation)
1013-
symlink(www_root, language, "2.7", "2", group, skip_cache_invalidation)
1026+
symlink(
1027+
www_root,
1028+
language,
1029+
current_stable,
1030+
"3",
1031+
group,
1032+
skip_cache_invalidation,
1033+
session,
1034+
)
1035+
symlink(www_root, language, "2.7", "2", group, skip_cache_invalidation, session)
10141036

10151037

1016-
def dev_symlink(www_root: Path, group, versions, languages, skip_cache_invalidation: bool):
1038+
def dev_symlink(
1039+
www_root: Path,
1040+
group,
1041+
versions,
1042+
languages,
1043+
skip_cache_invalidation: bool,
1044+
session: requests.Session,
1045+
) -> None:
10171046
"""Maintains the /dev/ symlinks for each languages.
10181047
10191048
Like:
@@ -1023,10 +1052,18 @@ def dev_symlink(www_root: Path, group, versions, languages, skip_cache_invalidat
10231052
"""
10241053
current_dev = Version.current_dev(versions).name
10251054
for language in languages:
1026-
symlink(www_root, language, current_dev, "dev", group, skip_cache_invalidation)
1055+
symlink(
1056+
www_root,
1057+
language,
1058+
current_dev,
1059+
"dev",
1060+
group,
1061+
skip_cache_invalidation,
1062+
session,
1063+
)
10271064

10281065

1029-
def purge(*paths):
1066+
def purge(session: requests.Session, *paths: Path | str) -> None:
10301067
"""Remove one or many paths from docs.python.org's CDN.
10311068
10321069
To be used when a file change, so the CDN fetch the new one.
@@ -1035,20 +1072,22 @@ def purge(*paths):
10351072
for path in paths:
10361073
url = urljoin(base, str(path))
10371074
logging.debug("Purging %s from CDN", url)
1038-
requests.request("PURGE", url, timeout=30)
1075+
session.request("PURGE", url, timeout=30)
10391076

10401077

1041-
def purge_path(www_root: Path, path: Path):
1078+
def purge_path(session: requests.Session, www_root: Path, path: Path) -> None:
10421079
"""Recursively remove a path from docs.python.org's CDN.
10431080
10441081
To be used when a directory change, so the CDN fetch the new one.
10451082
"""
1046-
purge(*[file.relative_to(www_root) for file in path.glob("**/*")])
1047-
purge(path.relative_to(www_root))
1048-
purge(str(path.relative_to(www_root)) + "/")
1083+
purge(session, *[file.relative_to(www_root) for file in path.glob("**/*")])
1084+
purge(session, path.relative_to(www_root))
1085+
purge(session, str(path.relative_to(www_root)) + "/")
10491086

10501087

1051-
def proofread_canonicals(www_root: Path, skip_cache_invalidation: bool) -> None:
1088+
def proofread_canonicals(
1089+
www_root: Path, skip_cache_invalidation: bool, session: requests.Session
1090+
) -> None:
10521091
"""In www_root we check that all canonical links point to existing contents.
10531092
10541093
It can happen that a canonical is "broken":
@@ -1070,11 +1109,11 @@ def proofread_canonicals(www_root: Path, skip_cache_invalidation: bool) -> None:
10701109
html = html.replace(canonical.group(0), "")
10711110
file.write_text(html, encoding="UTF-8", errors="surrogateescape")
10721111
if not skip_cache_invalidation:
1073-
purge(str(file).replace("/srv/docs.python.org/", ""))
1112+
purge(session, str(file).replace("/srv/docs.python.org/", ""))
10741113

10751114

1076-
def parse_versions_from_devguide():
1077-
releases = requests.get(
1115+
def parse_versions_from_devguide(session: requests.Session) -> list[Version]:
1116+
releases = session.get(
10781117
"https://raw.githubusercontent.com/"
10791118
"python/devguide/main/include/release-cycle.json",
10801119
timeout=30,
@@ -1101,8 +1140,9 @@ def parse_languages_from_config():
11011140

11021141

11031142
def build_docs(args) -> bool:
1104-
"""Build all docs (each languages and each versions)."""
1105-
versions = parse_versions_from_devguide()
1143+
"""Build all docs (each language and each version)."""
1144+
session = requests.Session()
1145+
versions = parse_versions_from_devguide(session)
11061146
languages = parse_languages_from_config()
11071147
todo = [
11081148
(version, language)
@@ -1130,19 +1170,38 @@ def build_docs(args) -> bool:
11301170
builder = DocBuilder(
11311171
version, versions, language, languages, cpython_repo, **vars(args)
11321172
)
1133-
all_built_successfully &= builder.run()
1173+
all_built_successfully &= builder.run(session)
11341174
logging.root.handlers[0].setFormatter(
11351175
logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
11361176
)
11371177

11381178
build_sitemap(versions, languages, args.www_root, args.group)
11391179
build_404(args.www_root, args.group)
11401180
build_robots_txt(
1141-
versions, languages, args.www_root, args.group, args.skip_cache_invalidation
1181+
versions,
1182+
languages,
1183+
args.www_root,
1184+
args.group,
1185+
args.skip_cache_invalidation,
1186+
session,
1187+
)
1188+
major_symlinks(
1189+
args.www_root,
1190+
args.group,
1191+
versions,
1192+
languages,
1193+
args.skip_cache_invalidation,
1194+
session,
1195+
)
1196+
dev_symlink(
1197+
args.www_root,
1198+
args.group,
1199+
versions,
1200+
languages,
1201+
args.skip_cache_invalidation,
1202+
session,
11421203
)
1143-
major_symlinks(args.www_root, args.group, versions, languages, args.skip_cache_invalidation)
1144-
dev_symlink(args.www_root, args.group, versions, languages, args.skip_cache_invalidation)
1145-
proofread_canonicals(args.www_root, args.skip_cache_invalidation)
1204+
proofread_canonicals(args.www_root, args.skip_cache_invalidation, session)
11461205

11471206
return all_built_successfully
11481207

0 commit comments

Comments
 (0)