Commit ca80170

[docs] Compress html docs and speed up deduplication
1 parent 825e53e commit ca80170

File tree

3 files changed (+88, -62 lines)

.github/workflows/deploy-docs.yml

Lines changed: 4 additions & 0 deletions

@@ -68,6 +68,10 @@ jobs:
           export TERM=xterm-256color
           export COLUMNS=120
           python3 tools/scripts/docs_modm_io_generator.py -c -j4 -d
+      - name: Size of documentation archive
+        if: always()
+        run: |
+          ls -lh modm-api-docs.tar.gz
       - name: Upload api documentation to docs.modm.io
         if: always()
         env:
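The new step only reports the size of the generated archive with `ls -lh`, presumably so that size regressions show up in the CI log. A rough local equivalent in Python (illustrative only, not part of the commit; the archive path is the one the workflow uses):

import os

# Report the size of the documentation archive in MiB (illustrative only).
size = os.path.getsize("modm-api-docs.tar.gz")
print(f"modm-api-docs.tar.gz: {size / 2**20:.1f} MiB")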

tools/scripts/docs_modm_io_generator.py

Lines changed: 83 additions & 61 deletions

@@ -13,8 +13,9 @@
 import os
 import sys
 import json
+import gzip
 import shutil
-import zipfile
+import hashlib
 import tempfile
 import argparse
 import datetime
@@ -71,7 +72,8 @@ def get_targets():
         elif target.platform == "sam":
             short_id.naming_schema = "{platform}{family}{series}"
 
-        short_id.set("platform", target.platform)  # invalidate caches
+        # invalidate id cache due to lack of proper API
+        short_id.set("platform", target.platform)
         minimal_targets[short_id.string].append(target)
 
     target_list = []
@@ -99,15 +101,15 @@ def get_targets():
 def main():
     parser = argparse.ArgumentParser()
     test_group = parser.add_mutually_exclusive_group()
-    test_group.add_argument("--test", "-t", action='store_true', help="Test mode: generate only a few targets. List includes targets with multiple board modules.")
-    test_group.add_argument("--test2", "-t2", action='store_true', help="Test mode: generate only a few targets. List has targets from the real target list.")
+    test_group.add_argument("--test", "-t", action="store_true", help="Test mode: generate only a few targets. List includes targets with multiple board modules.")
+    test_group.add_argument("--test2", "-t2", action="store_true", help="Test mode: generate only a few targets. List has targets from the real target list.")
     parser.add_argument("--jobs", "-j", type=int, default=2, help="Number of parallel doxygen processes")
-    parser.add_argument("--local-temp", "-l", action='store_true', help="Create temporary directory inside current working directory")
+    parser.add_argument("--local-temp", "-l", action="store_true", help="Create temporary directory inside current working directory")
     group = parser.add_mutually_exclusive_group()
-    group.add_argument("--compress", "-c", action='store_true', help="Compress output into gzip archive")
+    group.add_argument("--compress", "-c", action="store_true", help="Compress output into gzip archive")
     group.add_argument("--output", "-o", type=str, help="Output directory")
-    parser.add_argument("--overwrite", "-f", action='store_true', help="Overwrite existing data in output directory (Removes all files from output directory.)")
-    parser.add_argument("--deduplicate", "-d", action='store_true', help="Deduplicate identical files with symlinks.")
+    parser.add_argument("--overwrite", "-f", action="store_true", help="Overwrite existing data in output directory (Removes all files from output directory.)")
+    parser.add_argument("--deduplicate", "-d", action="store_true", help="Deduplicate identical files with symlinks.")
     parser.add_argument("--target-job", help="Create a single target from job string.")
     args = parser.parse_args()
 
@@ -136,56 +138,60 @@ def main():
     with tempfile.TemporaryDirectory(dir=temp_dir) as tempdir:
         tempdir = Path(tempdir)
         modm_path = os.path.abspath(os.path.dirname(sys.argv[0]) + "/../..")
-        print("Modm Path: {}".format(modm_path))
-        print("Temporary directory: {}".format(str(tempdir)))
+        print(f"Modm Path: {modm_path}")
+        print(f"Temporary directory: {tempdir}")
         output_dir = (tempdir / "output")
         (output_dir / "develop/api").mkdir(parents=True)
        os.chdir(tempdir)
         print("Starting to generate documentation...")
         template_overview(output_dir, device_list, board_list, template_path)
-        print("... for {} devices, estimated memory footprint is {} MB".format(len(device_list) + len(board_list), (len(device_list)*70)+2000))
+        print(f"... for {len(device_list) + len(board_list)} devices, estimated memory footprint is {len(device_list)*70+2000} MB")
         with ThreadPool(args.jobs) as pool:
             # We can only pass one argument to pool.map
-            devices = [f"python3 {filepath} --target-job '{modm_path}|{tempdir}|{dev}||{args.deduplicate}'" for dev in device_list]
-            devices += [f"python3 {filepath} --target-job '{modm_path}|{tempdir}|{dev}|{brd}|{args.deduplicate}'" for (brd, dev) in board_list]
-            results = pool.map(lambda d: subprocess.run(d, shell=True).returncode, list(set(devices)))
-        # output_dir.rename(cwd / 'modm-api-docs')
+            devices = [f'python3 {filepath} --target-job "{modm_path}|{tempdir}|{dev}||{args.deduplicate}|{args.compress}"' for dev in device_list]
+            devices += [f'python3 {filepath} --target-job "{modm_path}|{tempdir}|{dev}|{brd}|{args.deduplicate}|{args.compress}"' for (brd, dev) in board_list]
+            devices = list(set(devices))
+            # Run the first generation first so that the other jobs can already deduplicate properly
+            results = [subprocess.call(devices[0], shell=True)]
+            results += pool.map(lambda d: subprocess.call(d, shell=True), devices[1:])
+        # remove all the hash files
+        for file in (output_dir / "develop/api").glob("*.hash"):
+            file.unlink()
         if args.compress:
             print("Zipping docs ...")
-            # Zipping may take more than 10 minutes
-            os.system(f"(cd {str(output_dir)} && {'g' if is_running_on_macos else ''}tar --checkpoint=.100 -czf {str(cwd / 'modm-api-docs.tar.gz')} .)")
-            # shutil.make_archive(str(cwd / 'modm-api-docs'), 'gztar', str(output_dir))
+            # Zipping is *much* faster via command line than via python!
+            tar = "gtar" if is_running_on_macos else "tar"
+            zip_cmd = f"(cd {str(output_dir)} && {tar} --checkpoint=.100 -czf {str(cwd)}/modm-api-docs.tar.gz .)"
+            subprocess.call(zip_cmd, shell=True)
         else:
             if args.overwrite and final_output_dir.exists():
                 for i in final_output_dir.iterdir():
-                    print('Removing {}'.format(i))
+                    print(f"Removing {i}")
                     if i.is_dir():
                         shutil.rmtree(i)
                     else:
                         os.remove(i)
-            print('Moving {} -> {}'.format(output_dir, final_output_dir))
-            #shutil.move(str(output_dir) + '/', str(final_output_dir))
             print(f"Moving {output_dir} -> {final_output_dir}")
             output_dir.rename(final_output_dir)
-    return results.count(0) == len(results)
+    return len(results) - results.count(0)
 
 
 def create_target(argument):
-    modm_path, tempdir, device, board, deduplicate = argument.split("|")
+    modm_path, tempdir, device, board, deduplicate, compress = argument.split("|")
     tempdir = Path(tempdir)
     output_dir = board if board else device
     try:
-        print("Generating documentation for {} ...".format(output_dir))
+        print(f"Generating documentation for {output_dir}...")
 
-        options = ["modm:target={0}".format(device)]
+        options = [f"modm:target={device}"]
         if device.startswith("at"):
             options.append("modm:platform:core:f_cpu=16000000")
         builder = lbuild.api.Builder(options=options)
         builder.load([Path(modm_path) / "repo.lb", Path(modm_path) / "test/repo.lb"])
         modules = sorted(builder.parser.modules.keys())
 
         if board:
-            chosen_board = "modm:board:{}".format(board)
+            chosen_board = f"modm:board:{board}"
         else:
             # Only allow the first board module to be built (they overwrite each others files)
             chosen_board = next((m for m in modules if ":board:" in m), None)
@@ -200,51 +206,71 @@ def create_target(argument):
 
         builder.build(output_dir, modules)
 
-        print('Executing: (cd {}/modm/docs/ && doxypress doxypress.json)'.format(output_dir))
-        retval = os.system('(cd {}/modm/docs/ && doxypress doxypress.json > /dev/null 2>&1)'.format(output_dir))
+        print(f"Executing: (cd {output_dir}/modm/docs/ && doxypress doxypress.json)")
+        retval = subprocess.call(f"(cd {output_dir}/modm/docs/ && doxypress doxypress.json > /dev/null 2>&1)", shell=True)
+        # retval = subprocess.call(f"(cd {output_dir}/modm/docs/ && doxygen doxyfile.cfg > /dev/null 2>&1)", shell=True)
         if retval != 0:
-            print("Error {} generating documentation for device {}.".format(retval, output_dir))
+            print(f"Error {retval} generating documentation for device {output_dir}.")
             return False
-        print("Finished generating documentation for device {}.".format(output_dir))
+        print(f"Finished generating documentation for device {output_dir}.")
 
         srcdir = (tempdir / output_dir / "modm/docs/html")
-        destdir = tempdir / 'output/develop/api' / output_dir
+        destdir = tempdir / "output/develop/api" / output_dir
 
         if deduplicate == "True":
-            print("Deduplicating files for {}...".format(device))
-            symlinks = defaultdict(list)
-            for file in (tempdir / 'output').rglob('*'):
-                if file.is_dir() or file.is_symlink(): continue;
-                key = file.relative_to(tempdir).parts[4:]
-                if key:
-                    symlinks[os.path.join(*key)].append(file)
+            print(f"Deduplicating files for {device}...")
+            # Find and build the hash symlink database
+            hashdb = {}
+            for hashes in tempdir.glob("output/develop/api/*.hash"):
+                for line in hashes.read_text().splitlines():
+                    fhash, path = line.split(" ", 1)
+                    hashdb[fhash] = os.path.join(hashes.stem, path)
+            # Generate a list of files and replace them with symlinks
+            our_hashdb = {}
+            # symlinks = {}
             dot_counter = 0
-            for file in srcdir.rglob('*'):
+            for file in srcdir.rglob("*"):
                 if file.is_dir():
                     print(end="", flush=True)
                     continue
-                key = str(file.relative_to(srcdir))
-                if key in symlinks:
-                    for kfile in symlinks[key]:
-                        symlinks[hash(kfile.read_bytes())].append(kfile)
-                    del symlinks[key]
-                fhash = hash(file.read_bytes())
-                if fhash in symlinks:
-                    dot_counter += 1
-                    if dot_counter % 30 == 0: print(".", end="")
-                    rpath = symlinks[fhash][0].relative_to(tempdir / 'output/develop/api')
-                    lpath = os.path.relpath(srcdir, file.parent)
-                    sympath = os.path.join("..", lpath, rpath)
-                    # print("Linking {} -> {}".format(file.relative_to(srcdir), sympath))
+                dot_counter += 1
+                if dot_counter % 30 == 0: print(".", end="")
+                file_bytes = file.read_bytes()
+                if compress == "True":
+                    cfile = file.with_suffix(file.suffix + ".gz")
+                    file_bytes = gzip.compress(file_bytes, mtime=0)
+                    cfile.write_bytes(file_bytes)
+                    file.unlink()
+                    file = cfile
+                relpath = file.relative_to(srcdir)
+                fhash = hashlib.md5(file_bytes).hexdigest()
+                if (rpath := hashdb.get(fhash)) is not None:
+                    # Previously seen file can be symlinked
+                    lpath = os.path.relpath(srcdir.parent, file.parent)
+                    sympath = os.path.join(lpath, rpath)
+                    # symlinks[relpath] = sympath
                     file.unlink()
                     file.symlink_to(sympath)
+                    # print(f"Symlinking {file.relative_to(srcdir)} to {sympath}")
+                else:
+                    # This is a new file, store it in our hashdb
+                    our_hashdb[fhash] = relpath
+
+            # Write the symlink file
+            # if symlinks:
+            #     lines = [f"{path} -> {sympath}" for path, sympath in symlinks.items()]
+            #     (srcdir / "symlinks.txt").write_text("\n".join(lines))
+            # Write out our hashdb
+            if our_hashdb:
+                lines = [f"{fhash} {relpath}" for fhash, relpath in our_hashdb.items()]
+                destdir.with_suffix(".hash").write_text("\n".join(lines))
 
         # Only move folder *after* deduplication to prevent race condition with file.unlink()
         print(f"\nMoving {srcdir.relative_to(tempdir)} -> {destdir.relative_to(tempdir)}", flush=True)
         srcdir.rename(destdir)
         return True
     except Exception as e:
-        print("Error generating documentation for device {}: {}".format(output_dir, e))
+        print(f"Error generating documentation for device {output_dir}: {e}")
         return False
 
 
@@ -255,18 +281,14 @@ def template_overview(output_dir, device_list, board_list, template_path):
         date=datetime.datetime.now().strftime("%d.%m.%Y, %H:%M"),
         num_devices=len(device_list),
         num_boards=len(board_list))
-    with open(str(output_dir) + "/index.html","w+") as f:
-        f.write(html)
+    (output_dir / "index.html").write_text(html)
     json_data = {
         "devices": [str(d).upper() for d in device_list] + [rename_board(b) for (b,_) in board_list],
         "name2board": [{rename_board(b): b} for (b,_) in board_list],
     }
-    with open(str(output_dir) + "/develop/targets.json","w+") as outfile:
+    with (output_dir / "develop/targets.json").open("w+", encoding="UTF-8") as outfile:
         json.dump(json_data, outfile)
-    with open(str(output_dir) + "/robots.txt","w+") as f:
-        robots_txt = "User-agent: *\n"
-        f.write(robots_txt)
-
+    (output_dir / "robots.txt").write_text("User-agent: *\n")
 
 if __name__ == "__main__":
-    exit(0 if main() else -1)
+    exit(main())
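Taken together, the generator changes replace the old in-memory `hash()` comparison with a persistent manifest scheme: every finished target writes a `<target>.hash` file of "md5-hash path" lines next to its output directory, later jobs consult those manifests, and any file whose content hash is already known becomes a relative symlink into the first target's tree. Because files are now gzip-compressed with `mtime=0`, the compressed bytes are deterministic, so identical pages still hash identically after compression. Below is a condensed, standalone sketch of that idea; the helper name `dedup_tree` and its signature are illustrative, not the script's actual API, and it assumes the target's html tree already sits under the api root (the real script deduplicates first and moves the tree afterwards).

import gzip
import hashlib
import os
from pathlib import Path

def dedup_tree(api_root: Path, name: str, compress: bool = True) -> None:
    # Manifests written by previously finished targets: one "md5hash path" per line.
    hashdb = {}
    for manifest in api_root.glob("*.hash"):
        for line in manifest.read_text().splitlines():
            fhash, relpath = line.split(" ", 1)
            hashdb[fhash] = os.path.join(manifest.stem, relpath)

    srcdir = api_root / name
    our_hashdb = {}
    # Materialize the file list up front: the loop body replaces files in place.
    for file in [p for p in srcdir.rglob("*") if p.is_file()]:
        data = file.read_bytes()
        if compress:
            # mtime=0 makes the gzip stream deterministic: identical input
            # bytes always compress to identical output bytes.
            data = gzip.compress(data, mtime=0)
            cfile = file.with_suffix(file.suffix + ".gz")
            cfile.write_bytes(data)
            file.unlink()
            file = cfile
        fhash = hashlib.md5(data).hexdigest()
        if (known := hashdb.get(fhash)) is not None:
            # Duplicate content: replace the file with a relative symlink into
            # the tree of whichever target stored this content first.
            sympath = os.path.join(os.path.relpath(api_root, file.parent), known)
            file.unlink()
            file.symlink_to(sympath)
        else:
            our_hashdb[fhash] = file.relative_to(srcdir)

    # Publish our own manifest so later jobs can deduplicate against us.
    lines = [f"{h} {p}" for h, p in our_hashdb.items()]
    (api_root / name).with_suffix(".hash").write_text("\n".join(lines))

The ordering in `main()` matters for this to pay off: the first target is generated alone before the thread pool starts, so concurrent jobs always find at least one manifest to link against, and all `*.hash` files are deleted once every job has finished. The determinism assumption is easy to verify:

import gzip
assert gzip.compress(b"<html>same</html>", mtime=0) == gzip.compress(b"<html>same</html>", mtime=0)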

tools/scripts/docs_modm_io_index.html.in

Lines changed: 1 addition & 1 deletion

@@ -219,7 +219,7 @@ function showDocumentation() {
         return;
     }
     n2b = name2board[targetinput.value]
-    var url ="/" + releaseinput.value + "/api/" + (n2b ? n2b : targetinput.value).toLowerCase() + "/";
+    var url ="/" + releaseinput.value + "/api/" + (n2b ? n2b : targetinput.value).toLowerCase() + "/index.html";
     location.href = url;
 }
 targetinput.addEventListener("input", function(event) {
