Skip to content

Commit 6a27557

Browse files
committed
Update archive script
1 parent 9735e7d commit 6a27557

File tree

1 file changed

+109
-166
lines changed

1 file changed

+109
-166
lines changed

scripts/archive_docs.py

Lines changed: 109 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -1,176 +1,119 @@
11
import sys
22
import os
33
import shutil
4-
import datetime
5-
import subprocess
64
import frontmatter
75

6+
# The version argument is mandatory: it is read from sys.argv[1] further down
# and names the docs/<VERSION> directory the stable docs are archived into.
if len(sys.argv) < 2:
    print("Usage: archive_docs VERSION")
    # sys.exit is used instead of the `exit` helper injected by the `site`
    # module, which is meant for interactive sessions and is missing when
    # Python is started with -S.
    sys.exit(1)
89

9-
# Usage instructions:
10-
# Update _config.yml to specify the new version number
11-
# Add a new row to /_data/versions.csv with the new version number
12-
# Run this script. More options below, but as an example:
13-
# run this script in the top level docs directory like: python3 scripts/archive_docs.py 1.0
14-
# When testing, run a clean (non-incremental) serve: jekyll serve
10+
# First positional argument: the version the current stable docs are archived as.
old_stable_version = sys.argv[1]
1511

16-
if len(sys.argv) < 2:
17-
print(
18-
"Usage: python3 scripts/archive_docs.py [version] [--noconfirm] [--date=YYYY-MM-DD]"
12+
13+
# update new stable file based on old stable file
14+
def update_new_stable_page(new_stable_file, old_stable_dir):
    """Render a preview page the way it should appear in the stable docs.

    The page keeps its own content and metadata, with two adjustments:

    * ``redirect_from`` is taken from the page at the same location under
      ``old_stable_dir``; when that page or that field does not exist, the
      field is removed instead of being written out as ``null``.
    * ``{% link docs/preview/`` tags in the body are retargeted to
      ``{% link docs/stable/``.

    Args:
        new_stable_file: Path of a Markdown page below ``docs/preview``.
        old_stable_dir: Directory that holds the current stable docs.

    Returns:
        The serialized page (front matter plus content) as a string.
    """
    # Map the preview path onto its counterpart in the old stable tree. Only
    # the leading occurrence is swapped so that a deeper path segment that
    # happens to contain "docs/preview" is left untouched.
    old_stable_file = new_stable_file.replace("docs/preview", old_stable_dir, 1)

    redirect_from_field = None
    if os.path.exists(old_stable_file):
        redirect_from_field = frontmatter.load(old_stable_file).get("redirect_from")

    new_stable_doc = frontmatter.load(new_stable_file)

    # The redirects of the old stable page win over whatever the preview page
    # declared; without an old value the key is dropped entirely.
    if redirect_from_field is None:
        new_stable_doc.metadata.pop("redirect_from", None)
    else:
        new_stable_doc["redirect_from"] = redirect_from_field

    # Point Jekyll link tags at the stable tree.
    new_stable_doc.content = new_stable_doc.content.replace(
        "{% link docs/preview/",
        "{% link docs/stable/",
    )
    return frontmatter.dumps(new_stable_doc)
34+
35+
36+
# copy docs/preview to docs/stable, while keeping the redirects from docs/stable
37+
def archive_preview(old_stable_version):
    """Replace ``docs/stable`` with a copy of ``docs/preview``.

    Markdown pages are passed through ``update_new_stable_page`` so that they
    inherit the redirects of their old stable counterparts; every other file
    is copied verbatim. The new tree is assembled in ``docs/stable_temp``
    first because the old stable pages must remain readable while the preview
    pages are being converted.

    Args:
        old_stable_version: Not used by this function; kept so the signature
            stays unchanged.
    """
    src = "docs/preview"
    dst = "docs/stable_temp"
    old_stable = "docs/stable"

    # Start from an empty staging directory so that files left behind by an
    # aborted earlier run cannot leak into the new stable tree.
    if os.path.isdir(dst):
        shutil.rmtree(dst)
    os.makedirs(dst)

    for root, _dirs, files in os.walk(src):
        rel_path = os.path.relpath(root, src)
        dest_dir = os.path.join(dst, rel_path)
        os.makedirs(dest_dir, exist_ok=True)

        for file in files:
            src_file = os.path.join(root, file)
            dst_file = os.path.join(dest_dir, file)

            if src_file.endswith(".md"):
                new_content = update_new_stable_page(src_file, old_stable)
                # Report progress by path instead of dumping the whole page.
                print(f"{src_file} -> {dst_file}")
                # Write UTF-8 explicitly rather than relying on the platform's
                # locale encoding.
                with open(dst_file, "w", encoding="utf-8") as f:
                    f.write(new_content)
            else:
                shutil.copy2(src_file, dst_file)

    # Swap the staged tree in only after every page has been converted.
    if os.path.isdir(old_stable):
        shutil.rmtree(old_stable)
    shutil.move(dst, old_stable)
64+
65+
66+
def update_stable_page(src_file, old_stable_version):
    """Render a stable page the way it should appear in a versioned archive.

    ``docs/stable/`` is replaced by ``docs/<old_stable_version>/`` both in the
    ``redirect_from`` front matter and in ``{% link ... %}`` tags of the body.

    Args:
        src_file: Path of the Markdown page to convert.
        old_stable_version: Version that names the archive directory.

    Returns:
        The serialized page (front matter plus content) as a string.
    """
    archived_prefix = f"docs/{old_stable_version}/"
    doc = frontmatter.load(src_file)

    redirect_from_field = doc.get("redirect_from")
    if redirect_from_field:
        if isinstance(redirect_from_field, str):
            # A single redirect may be written as a bare YAML string;
            # iterating it would rewrite it character by character.
            doc["redirect_from"] = redirect_from_field.replace(
                "docs/stable/", archived_prefix
            )
        else:
            doc["redirect_from"] = [
                x.replace("docs/stable/", archived_prefix)
                for x in redirect_from_field
            ]

    # Point Jekyll link tags at the archived tree.
    doc.content = doc.content.replace(
        "{% link docs/stable/",
        f"{{% link {archived_prefix}",
    )
    return frontmatter.dumps(doc)
2584

26-
# Base git commands; callers append the revision and path they need.
git_log_cmd = ['git', 'log', "--pretty=format:%H %cI"]
git_ls_cmd = ['git', 'ls-tree', '--name-only']
git_show_cmd = ['git', 'show']


def parse_arguments(argv):
    """Split ``argv`` into positional arguments and the recognised options.

    Args:
        argv: The raw argument list, program name included.

    Returns:
        A ``(arguments, confirm, date)`` tuple: the arguments with the options
        removed, ``False`` for ``confirm`` when ``--noconfirm`` was given, and
        the end of the day (23:59:59) named by ``--date=YYYY-MM-DD`` or
        ``None`` when no date was given.
    """
    positional = []
    confirm = True
    date = None
    # A new list is built rather than deleting from the one being iterated:
    # deleting while looping over range(len(...)) runs past the end of the
    # shortened list and skips the element that follows a deleted one.
    for argument in argv:
        if argument == '--noconfirm':
            confirm = False
        elif argument.startswith("--date="):
            day = datetime.datetime.strptime(argument.split('=', 1)[1], '%Y-%m-%d')
            date = datetime.datetime(day.year, day.month, day.day, 23, 59, 59)
        else:
            positional.append(argument)
    return positional, confirm, date


arguments, confirm, date = parse_arguments(sys.argv)
43-
def execute_and_get_output(cmd):
    """Run ``cmd`` and return everything it wrote to standard output.

    When the command returns a non-zero status, the captured stdout and
    stderr are printed and the script exits with status 1.

    Args:
        cmd: The command and its arguments, passed to ``subprocess.Popen``.

    Returns:
        The captured standard output as ``bytes``.
    """
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains both pipes together. Reading stdout to the end
    # before touching stderr can block forever once the child fills the
    # stderr pipe buffer.
    stdout, stderr = proc.communicate()
    if proc.returncode != 0:
        print(f"Command '{cmd}' failed!")
        print(stdout)
        print(stderr)
        sys.exit(1)
    return stdout
54-
55-
56-
# Revision whose tree is archived; None means the working tree is used as-is.
revision = None
if date is not None:
    rev_date = None
    log_lines = execute_and_get_output(git_log_cmd).decode('utf8').split('\n')
    for line in log_lines:
        splits = line.split(' ')
        if len(splits) < 2:
            # Blank or malformed line (e.g. empty log output): nothing to parse.
            continue
        parsed_date = datetime.datetime.strptime(splits[1], '%Y-%m-%dT%H:%M:%S%z')
        # Drop the UTC offset so the value compares against the naive `date`.
        rev_date = datetime.datetime(
            parsed_date.year,
            parsed_date.month,
            parsed_date.day,
            parsed_date.hour,
            parsed_date.minute,
            parsed_date.second,
        )
        if rev_date < date:
            break
        # NOTE(review): this keeps the last commit that is NOT older than
        # `date`, and leaves `revision` as None when the first log line is
        # already older. Confirm that this is the intended commit.
        revision = splits[0]
    print(f"Formatting for {revision} (committed on {rev_date})")
75-
76-
# Entries that recursive_copy skips wherever they occur in the tree.
ignored_files = ['.DS_Store', 'archive', 'faq.md', 'why_duckdb.md']

version = arguments[1]
folder = os.path.join('docs', 'archive', version)

print(
    f"Archiving current docs for version \"{version}\" to path \"{folder}\". Remember to update _config.yml and /_data/versions.csv also."
)
if confirm:
    result = input("Continue with archival (y/n)?\n")
    # Tolerate surrounding whitespace and an upper-case answer.
    if result.strip().lower() != 'y':
        print("Aborting.")
        sys.exit(0)
89-
90-
91-
def list_tree(source):
    """Return the names of the entries directly inside ``source``.

    Without a selected ``revision`` the working tree is listed with
    ``os.listdir``; otherwise the names come from ``git ls-tree`` run against
    that revision.

    Args:
        source: Directory path, relative to the directory the script runs in.

    Returns:
        A list of entry names without any directory part.
    """
    if revision is None:
        return os.listdir(source)
    # The trailing slash makes git list the directory's children rather than
    # the directory entry itself.
    listing = execute_and_get_output(git_ls_cmd + [revision, source + '/'])
    return [os.path.basename(x) for x in listing.decode('utf8').split('\n') if x]
99-
100-
101-
def copy_file(source_path, target_path, version):
    """Copy one file into the archive, rewriting Markdown pages on the way.

    Without a selected ``revision`` the file is read from the working tree:
    Markdown pages get ``docs/`` replaced by ``docs/archive/<version>/`` in
    their ``redirect_from`` front matter and in ``{% link docs/`` tags, and
    every other file is copied unchanged. With a ``revision`` the content is
    taken from ``git show`` and written byte for byte, without any rewriting.

    Args:
        source_path: File to copy.
        target_path: Destination path.
        version: Version that names the archive directory.
    """
    print(f"{source_path} -> {target_path}")
    if revision is None:
        if source_path.endswith(".md"):
            # Parse the page before opening the target, so a page that fails
            # to parse does not leave a truncated file behind.
            with open(source_path, encoding="utf-8") as f:
                doc = frontmatter.load(f)

            redirect_from_field = doc.get("redirect_from")
            if redirect_from_field:
                doc["redirect_from"] = [
                    x.replace("docs/", f"docs/archive/{version}/")
                    for x in redirect_from_field
                ]

            doc.content = doc.content.replace(
                "{% link docs/",
                f"{{% link docs/archive/{version}/",
            )

            # Write UTF-8 explicitly rather than relying on the locale encoding.
            with open(target_path, "w", encoding="utf-8") as of:
                of.write(frontmatter.dumps(doc))
        else:
            shutil.copy(source_path, target_path)
    else:
        file_content = execute_and_get_output(git_show_cmd + [revision + ':' + source_path])
        with open(target_path, 'wb+') as f:
            f.write(file_content)
130-
131-
132-
def recursive_copy(source, target, version):
    """Copy the tree below ``source`` to ``target`` via ``copy_file``.

    Entries whose name appears in ``ignored_files`` are skipped at every
    level.

    Args:
        source: Directory to copy from.
        target: Directory to copy into; created when missing.
        version: Version forwarded to ``copy_file``.
    """
    # makedirs also creates missing parents (e.g. docs/archive on the first
    # run) and does not fail when the directory already exists.
    os.makedirs(target, exist_ok=True)
    for fname in list_tree(source):
        if fname in ignored_files:
            continue
        source_path = os.path.join(source, fname)
        target_path = os.path.join(target, fname)
        # The file/directory test looks at the working tree, even when the
        # names were listed from a git revision.
        if os.path.isfile(source_path):
            copy_file(source_path, target_path, version)
        elif os.path.isdir(source_path):
            recursive_copy(source_path, target_path, version)
144-
145-
146-
def archive_installation_page(version):
    """Write the archived installation page for ``version``.

    The output is the front matter of ``docs/installation/index.html``
    followed by the body of ``_includes/installation.html``, in which
    " (Latest Release)" is removed and ``{{ site.current_duckdb_version }}``
    is replaced by ``version``.

    Args:
        version: Version that names the archive directory.
    """
    # Only the front matter of the proxy installation page is kept; its body
    # is replaced by the include processed below.
    with open("docs/installation/index.html", encoding="utf-8") as current_installation_file:
        current_installation_file_loaded = frontmatter.load(current_installation_file)
        current_installation_file_loaded.content = ""

    with open("_includes/installation.html", encoding="utf-8") as main_installation_file:
        installation_page = "\n" + main_installation_file.read()

    # Adjust the installation links so they point to the archived version.
    installation_page = installation_page.replace(" (Latest Release)", "")
    installation_page = installation_page.replace(
        "{{ site.current_duckdb_version }}", version
    )
    # "{{ site.next_java_version }}" is left as is, so the
    # "GitHub main (Nightly Build)" entry keeps moving with new versions.

    with open(
        f"docs/archive/{version}/installation/index.html", "w", encoding="utf-8"
    ) as archived_installation_file:
        archived_installation_file.write(
            frontmatter.dumps(current_installation_file_loaded)
        )
        archived_installation_file.write(installation_page)
168-
169-
170-
# Archive the docs tree, the stable menu definition and the installation page.
recursive_copy('docs', folder, version)

# The menu file name carries the version with its dots removed.
versioned_menu_path = '_data/menu_docs_' + version.replace('.', '') + '.json'
copy_file('_data/menu_docs_stable.json', versioned_menu_path, version)

archive_installation_page(version)
85+
86+
# copy docs/stable to docs/<old_stable_version>
87+
# the redirects should be expanded with the version number
88+
def archive_stable(old_stable_version):
    """Copy ``docs/stable`` to ``docs/<old_stable_version>``.

    Markdown pages are passed through ``update_stable_page`` so that their
    redirects and link tags point at the versioned directory; every other
    file is copied verbatim. Existing files in the destination are
    overwritten.

    Args:
        old_stable_version: Version that names the archive directory.
    """
    src = "docs/stable"
    dst = f"docs/{old_stable_version}"

    os.makedirs(dst, exist_ok=True)

    for root, _dirs, files in os.walk(src):
        rel_path = os.path.relpath(root, src)
        dest_dir = os.path.join(dst, rel_path)
        os.makedirs(dest_dir, exist_ok=True)

        for file in files:
            src_file = os.path.join(root, file)
            dst_file = os.path.join(dest_dir, file)

            if src_file.endswith(".md"):
                new_content = update_stable_page(src_file, old_stable_version)
                # Report progress by path instead of dumping the whole page.
                print(f"{src_file} -> {dst_file}")
                # Write UTF-8 explicitly rather than relying on the platform's
                # locale encoding.
                with open(dst_file, "w", encoding="utf-8") as f:
                    f.write(new_content)
            else:
                shutil.copy2(src_file, dst_file)
110+
111+
112+
113+
# Version with its dots removed; only the disabled menu-copy step below uses it.
old_stable_version_no_dots = old_stable_version.replace(".", "")

# Step 1: archive the current stable docs under docs/<old_stable_version>.
# TODO: the menu copy is disabled and relies on a copy_file helper:
# copy_file("_data/menu_docs_stable.json", f"_data/menu_docs_{old_stable_version_no_dots}.json")
archive_stable(old_stable_version)

# Step 2 (disabled): promote the preview docs to stable.
# TODO: archive_preview requires the old_stable_version argument when re-enabled.
# copy_file("_data/menu_docs_preview.json", f"_data/menu_docs_stable.json")
# archive_preview()

0 commit comments

Comments
 (0)