Skip to content

Commit fdd6f92

Browse files
committed
Initial commit, adding support for updating timestamps from git logs to each .py and .rst file
1 parent 13e7981 commit fdd6f92

File tree

3 files changed

+118
-0
lines changed

3 files changed

+118
-0
lines changed

.jenkins/build.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ if [[ "${JOB_TYPE}" == "worker" ]]; then
5656
# Files to run must be accessible to subprocesses (at least to `download_data.py`)
5757
export FILES_TO_RUN
5858

59+
# Step 2.1: Add timestamps to .py and .rst files in source directories
60+
bash $DIR/update_timestamps_batch.sh .
61+
5962
# Step 3: Run `make docs` to generate HTML files and static files for these tutorials
6063
pip3 install -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
6164
make docs
@@ -118,6 +121,10 @@ if [[ "${JOB_TYPE}" == "worker" ]]; then
118121
7z a worker_${WORKER_ID}.7z docs
119122
awsv2 s3 cp worker_${WORKER_ID}.7z s3://${BUCKET_NAME}/${COMMIT_ID}/worker_${WORKER_ID}.7z
120123
elif [[ "${JOB_TYPE}" == "manager" ]]; then
124+
125+
# Step 0.9: Add timestamps to .py and .rst files in source directories
126+
bash $DIR/update_timestamps_batch.sh .
127+
121128
# Step 1: Generate no-plot HTML pages for all tutorials
122129
pip3 install -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
123130
make html-noplot

.jenkins/update_timestamps.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import re
2+
import sys
3+
import os
4+
import subprocess
5+
6+
def get_last_commit_timestamp_for_file(file_path: str) -> str:
    """Return a human-readable timestamp of the last change to a file.

    The author time of the most recent git commit touching ``file_path`` is
    used. When git reports no history for the file, or git cannot be queried
    at all (it exits with an error or is not installed), the file's
    last-modified time is used instead.

    Args:
        file_path (str): Path to file

    Returns:
        str: Timestamp in local time, formatted like
            ``03:27 PM, June 05, 2023``.

    Raises:
        OSError: If the fallback is needed and ``file_path`` cannot be
            stat'ed.
    """
    # Imported locally so the function does not depend on a module-level
    # import that the rest of the file does not otherwise need.
    import datetime

    git_command = ["git", "log", "-1", "--format=%at", "--", file_path]
    try:
        timestamp = subprocess.check_output(git_command).decode().strip()
    except (subprocess.CalledProcessError, OSError):
        # git failed (e.g. not a repository) or could not be executed;
        # treat this the same as "no commit history".
        timestamp = ""

    if not timestamp:
        # If there is no git commit history, use last modified date
        timestamp = str(int(os.path.getmtime(file_path)))

    # Format in-process rather than shelling out to `date -d @...`, which is
    # a GNU extension and is not available on BSD/macOS `date`.
    moment = datetime.datetime.fromtimestamp(int(timestamp))
    return moment.strftime("%I:%M %p, %B %d, %Y")
24+
25+
def update_timestamp(file_path: str):
    """Add or refresh an ``**Updated:**`` timestamp line in a file.

    The file is rewritten in place. Placement rules:

    * If a line matching ``Author...:`` / ``Authors...:`` exists, the
      timestamp goes right below the first such line. An existing timestamp
      line there (optionally separated from the author line by blank lines)
      is replaced rather than duplicated. If the author line starts with
      ``#``, the timestamp line is prefixed with ``# `` as well.
    * Otherwise the timestamp goes at the end of the file, replacing an
      existing trailing timestamp line if there is one. For ``.py`` files it
      is prefixed with ``# ``.

    Args:
        file_path (str): Path to file
    """
    # A previously written timestamp line, with or without a leading "# ".
    timestamp_pattern = re.compile(
        r'\*\*Updated:\*\*\s\**\d{1,2}:\d{2} [AP]M, \w+ \d{1,2}, \d{4}\*'
    )

    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    # Index of the first author line, or -1 when there is none.
    author_line_index = next(
        (i for i, line in enumerate(lines)
         if re.search(r'(Author|Authors).*?:', line)),
        -1,
    )

    timestamp = get_last_commit_timestamp_for_file(file_path)
    timestamp_line = f'**Updated:** *{timestamp}*\n'

    if author_line_index != -1:
        if lines[author_line_index].startswith('#'):
            # The author line is a comment, so the timestamp must be one too.
            timestamp_line = '# ' + timestamp_line

        updated_lines = lines[:author_line_index + 1]
        if not updated_lines[-1].endswith('\n'):
            # Author line is the unterminated last line of the file; keep the
            # timestamp from being glued onto it.
            updated_lines[-1] += '\n'

        # Skip the run of blank lines that directly follows the author line.
        i = author_line_index + 1
        while i < len(lines) and lines[i].strip() == '':
            i += 1
        blank_lines = lines[author_line_index + 1:i]

        if i < len(lines) and timestamp_pattern.search(lines[i]):
            # A timestamp already exists: replace it, dropping the old line
            # so repeated runs do not accumulate timestamps.
            updated_lines += blank_lines
            updated_lines.append(timestamp_line)
            updated_lines += lines[i + 1:]
        elif blank_lines:
            # The first blank line becomes the timestamp; make sure at least
            # one blank line still separates it from what follows. This also
            # covers blank lines running to the end of the file.
            updated_lines.append(timestamp_line)
            updated_lines += blank_lines[1:] or ['\n']
            updated_lines += lines[i:]
        else:
            # Content follows the author line directly (or nothing follows):
            # insert the timestamp plus a separating blank line.
            updated_lines += [timestamp_line, '\n']
            updated_lines += lines[author_line_index + 1:]
    else:
        # If author line is not found, add timestamp to the last line
        updated_lines = lines

        if file_path.endswith('.py'):
            timestamp_line = '# ' + timestamp_line

        # Go to the last non-blank line, check if it is the timestamp
        i = len(lines) - 1
        while i >= 0 and lines[i].strip() == '':
            i -= 1

        if i >= 0 and timestamp_pattern.search(lines[i]):
            updated_lines[i] = timestamp_line
        else:
            updated_lines.append(f'\n\n{timestamp_line}')

    # Write updated lines back to file
    with open(file_path, 'w', encoding='utf-8') as file:
        file.writelines(updated_lines)
92+
93+
94+
if __name__ == "__main__":
    # Guarded so that importing this module neither reads sys.argv nor
    # rewrites a file; running it as a script behaves as before.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} FILE")
    file_path = sys.argv[1]
    update_timestamp(file_path)

.jenkins/update_timestamps_batch.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/bin/bash
# Runs update_timestamps.py on every .py and .rst file located directly
# inside the beginner/intermediate/advanced source directories (no recursion
# into subdirectories).
#
# Usage: update_timestamps_batch.sh [SOURCEDIR]

# Default to the current directory; with an empty SOURCEDIR the paths below
# would resolve to /beginner_source etc. at the filesystem root.
SOURCEDIR="${1:-.}"

# Directory containing this script, used to locate update_timestamps.py.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"

directories=("$SOURCEDIR/beginner_source" "$SOURCEDIR/intermediate_source" "$SOURCEDIR/advanced_source")

for dir in "${directories[@]}"; do
  # Process .py and .rst files in the current directory
  for file in "$dir"/*.{py,rst}; do
    # An unmatched glob stays a literal pattern; -f filters that out.
    if [ -f "$file" ]; then
      python "$DIR/update_timestamps.py" "$file"
    fi
  done
done

0 commit comments

Comments
 (0)