import json
import os
import subprocess
- from bs4 import BeautifulSoup
+ import sys
from datetime import datetime

- # Path to the single JSON file
- json_file_path = "output.json"
-
- # Base directory for the generated HTML files
- build_dir = "_build/html"
+ from bs4 import BeautifulSoup

- # Define the source to build path mapping
+ # Check if the build directory is provided as an argument in the Makefile
+ if len(sys.argv) < 2:
+     print("Error: Build directory not provided. Exiting.")
+     exit(1)
+
+ build_dir = sys.argv[1]
+ print(f"Build directory: {build_dir}")
+
+ json_file_path = "tutorials-review-data.json"
+ build_dir = "_build/html"  # for testing after _build/html is created
+
+ # paths to skip from the post-processing script
+ paths_to_skip = [
+     "beginner/examples_autograd/two_layer_net_custom_function",  # not present in the repo
+     "beginner/examples_nn/two_layer_net_module",  # not present in the repo
+     "beginner/examples_tensor/two_layer_net_numpy",  # not present in the repo
+     "beginner/examples_tensor/two_layer_net_tensor",  # not present in the repo
+     "beginner/examples_autograd/two_layer_net_autograd",  # not present in the repo
+     "beginner/examples_nn/two_layer_net_optim",  # not present in the repo
+     "beginner/examples_nn/two_layer_net_nn",  # not present in the repo
+     "intermediate/coding_ddpg",  # not present in the repo - will delete the carryover
+ ]
+ # Mapping of source directories to build directories
source_to_build_mapping = {
    "beginner": "beginner_source",
    "recipes": "recipes_source",
    "distributed": "distributed",
    "intermediate": "intermediate_source",
    "prototype": "prototype_source",
-     "advanced": "advanced_source"
+     "advanced": "advanced_source",
+     "": "",  # root dir for index.rst
}

- # Function to get the creation date of a file using git log
+
+ # Use git log to get the creation date of the file
def get_creation_date(file_path):
    try:
-         # Run git log to get the date of the first commit for the file
        result = subprocess.run(
            ["git", "log", "--diff-filter=A", "--format=%aD", "--", file_path],
            capture_output=True,
            text=True,
-             check=True
+             check=True,
        )
-         # Check if the output is not empty
        if result.stdout:
            creation_date = result.stdout.splitlines()[0]
-             # Parse and format the date
            creation_date = datetime.strptime(creation_date, "%a, %d %b %Y %H:%M:%S %z")
            formatted_date = creation_date.strftime("%d %b, %Y")
        else:
@@ -42,82 +59,122 @@ def get_creation_date(file_path):
    except subprocess.CalledProcessError:
        return "Unknown"

- # Function to find the source file with any common extension
+
+ # Use git log to get the last updated date of the file
+ def get_last_updated_date(file_path):
+     try:
+         result = subprocess.run(
+             ["git", "log", "-1", "--format=%aD", "--", file_path],
+             capture_output=True,
+             text=True,
+             check=True,
+         )
+         if result.stdout:
+             last_updated_date = result.stdout.strip()
+             last_updated_date = datetime.strptime(
+                 last_updated_date, "%a, %d %b %Y %H:%M:%S %z"
+             )
+             formatted_date = last_updated_date.strftime("%d %b, %Y")
+         else:
+             formatted_date = "Unknown"
+         return formatted_date
+     except subprocess.CalledProcessError:
+         return "Unknown"
+
+
+ # Try to find the source file with the given base path and the extensions .rst and .py
def find_source_file(base_path):
-     for ext in ['.rst', '.py']:
+     for ext in [".rst", ".py"]:
        source_file_path = base_path + ext
        if os.path.exists(source_file_path):
            return source_file_path
    return None

- # Function to process the JSON file
+
+ # Function to process a JSON file and insert the "Last Verified" information into the HTML files
def process_json_file(json_file_path):
    with open(json_file_path, "r", encoding="utf-8") as json_file:
        json_data = json.load(json_file)

-     # Process each entry in the JSON data
    for entry in json_data:
        path = entry["Path"]
        last_verified = entry["Last Verified"]
+         status = entry.get("Status", "")
+         if path in paths_to_skip:
+             print(f"Skipping path: {path}")
+             continue
+         if status in ["needs update", "not verified"]:
+             formatted_last_verified = "Not Verified"
+         elif last_verified:
+             try:
+                 last_verified_date = datetime.strptime(last_verified, "%Y-%m-%d")
+                 formatted_last_verified = last_verified_date.strftime("%d %b, %Y")
+             except ValueError:
+                 formatted_last_verified = "Unknown"
+         else:
+             formatted_last_verified = "Not Verified"
+         if status == "deprecated":
+             formatted_last_verified += "Deprecated"

-         # Format the "Last Verified" date
-         try:
-             last_verified_date = datetime.strptime(last_verified, "%Y-%m-%d")
-             formatted_last_verified = last_verified_date.strftime("%d %b, %Y")
-         except ValueError:
-             formatted_last_verified = "Unknown"
-
-         # Determine the source directory and file name
        for build_subdir, source_subdir in source_to_build_mapping.items():
            if path.startswith(build_subdir):
-                 # Construct the path to the HTML file
                html_file_path = os.path.join(build_dir, path + ".html")
-                 # Construct the base path to the source file
-                 base_source_path = os.path.join(source_subdir, path[len(build_subdir)+1:])
-                 # Find the actual source file
+                 base_source_path = os.path.join(
+                     source_subdir, path[len(build_subdir) + 1 :]
+                 )
                source_file_path = find_source_file(base_source_path)
                break
        else:
            print(f"Warning: No mapping found for path {path}")
            continue

-         # Check if the HTML file exists
        if not os.path.exists(html_file_path):
-             print(f"Warning: HTML file not found for path {html_file_path}")
+             print(
+                 f"Warning: HTML file not found for path {html_file_path}. "
+                 "If this is a new tutorial, please add it to the audit JSON file and set the Verified status and today's date."
+             )
            continue

-         # Check if the source file was found
        if not source_file_path:
-             print(f"Warning: Source file not found for path {base_source_path}")
+             print(f"Warning: Source file not found for path {base_source_path}.")
            continue

-         # Get the creation date of the source file
        created_on = get_creation_date(source_file_path)
+         last_updated = get_last_updated_date(source_file_path)

-         # Open and parse the HTML file
        with open(html_file_path, "r", encoding="utf-8") as file:
            soup = BeautifulSoup(file, "html.parser")
+             # Check if the <p> tag with class "date-info-last-verified" already exists
+             existing_date_info = soup.find("p", {"class": "date-info-last-verified"})
+             if existing_date_info:
+                 print(
+                     f"Warning: <p> tag with class 'date-info-last-verified' already exists in {html_file_path}"
+                 )
+                 continue

-             # Find the first <h1> tag and insert the "Last Verified" and "Created On" dates after it
-             h1_tag = soup.find("h1")
+             h1_tag = soup.find("h1")  # Find the h1 tag to insert the dates
            if h1_tag:
-                 # Create a new tag for the dates
-                 date_info_tag = soup.new_tag("p")
-                 date_info_tag['style'] = "color: #6c6c6d; font-size: small;"
-
-                 # Add the "Created On" and "Last Verified" information
-                 date_info_tag.string = f"Created On: {created_on} | Last Verified: {formatted_last_verified}"
-
+                 date_info_tag = soup.new_tag("p", **{"class": "date-info-last-verified"})
+                 date_info_tag["style"] = "color: #6c6c6d; font-size: small;"
+                 # Add the "Created On", "Last Updated", and "Last Verified" information
+                 date_info_tag.string = (
+                     f"Created On: {created_on} | "
+                     f"Last Updated: {last_updated} | "
+                     f"Last Verified: {formatted_last_verified}"
+                 )
                # Insert the new tag after the <h1> tag
                h1_tag.insert_after(date_info_tag)
-
-                 # Save the modified HTML back to the file
+                 # Save back to the HTML.
                with open(html_file_path, "w", encoding="utf-8") as file:
                    file.write(str(soup))
            else:
                print(f"Warning: <h1> tag not found in {html_file_path}")

- # Process the single JSON file
- process_json_file(json_file_path)

- print("Processing complete.")
+ process_json_file(json_file_path)
+ print(
+     f"Finished processing JSON file. Please check the output for any warnings. "
+     "Pages like `nlp/index.html` are generated only during the full `make docs` "
+     "or `make html` build. Warnings about these files when you run `make html-noplot` "
+     "can be ignored."
+ )
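
For reference, a minimal standalone sketch of the tag this change inserts into each built page, using the same BeautifulSoup calls as the script above; the sample HTML and the date values are made up for illustration and are not taken from this change:

from bs4 import BeautifulSoup

# Toy page standing in for a built tutorial page (illustrative only).
html = "<html><body><h1>Some Tutorial</h1><p>Intro text.</p></body></html>"
soup = BeautifulSoup(html, "html.parser")

h1_tag = soup.find("h1")
date_info_tag = soup.new_tag("p", **{"class": "date-info-last-verified"})
date_info_tag["style"] = "color: #6c6c6d; font-size: small;"
date_info_tag.string = (
    "Created On: 01 Jan, 2024 | Last Updated: 01 Feb, 2024 | Last Verified: 01 Mar, 2024"
)
# insert_after() places the date line directly below the page title, as the script does.
h1_tag.insert_after(date_info_tag)
print(soup)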