import json
import os
import subprocess
import sys
from datetime import datetime

from bs4 import BeautifulSoup

# The HTML build directory must be given as the first command-line argument
# (the Makefile is expected to pass it). Abort early when it is missing.
if len(sys.argv) < 2:
    print("Error: Build directory not provided. Exiting.", file=sys.stderr)
    sys.exit(1)

# Honor the directory supplied on the command line; it must not be replaced
# by a hard-coded testing value afterwards.
build_dir = sys.argv[1]
print(f"Build directory: {build_dir}")

# JSON file with the per-tutorial review data consumed by process_json_file.
json_file_path = "tutorials-review-data.json"

# Paths to skip from the post-processing script. Entries use the same form as
# the "Path" values in the review-data JSON (no file extension).
paths_to_skip = [
    "beginner/examples_autograd/two_layer_net_custom_function",  # not present in the repo
    "beginner/examples_nn/two_layer_net_module",  # not present in the repo
    "beginner/examples_tensor/two_layer_net_numpy",  # not present in the repo
    "beginner/examples_tensor/two_layer_net_tensor",  # not present in the repo
    "beginner/examples_autograd/two_layer_net_autograd",  # not present in the repo
    "beginner/examples_nn/two_layer_net_optim",  # not present in the repo
    "beginner/examples_nn/two_layer_net_nn",  # not present in the repo
    "intermediate/coding_ddpg",  # not present in the repo - will delete the carryover
]

# Mapping of build-side path prefixes (keys, as they appear at the start of a
# JSON "Path") to the source directories (values) that hold the matching
# .rst/.py files. The empty-string entry is the root directory and must stay
# last so that the more specific prefixes are tried first.
source_to_build_mapping = {
    "beginner": "beginner_source",
    "recipes": "recipes_source",
    "distributed": "distributed",
    "intermediate": "intermediate_source",
    "prototype": "prototype_source",
    "advanced": "advanced_source",
    "": "",  # root dir for index.rst
}

# Use git log to get the creation date of the file
def get_creation_date(file_path):
    """Return the date on which *file_path* was first added to the repository.

    Runs ``git log --diff-filter=A --format=%aD -- file_path`` and formats the
    author date of the oldest matching commit as ``"%d %b, %Y"``.

    Args:
        file_path: Path of the file to look up, as understood by git.

    Returns:
        The formatted date string, or ``"Unknown"`` when git fails or cannot
        be started, when no "added" commit is found, or when the date that git
        printed cannot be parsed.
    """
    try:
        result = subprocess.run(
            ["git", "log", "--diff-filter=A", "--format=%aD", "--", file_path],
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # Non-zero git exit status, or the git executable could not be run.
        return "Unknown"

    commit_dates = result.stdout.strip().splitlines()
    if not commit_dates:
        return "Unknown"

    # git log prints newest first, so the last line is the earliest commit
    # that added the file (relevant when a file was removed and re-added).
    try:
        creation_date = datetime.strptime(
            commit_dates[-1].strip(), "%a, %d %b %Y %H:%M:%S %z"
        )
    except ValueError:
        return "Unknown"
    return creation_date.strftime("%d %b, %Y")

# Use git log to get the last updated date of the file
def get_last_updated_date(file_path):
    """Return the date of the most recent commit that touched *file_path*.

    Runs ``git log -1 --format=%aD -- file_path`` and formats the author date
    as ``"%d %b, %Y"``.

    Args:
        file_path: Path of the file to look up, as understood by git.

    Returns:
        The formatted date string, or ``"Unknown"`` when git fails or cannot
        be started, when git prints nothing for the path, or when the printed
        date cannot be parsed.
    """
    try:
        result = subprocess.run(
            ["git", "log", "-1", "--format=%aD", "--", file_path],
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # Non-zero git exit status, or the git executable could not be run.
        return "Unknown"

    raw_date = result.stdout.strip()
    if not raw_date:
        return "Unknown"

    try:
        last_updated_date = datetime.strptime(raw_date, "%a, %d %b %Y %H:%M:%S %z")
    except ValueError:
        return "Unknown"
    return last_updated_date.strftime("%d %b, %Y")

# Try to find the source file with the given base path and the extensions .rst and .py
def find_source_file(base_path):
    """Return the first existing source file for *base_path*, or ``None``.

    Args:
        base_path: Source path without a file extension.

    Returns:
        ``base_path + ".rst"`` if that file exists, otherwise
        ``base_path + ".py"`` if that one exists, otherwise ``None``.
    """
    # The extensions must not contain any whitespace, or no file ever matches.
    for ext in (".rst", ".py"):
        source_file_path = base_path + ext
        if os.path.exists(source_file_path):
            return source_file_path
    return None

# Build the text shown for the "Last Verified" field of one JSON entry
def _format_last_verified(last_verified, status):
    """Return the display text for an entry's verification state.

    Entries whose status is "needs update" or "not verified", or that carry no
    date, are shown as "Not Verified". A date that is not in ``YYYY-MM-DD``
    form is shown as "Unknown". Deprecated entries get a "(Deprecated)" suffix.
    """
    if status in ("needs update", "not verified"):
        formatted = "Not Verified"
    elif last_verified:
        try:
            formatted = datetime.strptime(last_verified, "%Y-%m-%d").strftime(
                "%d %b, %Y"
            )
        except (TypeError, ValueError):
            formatted = "Unknown"
    else:
        formatted = "Not Verified"
    if status == "deprecated":
        # Separate the marker from the preceding text instead of gluing it on.
        formatted += " (Deprecated)"
    return formatted


# Translate a build-side path into the extension-less source path
def _source_base_path(path):
    """Return the source path (without extension) for *path*, or ``None``.

    A prefix from ``source_to_build_mapping`` only matches on a directory
    boundary. Paths that match no named prefix fall back to the root entry
    (the empty-string key) and keep their full relative path.
    """
    for build_subdir, source_subdir in source_to_build_mapping.items():
        if build_subdir and path.startswith(build_subdir + "/"):
            return os.path.join(source_subdir, path[len(build_subdir) + 1 :])
    if "" in source_to_build_mapping:
        # Root-level page: there is no prefix (and no separator) to strip.
        return os.path.join(source_to_build_mapping[""], path)
    return None


# Function to process a JSON file and insert the "Last Verified" information into the HTML files
def process_json_file(json_file_path):
    """Insert creation/update/verification dates into the built HTML pages.

    Reads a JSON list of entries from *json_file_path*. Each entry must have a
    "Path" key and may have "Last Verified" (``YYYY-MM-DD``) and "Status"
    keys. For every entry that is not listed in ``paths_to_skip``, the HTML
    file ``<build_dir>/<Path>.html`` gets a
    ``<p class="date-info-last-verified">`` element inserted after its first
    ``<h1>``. Pages that already contain such an element, have no ``<h1>``,
    or whose HTML or source file cannot be found are reported with a warning
    and left untouched.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        None. Results are written to the HTML files; progress and warnings are
        printed to standard output.
    """
    with open(json_file_path, "r", encoding="utf-8") as json_file:
        json_data = json.load(json_file)

    for entry in json_data:
        path = entry["Path"]
        if path in paths_to_skip:
            print(f"Skipping path: {path}")
            continue

        formatted_last_verified = _format_last_verified(
            entry.get("Last Verified", ""), entry.get("Status", "")
        )

        base_source_path = _source_base_path(path)
        if base_source_path is None:
            print(f"Warning: No mapping found for path {path}")
            continue

        html_file_path = os.path.join(build_dir, path + ".html")
        if not os.path.exists(html_file_path):
            print(
                f"Warning: HTML file not found for path {html_file_path}. "
                "If this is a new tutorial, please add it to the audit JSON file "
                "and set the Verified status and today's date."
            )
            continue

        source_file_path = find_source_file(base_source_path)
        if not source_file_path:
            print(f"Warning: Source file not found for path {base_source_path}.")
            continue

        created_on = get_creation_date(source_file_path)
        last_updated = get_last_updated_date(source_file_path)

        with open(html_file_path, "r", encoding="utf-8") as file:
            soup = BeautifulSoup(file, "html.parser")

        # Check if the <p> tag with class "date-info-last-verified" already exists
        # so that running the script twice does not insert the dates twice.
        existing_date_info = soup.find("p", {"class": "date-info-last-verified"})
        if existing_date_info:
            print(
                f"Warning: <p> tag with class 'date-info-last-verified' already exists in {html_file_path}"
            )
            continue

        h1_tag = soup.find("h1")  # Find the h1 tag to insert the dates
        if not h1_tag:
            print(f"Warning: <h1> tag not found in {html_file_path}")
            continue

        # "class" is a Python keyword, hence the dict unpacking.
        date_info_tag = soup.new_tag("p", **{"class": "date-info-last-verified"})
        date_info_tag["style"] = "color: #6c6c6d; font-size: small;"
        # Add the "Created On", "Last Updated", and "Last Verified" information
        date_info_tag.string = (
            f"Created On: {created_on} | "
            f"Last Updated: {last_updated} | "
            f"Last Verified: {formatted_last_verified}"
        )
        # Insert the new tag after the <h1> tag
        h1_tag.insert_after(date_info_tag)
        # Save back to the HTML.
        with open(html_file_path, "w", encoding="utf-8") as file:
            file.write(str(soup))

# Process the review-data JSON file, then remind the user how to read the warnings
process_json_file(json_file_path)
print(
    "Finished processing JSON file. Please check the output for any warnings. "
    "Pages like `nlp/index.html` are generated only during the full `make docs` "
    "or `make html` build. Warnings about these files when you run `make html-noplot` "
    "can be ignored."
)