1+ #!/usr/bin/env python3
2+ """Download and process IBM Cloud Terraform white paper from source markdown.
3+
4+ This script downloads the white paper markdown from the IBM Cloud documentation
5+ repository, processes it by replacing template variables with actual values from
6+ keywords.yml, removes Jekyll metadata, and saves the result to static/terraform-white-paper.md.
7+ """
8+
9+ import re
10+ from pathlib import Path
11+ import httpx
12+ import yaml
13+
14+
def load_keyword_mappings(keyword_path=None):
    """Load keyword mappings from the local keywords.yml file.

    Args:
        keyword_path: Optional path (str or Path) of the YAML file to read.
            When omitted, ``keywords.yml`` in the parent of this script's
            directory is used.

    Returns:
        The mapping stored under the top-level ``keyword`` key. An empty dict
        is returned (after printing a warning) when the file is missing,
        cannot be read or parsed, or does not hold a mapping under that key.
    """
    if keyword_path is None:
        keyword_path = Path(__file__).parent.parent / "keywords.yml"
    else:
        keyword_path = Path(keyword_path)

    if not keyword_path.exists():
        print(f"Warning: keywords.yml not found at {keyword_path} ")
        return {}

    # Only I/O, decoding and YAML syntax problems are expected here; anything
    # else is a programming error and should surface instead of being hidden.
    try:
        with open(keyword_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
        print(f"Warning: Could not load keyword mappings: {e} ")
        return {}

    # An empty file parses to None and a malformed one may parse to a list or
    # scalar; callers rely on getting a dict back, so normalise both cases.
    keyword_dict = data.get('keyword') if isinstance(data, dict) else None
    if keyword_dict is None:
        keyword_dict = {}
    if not isinstance(keyword_dict, dict):
        print("Warning: Could not load keyword mappings: 'keyword' is not a mapping")
        return {}

    print(f"Loaded {len(keyword_dict)} keyword mappings")
    return keyword_dict
33+
34+
def process_markdown(text, keywords):
    """Resolve template keywords and strip Jekyll/kramdown markup from markdown.

    The following steps are applied in order:

    1. The YAML front matter (the block between ``---`` lines) is removed,
       but only when it sits at the very top of the document.
    2. ``{{site.data.keyword.NAME}}`` placeholders are replaced with the
       matching value from ``keywords``; unknown names are left unchanged.
    3. ``{: ...}`` attribute lists are removed. A line that holds nothing but
       attribute lists is dropped entirely; an inline attribute list is
       removed without touching the rest of its line.
    4. Leading and trailing whitespace is stripped.

    Args:
        text: Raw markdown source.
        keywords: Mapping of keyword name to replacement value. ``None`` or an
            empty mapping leaves every placeholder untouched. Non-string
            values are converted with ``str()``.

    Returns:
        The cleaned markdown as a string.
    """
    keywords = keywords or {}

    # Anchor at the start of the text (\A, no MULTILINE) and replace once, so
    # later "---" horizontal rules and the content between them survive.
    text = re.sub(
        r'\A\ufeff?\s*---[ \t]*\n(?:.*?\n)?---[ \t]*(?:\n|\Z)',
        '',
        text,
        count=1,
        flags=re.DOTALL,
    )

    def replace_keyword(match):
        value = keywords.get(match.group(1))
        # Keep the placeholder for unknown/null keywords; coerce everything
        # else to str because re.sub rejects non-string replacements.
        return match.group(0) if value is None else str(value)

    text = re.sub(
        r'\{\{\s*site\.data\.keyword\.([^}]+?)\s*\}\}',
        replace_keyword,
        text,
    )

    # Lines consisting solely of attribute lists disappear together with
    # their line break.
    text = re.sub(
        r'^[ \t]*(?:\{:[^}\n]+\}[ \t]*)+(?:\n|\Z)',
        '',
        text,
        flags=re.MULTILINE,
    )
    # Inline attribute lists are removed on their own so the following line
    # is not merged into the current one.
    text = re.sub(r'\{:[^}\n]+\}', '', text)

    return text.strip()
57+
58+
def main():
    """Fetch the IBM Cloud Terraform white paper and write a cleaned copy.

    The markdown is downloaded from GitHub, passed through
    ``process_markdown`` together with the mappings returned by
    ``load_keyword_mappings``, prefixed with a short title/source header and
    written to ``static/terraform-white-paper.md`` next to this script's
    parent directory. An HTTP error status raises via ``raise_for_status``.
    """
    project_root = Path(__file__).parent.parent
    static_dir = project_root / "static"
    markdown_path = static_dir / "terraform-white-paper.md"
    markdown_url = (
        "https://raw.githubusercontent.com/ibm-cloud-docs/terraform-on-ibm-cloud"
        "/refs/heads/master/white-paper.md"
    )

    print("Loading keyword mappings...")
    keywords = load_keyword_mappings()

    # Fetch the raw markdown, following any redirects GitHub issues.
    print(f"Downloading markdown from {markdown_url} ...")
    response = httpx.get(markdown_url, follow_redirects=True)
    response.raise_for_status()
    raw_markdown = response.text
    print(f"Downloaded {len(raw_markdown)} characters")

    print("Processing markdown...")
    body = process_markdown(raw_markdown, keywords)

    # Title, blank line, source note, blank line, body, trailing newline.
    document_parts = (
        "# IBM Cloud Terraform Best Practices",
        "",
        "*Source: https://cloud.ibm.com/docs/terraform-on-ibm-cloud*",
        "",
        body,
        "",
    )
    final_content = "\n".join(document_parts)

    # Make sure the target directory exists before writing the result.
    static_dir.mkdir(exist_ok=True)
    markdown_path.write_text(final_content, encoding="utf-8")
    print(f"Created markdown file: {markdown_path} ")
    print(f"Final content: {len(final_content)} characters")
97+
98+
# Run the download-and-process pipeline only when this file is executed as a
# script; importing the module must not trigger it.
if __name__ == "__main__":
    main()
0 commit comments