Skip to content

Commit b684602

Browse files
committed
Optimize compiler options hardening scraper/parser.
Signed-off-by: sueszli <[email protected]>
1 parent 02bbed2 commit b684602

File tree

6 files changed

+96
-176
lines changed

6 files changed

+96
-176
lines changed

docs/Compiler-Hardening-Guides/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ The objective of compiler options hardening is to produce application binaries (
99

1010
## Usage in Tools
1111

12-
A python script is also provided (in the `compiler-options-scraper` directory) that can fetch the latest version of the OpenSSF compiler hardening guide from the internet, obtain the recommended options tables from it and convert them to a machine readable JSON for usage in tools.
12+
A Python script is also provided that fetches the latest version of the OpenSSF compiler hardening guide from the internet, extracts the recommended options tables from it, and converts them to machine-readable JSON for use in tools.
1313

1414
## How to Contribute
1515

docs/Compiler-Hardening-Guides/compiler-options-scraper/.gitignore

Lines changed: 0 additions & 1 deletion
This file was deleted.

docs/Compiler-Hardening-Guides/compiler-options-scraper/README.md

Lines changed: 0 additions & 17 deletions
This file was deleted.

docs/Compiler-Hardening-Guides/compiler-options-scraper/main.py

Lines changed: 0 additions & 149 deletions
This file was deleted.

docs/Compiler-Hardening-Guides/compiler-options-scraper/requirements.txt

Lines changed: 0 additions & 8 deletions
This file was deleted.
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#
2+
# Usage: `uv run get_options.py > options.json`
3+
# See: https://docs.astral.sh/uv/guides/scripts/#declaring-script-dependencies
4+
#
5+
#
6+
# /// script
7+
# requires-python = ">=3.10"
8+
# dependencies = [
9+
# "requests==2.32.4",
10+
# "markdown==3.6",
11+
# "beautifulsoup4==4.12.3",
12+
# ]
13+
# ///
14+
15+
import json
16+
import re
17+
import sys
18+
from pathlib import Path
19+
from typing import Any, Dict, List, Tuple
20+
21+
import markdown
22+
import requests
23+
from bs4 import BeautifulSoup
24+
25+
__author__ = "Yahya Jabary"
26+
__copyright__ = "The OpenSSF Best Practices WG"
27+
__license__ = "Apache-2.0"
28+
29+
30+
def extract_versions(input_string: str) -> Dict[str, str]:
    """Extract toolchain version numbers from a "Supported since" cell.

    Each known tool name (GCC, Clang, Binutils, libc++, libstdc++) is searched
    for case-sensitively, followed by whitespace and a three-component
    ``major.minor.patch`` version number.

    Args:
        input_string: Text to scan, e.g. ``"GCC 4.9.0Clang 3.5.0"``.

    Returns:
        A mapping from tool key (``"gcc"``, ``"clang"``, ``"binutils"``,
        ``"libc++"``, ``"libstdc++"``) to the first version string found for
        that tool. Tools that do not appear in the input are omitted.
    """
    # Every component uses `\d+` so that multi-digit patch levels such as
    # "4.9.10" are captured in full instead of being truncated to "4.9.1".
    version = r"(\d+\.\d+\.\d+)"
    version_patterns = {
        "gcc": r"GCC\s+" + version,
        "clang": r"Clang\s+" + version,
        "binutils": r"Binutils\s+" + version,
        "libc++": r"libc\+\+\s+" + version,
        "libstdc++": r"libstdc\+\+\s+" + version,
    }
    return {
        key: match.group(1)
        for key, pattern in version_patterns.items()
        if (match := re.search(pattern, input_string))
    }
39+
40+
41+
def get_desc_preq_pair(desc: str) -> Tuple[str, str]:
    """Split a description cell into its description and prerequisite parts.

    The prerequisite is everything from the first occurrence of the word
    "Requires" to the end of the string; the description is whatever precedes
    it (trailing whitespace is not stripped).

    Args:
        desc: Full text of the description cell.

    Returns:
        ``(description, prerequisite)``. When "Requires" does not occur, the
        whole input is returned as the description and the prerequisite is
        the empty string.
    """
    before, marker, after = desc.partition("Requires")
    if not marker:
        # No prerequisite clause present.
        return desc, ""
    return before, marker + after
44+
45+
46+
def create_option_dict(row_data: Dict[str, str]) -> Dict[str, Any]:
    """Convert one parsed table row into an option record.

    Args:
        row_data: Row cells keyed by column header. The keys
            ``"Description"``, ``"Compiler Flag"`` and ``"Supported since"``
            are read; a missing key raises ``KeyError``.

    Returns:
        A dict with the keys ``"option"``, ``"description"`` and
        ``"requires"`` (tool name to version mapping), plus
        ``"prerequisite"`` only when the description contains a
        prerequisite clause.
    """
    summary, prereq = get_desc_preq_pair(row_data["Description"])
    flag = row_data["Compiler Flag"]
    supported_versions = extract_versions(row_data["Supported since"])

    # The optional key is merged last so it keeps its position at the end.
    optional_part = {"prerequisite": prereq} if prereq else {}
    return {
        "option": flag,
        "description": summary,
        "requires": supported_versions,
        **optional_part,
    }
59+
60+
61+
def table_to_dict(table: BeautifulSoup) -> List[Dict[str, Any]]:
    """Convert an HTML table element into a list of option records.

    Header names are taken from every ``<th>`` in the table. The first
    ``<tr>`` is skipped, and each remaining row's ``<td>`` texts are paired
    with the headers positionally before being passed to
    ``create_option_dict``.

    Args:
        table: Parsed table element to read.

    Returns:
        One option record per row after the first.
    """
    column_names = [th.get_text().strip() for th in table.find_all("th")]

    options = []
    for tr in table.find_all("tr")[1:]:
        cell_texts = [td.get_text().strip() for td in tr.find_all("td")]
        # zip() pairs headers and cells by position; extras on either side
        # are dropped.
        options.append(create_option_dict(dict(zip(column_names, cell_texts))))
    return options
68+
69+
70+
def get_content() -> str:
    """Return the Markdown source of the compiler hardening guide.

    A copy of the guide in the current working directory is preferred; when
    none exists, the guide is downloaded from the upstream repository's
    ``main`` branch.

    Returns:
        The guide's Markdown text.

    Raises:
        RuntimeError: If the remote fallback does not answer with HTTP 200.
        requests.RequestException: If the download fails or times out.
    """
    filename = "Compiler-Options-Hardening-Guide-for-C-and-C++.md"
    local_copy = Path.cwd() / filename
    if local_copy.is_file():
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        return local_copy.read_text(encoding="utf-8")

    # remote fallback if not found in current working directory
    fallback = "https://raw.githubusercontent.com/ossf/wg-best-practices-os-developers/refs/heads/main/docs/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C%2B%2B.md"
    # Without a timeout, requests would wait indefinitely on a stalled server.
    response = requests.get(fallback, timeout=30)
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if response.status_code != 200:
        raise RuntimeError(f"failed to fetch {fallback}: HTTP {response.status_code}")
    return response.text
81+
82+
83+
if __name__ == "__main__":
    # Script entry point: load the guide, parse its option tables and print
    # the result as JSON on stdout (intended to be redirected to a file).
    content = get_content()

    html = markdown.markdown(content, extensions=["tables"])
    soup = BeautifulSoup(html, "html.parser")
    tables = soup.find_all("table")

    # The first YYYY-MM-DD date in the document is used as the version.
    version_match = re.search(r"\b\d{4}-\d{2}-\d{2}\b", content)
    if version_match is None:
        # sys.exit(str) writes to stderr, keeping stdout free of non-JSON text.
        sys.exit("error: no YYYY-MM-DD version date found in the hardening guide")
    version = version_match.group(0)

    # The options are read from the second and third tables of the document;
    # fail with a clear message if the guide's layout no longer provides them.
    if len(tables) < 3:
        sys.exit(f"error: expected at least 3 tables in the hardening guide, found {len(tables)}")
    compile_time_options = table_to_dict(tables[1])
    runtime_options = table_to_dict(tables[2])

    output = {"version": version, "options": compile_time_options + runtime_options}
    json.dump(output, fp=sys.stdout, indent=4)

0 commit comments

Comments
 (0)