diff --git a/.github/workflows/check-files.yml b/.github/workflows/check-files.yml
index 9a71754d8..219e3dbfe 100644
--- a/.github/workflows/check-files.yml
+++ b/.github/workflows/check-files.yml
@@ -32,23 +32,23 @@ jobs:
           git fetch --no-tags --depth=1 origin master
           git switch master
 
-      - name: Run check-files.sh
+      - name: Run check_files.py
         run: |
           # Disable color output
           export NO_COLOR=true
 
           # Run the test
-          bin/delta.sh -a master -b pr -- ./bin/check-files.sh
+          bin/delta.sh -a master -b pr -- ./bin/check_files.py
 
           # Prepare summary
           WARNING_COUNT=$(wc -l < _new-warn.log)
           if [ "$WARNING_COUNT" -gt "0" ]; then
-            echo "New unreachable files found with check-files.sh:"
+            echo "New unreachable files found with check_files.py:"
             echo '```text'
             cat _new-warn.log
             echo '```'
           else
-            echo "No new unreachable files found with check-files.sh"
+            echo "No new unreachable files found with check_files.py"
           fi >> "$GITHUB_STEP_SUMMARY"
 
           # Prepare the artifacts
diff --git a/bin/check-files.sh b/bin/check-files.sh
deleted file mode 100755
index 2832c4e13..000000000
--- a/bin/check-files.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-
-# A script to check for, and list, files that exist in the source directory that
-# meet both of the following conditions:
-#
-# 1. Are not rst files.
-# 2. Are not referenced by any other files in the source directory.
-
-find "source/" ! -name '*.rst' ! -name 'version.txt' -type f -print0 |
-    while IFS= read -r -d '' file_name; do
-        if ! rg -q -i "$(basename "$file_name")" "source/"; then
-            echo "$file_name"
-        fi
-    done
diff --git a/bin/check_files.py b/bin/check_files.py
new file mode 100755
index 000000000..5884cf1d1
--- /dev/null
+++ b/bin/check_files.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+
+"""Tool to check that all files are being used
+
+SPDX-License-Identifier: MIT
+Copyright (C) 2025 Texas Instruments Incorporated - https://www.ti.com
+"""
+
+import functools
+import logging
+import re
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+SOURCE_PATH = Path("source/")
+RST_SOURCE = set(SOURCE_PATH.glob("**/*.rst"))
+IGNORED = re.compile(r"([^_].*\.rst)|(version\.txt)")
+
+
+def get_names(base):
+    """Collect the names of the files under a directory that need checking.
+
+    Directories are skipped, and so is every file whose whole name matches the
+    IGNORED regex.
+
+    :param base: Pathlib path to directory to search
+    :return: Set of file names (final path component only)
+    """
+    files_to_check = set()
+    for file in base.glob("**/*"):
+        if file.is_dir():
+            continue
+
+        name = file.name
+        # fullmatch() rather than match(): match() only anchors the start, so a
+        # name such as "diagram.rst.png" would be ignored as if it were RST.
+        if IGNORED.fullmatch(name):
+            logger.debug("Ignored: %s", name)
+            continue
+
+        files_to_check.add(name)
+    return files_to_check
+
+
+@functools.lru_cache(maxsize=None)
+def _read_text(file):
+    """Return the UTF-8 text of a file, reading it from disk only once.
+
+    check_all() visits every RST file once per candidate name, so the text is
+    cached for the life of the process instead of being re-read every time.
+
+    :param file: Pathlib path to file
+    :return: Content of the file as a string
+    """
+    return file.read_text(encoding="utf-8")
+
+
+def check_file(string, file):
+    """Check to see if the given string appears in the file.
+
+    :param string: String to look up
+    :param file: Pathlib path to file
+    :return: Boolean based on presence of string
+    """
+    # A plain substring test; the string is a literal, so no regex is needed.
+    return string in _read_text(file)
+
+
+def check_all(string):
+    """Scan the RST_SOURCE files for any mention of the given file name.
+
+    Files that are themselves named ``string`` are not searched. That is
+    particularly relevant for RST files that exist to be included in other
+    files, which would otherwise count as used by mentioning their own name.
+
+    :param string: File name to look up
+    :return: Boolean based on presence of string in any other files
+    """
+    for file in RST_SOURCE:
+        # RST_SOURCE holds Path objects and string is a bare file name, so the
+        # comparison must use the final path component; Path == str is never
+        # true.
+        if file.name == string:
+            continue
+
+        if check_file(string, file):
+            return True
+    return False
+
+
+def main():
+    """Log the name of every file under SOURCE_PATH that no RST file mentions."""
+    logging.basicConfig(level=logging.INFO)
+
+    # Sorted so that the report order is the same on every run; the iteration
+    # order of a set of strings is not.
+    for filename in sorted(get_names(SOURCE_PATH)):
+        if check_all(filename):
+            continue
+        logging.info("File not used: %s", filename)
+
+
+if __name__ == "__main__":
+    main()