chore: add mypy and pytest CI step (#275)

gruebel · web-flow · commit 7a78eed6568f · 2024-11-27T12:17:01.000-05:00
Signed-off-by: gruebel <anton.gruebel@gmail.com>
diff --git a/.github/workflows/pr-python.yaml b/.github/workflows/pr-python.yaml
@@ -30,3 +30,9 @@ jobs:
       - name: Lint
         working-directory: ${{ env.WORKING_DIR }}
         run: ruff check
+      - name: Typing
+        working-directory: ${{ env.WORKING_DIR }}
+        run: mypy
+      - name: Unit tests
+        working-directory: ${{ env.WORKING_DIR }}
+        run: pytest
diff --git a/tools/repo_parser/pyproject.toml b/tools/repo_parser/pyproject.toml
@@ -1,3 +1,9 @@
+[tool.mypy]
+files = "spec_finder.py"
+local_partial_types = true # will become the new default from version 2
+pretty = true
+strict = true
+
 [tool.ruff]
 line-length = 120
 target-version = "py312"
diff --git a/tools/repo_parser/spec_finder.py b/tools/repo_parser/spec_finder.py
@@ -12,12 +12,23 @@
 
 class Config(TypedDict):
     file_extension: str
-    multiline_regex: str | None
-    number_subregex: str | None
-    text_subregex: str | None
+    multiline_regex: str
+    number_subregex: str
+    text_subregex: str
     inline_comment_prefix: str | None
 
 
+Report = TypedDict(
+    'Report',
+    {
+        'extra': list[str],
+        'missing': list[str],
+        'different-text': list[str],
+        'good': list[str],
+    },
+)
+
+
 def _demarkdown(t: str) -> str:
     return t.replace('**', '').replace('`', '').replace('"', '')
 
@@ -47,7 +58,6 @@ def get_spec_parser(code_dir: str) -> Config:
 def get_spec(force_refresh: bool = False, path_prefix: str = './') -> dict[str, Any]:
     spec_path = os.path.join(path_prefix, 'specification.json')
     print('Going to look in ', spec_path)
-    data = ''
     if os.path.exists(spec_path) and not force_refresh:
         with open(spec_path) as f:
             data = ''.join(f.readlines())
@@ -62,25 +72,29 @@ def get_spec(force_refresh: bool = False, path_prefix: str = './') -> dict[str,
         data = ''.join(raw)
         with open(spec_path, 'w') as f:
             f.write(data)
-    return json.loads(data)
+    return cast('dict[str, Any]', json.loads(data))
 
 
 def specmap_from_file(actual_spec: dict[str, Any]) -> dict[str, str]:
     spec_map = {}
     for entry in actual_spec['rules']:
-        number = re.search(r'[\d.]+', entry['id']).group()
         if 'requirement' in entry['machine_id']:
-            spec_map[number] = _demarkdown(entry['content'])
+            if number := re.search(r'[\d.]+', entry['id']):
+                spec_map[number.group()] = _demarkdown(entry['content'])
+            else:
+                print(f'Skipping invalid ID {entry["id"]}')
 
-        if len(entry['children']) > 0:
+        if entry['children']:
             for ch in entry['children']:
-                number = re.search(r'[\d.]+', ch['id']).group()
                 if 'requirement' in ch['machine_id']:
-                    spec_map[number] = _demarkdown(ch['content'])
+                    if number := re.search(r'[\d.]+', ch['id']):
+                        spec_map[number.group()] = _demarkdown(ch['content'])
+                    else:
+                        print(f'Skipping invalid child ID {ch["id"]}')
     return spec_map
 
 
-def find_covered_specs(config: Config, data: str) -> dict[str, dict[str, str]]:
+def find_covered_specs(config: Config, data: str) -> dict[str, str]:
     repo_specs = {}
     for match in re.findall(config['multiline_regex'], data, re.MULTILINE | re.DOTALL):
         match = match.replace('\n', '').replace(config['inline_comment_prefix'], '')
@@ -93,16 +107,13 @@ def find_covered_specs(config: Config, data: str) -> dict[str, dict[str, str]]:
             text = ''.join(text_with_concat_chars).strip()
             # We have to match for ") to capture text with parens inside, so we add the trailing " back in.
             text = _demarkdown(eval('"%s"' % text))
-            entry = repo_specs[number] = {
-                'number': number,
-                'text': text,
-            }
+            repo_specs[number] = text
         except Exception as e:
             print(f"Skipping {match} b/c we couldn't parse it")
     return repo_specs
 
 
-def gen_report(from_spec: dict[str, str], from_repo: dict[str, dict[str, str]]) -> dict[str, set[str]]:
+def gen_report(from_spec: dict[str, str], from_repo: dict[str, str]) -> Report:
     extra = set()
     different_text = set()
     good = set()
@@ -121,34 +132,26 @@ def gen_report(from_spec: dict[str, str], from_repo: dict[str, dict[str, str]])
             different_text.add(number)
 
     return {
-        'extra': extra,
-        'missing': missing,
-        'different-text': different_text,
-        'good': good,
+        'extra': sorted(extra),
+        'missing': sorted(missing),
+        'different-text': sorted(different_text),
+        'good': sorted(good),
     }
 
 
 def main(
+    code_directory: str,
     refresh_spec: bool = False,
     diff_output: bool = False,
     limit_numbers: str | None = None,
-    code_directory: str | None = None,
     json_report: bool = False,
 ) -> None:
-    report = {
-        'extra': set(),
-        'missing': set(),
-        'different-text': set(),
-        'good': set(),
-    }
-
     actual_spec = get_spec(refresh_spec, path_prefix=code_directory)
     config = get_spec_parser(code_directory)
 
     spec_map = specmap_from_file(actual_spec)
 
-    repo_specs = {}
-    missing = set(spec_map.keys())
+    repo_specs: dict[str, str] = {}
     bad_num = 0
 
     for root, dirs, files in os.walk('.', topdown=False):
@@ -179,14 +182,12 @@ def main(
 
     missing = report['missing']
     bad_num += len(missing)
-    if len(missing) > 0:
+    if missing:
         print('In the spec, but not in our tests: ')
         for m in sorted(missing):
             print(f'  {m}: {spec_map[m]}')
 
     if json_report:
-        for k in report.keys():
-            report[k] = sorted(list(report[k]))
         report_txt = json.dumps(report, indent=4)
         loc = os.path.join(code_directory, '%s-report.json' % config['file_extension'])
         with open(loc, 'w') as f:
@@ -206,9 +207,9 @@ def main(
 
     args = parser.parse_args()
     main(
+        code_directory=args.code_directory,
         refresh_spec=args.refresh_spec,
         diff_output=args.diff_output,
         limit_numbers=args.specific_numbers,
-        code_directory=args.code_directory,
         json_report=args.json_report,
     )
diff --git a/tools/repo_parser/test_spec_finder.py b/tools/repo_parser/test_spec_finder.py
@@ -1,11 +1,12 @@
-from spec_finder import find_covered_specs, gen_report
+from spec_finder import Config, find_covered_specs, gen_report, specmap_from_file
 
 
 def test_simple_singleline():
     text = """
     // spec:4.3.6:The after stage MUST run after flag resolution occurs. It accepts a hook context (required), flag evaluation details (required) and hook hints (optional). It has no return value.:end
     """
-    cfg = {
+    cfg: Config = {
+        'file_extension': 'rust',
         'multiline_regex': r'spec:(.*):end',
         'number_subregex': r'(?P<number>[\d.]+):',
         'text_subregex': r'[\d.]+:(.*)',
@@ -14,7 +15,7 @@ def test_simple_singleline():
     output = find_covered_specs(cfg, text)
     assert '4.3.6' in output
     assert (
-        output['4.3.6']['text']
+        output['4.3.6']
         == 'The after stage MUST run after flag resolution occurs. It accepts a hook context (required), flag evaluation details (required) and hook hints (optional). It has no return value.'
     )
 
@@ -26,7 +27,8 @@ def test_multiline_comment():
     // context (required), exception representing what went wrong (required), and
     // hook hints (optional). It has no return value.:end
     """
-    cfg = {
+    cfg: Config = {
+        'file_extension': 'rust',
         'multiline_regex': r'spec:(.*):end',
         'number_subregex': r'(?P<number>[\d.]+):',
         'text_subregex': r'[\d.]+:(.*)',
@@ -35,7 +37,7 @@ def test_multiline_comment():
     output = find_covered_specs(cfg, text)
     assert '4.3.7' in output
     assert (
-        output['4.3.7']['text']
+        output['4.3.7']
         == """The error hook MUST run when errors are encountered in the before stage, the after stage or during flag resolution. It accepts hook context (required), exception representing what went wrong (required), and hook hints (optional). It has no return value."""
     )
 
@@ -59,7 +61,68 @@ def test_report():
     assert len(report['missing']) == 1
     assert len(report['extra']) == 1
 
-    assert report['good'] == set(['1.2.3'])
-    assert report['different-text'] == set(['2.3.4'])
-    assert report['missing'] == set(['3.4.5'])
-    assert report['extra'] == set(['4.5.6'])
+    assert report['good'] == ['1.2.3']
+    assert report['different-text'] == ['2.3.4']
+    assert report['missing'] == ['3.4.5']
+    assert report['extra'] == ['4.5.6']
+
+
+def test_report_with_found_spec():
+    spec = {
+        '4.3.6': 'good text',
+    }
+    text = """
+        // spec:4.3.6:good text:end
+        """
+    cfg: Config = {
+        'file_extension': 'rust',
+        'multiline_regex': r'spec:(.*):end',
+        'number_subregex': r'(?P<number>[\d.]+):',
+        'text_subregex': r'[\d.]+:(.*)',
+        'inline_comment_prefix': '//',
+    }
+    output = find_covered_specs(cfg, text)
+    report = gen_report(spec, output)
+
+    assert report['good'] == ['4.3.6']
+
+
+def test_specmap_from_file():
+    actual_spec = {
+        'rules': [
+            {
+                'id': 'Requirement 1.1.1',
+                'machine_id': 'requirement_1_1_1',
+                'content': 'The `API`, and any state it maintains SHOULD exist as a global singleton, even in cases wherein multiple versions of the `API` are present at runtime.',
+                'RFC 2119 keyword': 'SHOULD',
+                'children': [],
+            },
+            {
+                'id': 'Condition 2.2.2',
+                'machine_id': 'condition_2_2_2',
+                'content': 'The implementing language type system differentiates between strings, numbers, booleans and structures.',
+                'RFC 2119 keyword': None,
+                'children': [
+                    {
+                        'id': 'Conditional Requirement 2.2.2.1',
+                        'machine_id': 'conditional_requirement_2_2_2_1',
+                        'content': 'The `feature provider` interface MUST define methods for typed flag resolution, including boolean, numeric, string, and structure.',
+                        'RFC 2119 keyword': 'MUST',
+                        'children': [],
+                    }
+                ],
+            },
+        ]
+    }
+
+    spec_map = specmap_from_file(actual_spec)
+
+    assert len(spec_map) == 2
+    assert (
+        spec_map['1.1.1']
+        == 'The API, and any state it maintains SHOULD exist as a global singleton, even in cases wherein multiple versions of the API are present at runtime.'
+    )
+    assert (
+        spec_map['2.2.2.1']
+        == 'The feature provider interface MUST define methods for typed flag resolution, including boolean, numeric, string, and structure.'
+    )