diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6ab5ca4..f5e881d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,3 +45,4 @@ repos: - . args: [--no-strict-optional, --ignore-missing-imports, --show-error-codes] + exclude: tests/ diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index aa2203c..f9ef80c 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -5,3 +5,10 @@ language: python types: [text] stages: [commit, push, manual] +- id: arabic-presentation-form + name: Arabic Presentation Form Normalizer + description: Replaces Arabic Presentation Forms and other contextual forms with their default characters. + entry: arabic-presentation-form + language: python + types: [text] + stages: [commit, push, manual] diff --git a/.vscode/launch.json b/.vscode/launch.json index 4edae3e..9cf2d40 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,7 +6,7 @@ "configurations": [ { "name": "pyModule", - "type": "python", + "type": "debugpy", "request": "launch", "module": "pre_commit_hooks.check_header_footer", "console": "integratedTerminal", @@ -14,13 +14,14 @@ }, { "name": "pytest", - "type": "python", + "type": "debugpy", "request": "launch", "module": "pytest", "console": "integratedTerminal", "args": [ "--no-cov", // disable as it affects breakpoints - "-vv", "-k", + "-vv", + "-k", "" // add test function name here ], "justMyCode": true diff --git a/.vscode/settings.json b/.vscode/settings.json index ea3674f..8d264d4 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,8 +3,8 @@ "editor.wordBasedSuggestions": "off", "editor.defaultFormatter": "charliermarsh.ruff", "editor.codeActionsOnSave": { - "source.fixAll": "explicit", - "source.organizeImports": "explicit" + "source.fixAll": "always", + "source.organizeImports": "always" } } } diff --git a/README.md b/README.md index b9851fd..41f6829 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,35 @@ repos: Future work: 1. 
Support a year parameter that can be used to apply fixes. + +### arabic-presentation-form + +Replaces characters from the Arabic Presentation Forms blocks (A or B) with their 'default' Unicode equivalents. +One use case is the 'Scheherazade New' font, which does not support these characters. + +Arguments: +- `--excluded-chars`: Regex of characters to exclude from being fixed. +- `--custom-rules`: Rules that extend or override the tool's built-in configuration. Format and example below: + ```json + "RuleName": {"rule": {"ReplacementCharacter(s)": "RegexOfApplicableCharacter(s)"}} + "ʾalif": {"rule": {"\u0627": "(\ufe8d|\ufe8e)"}}, + ``` + +Example that extends the applicable file types and restricts the hook to a specific folder (everything under `site/data`): + +```yaml +repos: + - repo: https://github.com/adehad/pre-commit-hooks + rev: main + hooks: + - id: arabic-presentation-form + entry: arabic-presentation-form + language: python + types_or: [text, json, markdown] + args: [--excluded-chars, (ﷺ)] + files: ^site/data/ +``` + ## Local Installation ```console diff --git a/pyproject.toml b/pyproject.toml index c1e164f..380bde4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,15 +6,13 @@ build-backend = "hatchling.build" name = "pre_commit_hooks" description = "A selection of pre-commit hooks for pre-commit.com." 
readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.10" license = "GPL-3.0-or-later" keywords = ["pre-commit"] authors = [{ name = "adehad", email = "26027314+adehad@users.noreply.github.com" }] classifiers = [ "Development Status :: 4 - Beta", "Programming Language :: Python", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: Implementation :: CPython", @@ -30,6 +28,7 @@ Source = "https://github.com/adehad/pre-commit-hooks/" [project.scripts] check-header-footer = "pre_commit_hooks.check_header_footer:main" +arabic-presentation-form = "pre_commit_hooks.arabic_presentation_form:main" [tool.hatch.build] sources = ["src"] @@ -66,18 +65,20 @@ lint = "python -m pre_commit run --color=always {args:--all-files}" # Tests ######################################################################################## [[tool.hatch.envs.test.matrix]] -python = ["38", "39", "310", "311"] +python = ["310", "311"] ######################################################################################## # External Tool Config ######################################################################################## [tool.mypy] -python_version = 3.8 +python_version = '3.10' +strict = true ignore_missing_imports = true namespace_packages = true show_error_codes = true strict_optional = true warn_unused_configs = true +exclude = ["tests/"] [tool.coverage.run] branch = true @@ -87,7 +88,7 @@ omit = ["src/pre_commit_hooks/__about__.py"] [tool.coverage.report] exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] -[tool.ruff] +[tool.ruff.lint] select = [ "E", # pycodestyle "W", # pycodestyle @@ -99,22 +100,23 @@ select = [ ] ignore = [] -# Same as Black. -line-length = 88 - # Allow unused variables when underscore-prefixed. 
def apply_rules_to_lines(
    line: str,
    rules: RulesDict,
    exclude: re.Pattern[str],
    file_name: pathlib.Path | str,
    line_no: str | int,
) -> tuple[ExitCode, str]:
    """Normalise the non-general Arabic characters found in a single line.

    Every character matched by ``exclude`` is left untouched and is ignored
    when deciding whether the line needs fixing. All other characters are
    passed through ``apply_rule``; each replacement (or each character that
    is still non-general afterwards) is reported on stdout as
    ``file:line:col``.

    Args:
        line (str): Line to check the rules against.
        rules (RulesDict): Compiled pattern -> replacement mapping.
        exclude (re.Pattern): Characters to exclude from the check.
        file_name (pathlib.Path | str): Name of the file being checked
            (used for reporting only).
        line_no (str | int): Line number being checked (reporting only).

    Returns:
        (ExitCode, str): ``(ExitCode.OK, line)`` when no non-excluded
        character is in a non-general form, otherwise
        ``(ExitCode.FAIL, new_line)`` with all applicable rules applied.
    """
    # Record excluded characters by index so that column numbers stay exact
    # even when a match spans several characters. Empty matches (e.g. from the
    # default "" pattern) contribute no index.
    excluded_cols = {
        col for match in exclude.finditer(line) for col in range(*match.span())
    }

    # Inspect every remaining character: looking only at ``max(line)`` misses
    # presentation forms whenever the line also holds a higher code point
    # (emoji, U+FFFD, full-width forms, ...).
    needs_fix = any(
        char_map.is_contains_non_general_form(c)
        for col, c in enumerate(line)
        if col not in excluded_cols
    )
    if not needs_fix:
        return ExitCode.OK, line

    # ``apply_rule`` is memoised, so its ``rules`` argument must be hashable.
    # Wrap once per line rather than once per character.
    hashable_rules = HashableDict(rules)
    new_chars: list[str] = []

    for col_no, c in enumerate(line, start=1):
        if (col_no - 1) in excluded_cols:
            # Excluded characters must survive verbatim.
            new_chars.append(c)
            continue

        new_c = apply_rule(rules=hashable_rules, character=c)
        if c != new_c:
            status = "[Fixed]"
        elif char_map.is_contains_non_general_form(new_c):
            status = "[Not Fixed]"
        else:
            status = ""

        if status:
            new_c_as_unicode_hex = [f"\\u{ord(ch):04x}" for ch in new_c]
            print(
                f"{status} {file_name}:{line_no}:{col_no} "
                f"[{new_c} ({new_c_as_unicode_hex})]"
            )

        new_chars.append(new_c)

    return ExitCode.FAIL, "".join(new_chars)


def get_rules(custom_rules: char_map.CHAR_MAP_TYPE) -> RulesDict:
    """Compile the built-in character map merged with ``custom_rules``.

    Args:
        custom_rules (char_map.CHAR_MAP_TYPE): Additional rules keyed by rule
            name. An entry whose name matches a built-in rule replaces it.

    Returns:
        RulesDict: Compiled regex -> replacement string.
    """
    complete_rules: char_map.CHAR_MAP_TYPE = {**char_map.CHAR_MAP, **custom_rules}
    return {
        re.compile(expected_regex): expected_out
        for char_mapping_rule in complete_rules.values()
        for expected_out, expected_regex in char_mapping_rule["rule"].items()
    }


@functools.lru_cache
def apply_rule(rules: RulesDict, character: str) -> str:
    """Apply the first matching rule to the character.

    Results are memoised, so ``rules`` must be hashable.

    Args:
        rules (RulesDict): Rules to apply for the character.
        character (str): The letter/character to check against.

    Returns:
        str: The character after applying the first rule whose pattern
        matches at its start, or the character unchanged when none does.
    """
    for reg_pattern, replace_char in rules.items():
        if reg_pattern.match(character):
            return reg_pattern.sub(replace_char, character)
    return character
class ArabicPresentationFormArgs(ABCArgs):
    """Parsed command-line arguments of the arabic-presentation-form hook."""

    # Regex source; characters it matches are excluded from the check.
    excluded_chars: str
    # Rules layered over the built-in character map, keyed by rule name.
    custom_rules: char_map.CHAR_MAP_TYPE


class ArabicPresentationFormChecker(ABCHook):
    """Hook that rewrites Arabic presentation-form characters in files."""

    def setup_parser(self) -> None:
        """Custom arguments."""
        self.parser.add_argument(
            "--excluded-chars",
            type=str,
            default="",
            metavar="exclude-char-regex",
            help="Regex for characters to exclude. e.g. (ﷺ)",
        )
        self.parser.add_argument(
            "--custom-rules",
            type=load_json_source,
            default={},
            metavar="Path-OR-JSON-String",
            help=(
                '"RuleName": {"rule": {"ReplacementCharacter(s)": "RegexOfApplicableCharacter(s)"}}'  # noqa: E501
                '. e.g. "ʾalif": {"rule": {"\u0627": "(\ufe8d|\ufe8e)"}},'  # noqa: RUF001
                + ". To exclude a unicode character, overwrite its default entry."
            ),
        )

    def implementation(
        self,
        file_name: pathlib.Path,
        args: ArabicPresentationFormArgs,
    ) -> ExitCode:
        """Fix one file in place and report whether anything needed fixing.

        Args:
            file_name (pathlib.Path): File to check; read and written as UTF-8.
            args (ArabicPresentationFormArgs): Parsed hook arguments.

        Returns:
            ExitCode: Bitwise OR of the per-line results, i.e. ``OK`` only when
            no line contained a non-excluded, non-general character.
        """
        exit_code = int(ExitCode.OK)
        exclude_regex = re.compile(args.excluded_chars)
        # The rule set does not depend on the line, so compile it once per file.
        rules = get_rules(args.custom_rules)

        # newline="" disables newline translation, so existing CRLF/LF endings
        # round-trip unchanged instead of being rewritten to the platform default.
        with file_name.open("r", encoding="utf-8", newline="") as f:
            original_lines = f.readlines()

        new_file_lines = []
        for line_no, line in enumerate(original_lines, start=1):
            intermediate_exit_code, new_line = apply_rules_to_lines(
                line=line,
                line_no=line_no,
                file_name=file_name,
                rules=rules,
                exclude=exclude_regex,
            )
            exit_code |= intermediate_exit_code

            # `or " "` guards max() against a line made only of excluded chars.
            if char_map.is_contains_non_general_form(
                max(exclude_regex.sub("", new_line) or " ")
            ):
                print(f"Incomplete Fixes Applied: {file_name}:{line_no}")

            new_file_lines.append(new_line)

        # Leave files that needed no change untouched (content and mtime).
        if new_file_lines != original_lines:
            with file_name.open("w", encoding="utf-8", newline="") as f:
                f.writelines(new_file_lines)
        return ExitCode(exit_code)


def main(argv: Sequence[str] | None = None) -> int:
    """Main entrypoint: run the checker over ``argv`` and return its exit code."""
    argparser = ArabicPresentationFormChecker()
    return argparser.run(argv=argv)
+""" + +from __future__ import annotations + +import enum + +REMAP_RULE_TYPE = dict[str, dict[str, str]] +CHAR_MAP_TYPE = dict[str, REMAP_RULE_TYPE] + +# spell-checker: disable +# ruff: noqa: E501, RUF001, RUF003 +# fmt: off +CHAR_MAP: CHAR_MAP_TYPE = { + # Contextual + "ʾalif": {"rule": {"\u0627": "(\ufe8d|\ufe8e)"}}, # "ا": "(ﺍ|ﺎ)" + "bāʾ": {"rule": {"\u0628": "(\ufe8f|\ufe90|\ufe92|\ufe91)"}}, # "ب": "(ﺏ|ﺐ|ﺒ|ﺑ)" + "tāʾ": {"rule": {"\u062a": "(\ufe95|\ufe96|\ufe98|\ufe97)"}}, # "ت": "(ﺕ|ﺖ|ﺘ|ﺗ)" + "ṯāʾ": {"rule": {"\u062b": "(\ufe99|\ufe9a|\ufe9c|\ufe9b)"}}, # "ث": "(ﺙ|ﺚ|ﺜ|ﺛ)" + "ǧīm": {"rule": {"\u062c": "(\ufe9d|\ufe9e|\ufea0|\ufe9f)"}}, # "ج": "(ﺝ|ﺞ|ﺠ|ﺟ)" + "ḥāʾ": {"rule": {"\u062d": "(\ufea1|\ufea2|\ufea4|\ufea3)"}}, # "ح": "(ﺡ|ﺢ|ﺤ|ﺣ)" + "ḫāʾ": {"rule": {"\u062e": "(\ufea5|\ufea6|\ufea8|\ufea7)"}}, # "خ": "(ﺥ|ﺦ|ﺨ|ﺧ)" + "dāl": {"rule": {"\u062f": "(\ufea9|\ufeaa)"}}, # "د": "(ﺩ|ﺪ)" + "ḏāl": {"rule": {"\u0630": "(\ufeab|\ufeac)"}}, # "ذ": "(ﺫ|ﺬ)" + "rāʾ": {"rule": {"\u0631": "(\ufead|\ufeae)"}}, # "ر": "(ﺭ|ﺮ)" + "zayn/zāy": {"rule": {"\u0632": "(\ufeaf|\ufeb0)"}}, # "ز": "(ﺯ|ﺰ)" + "sīn": {"rule": {"\u0633": "(\ufeb1|\ufeb2|\ufeb4|\ufeb3)"}}, # "س": "(ﺱ|ﺲ|ﺴ|ﺳ)" + "šīn": {"rule": {"\u0634": "(\ufeb5|\ufeb6|\ufeb8|\ufeb7)"}}, # "ش": "(ﺵ|ﺶ|ﺸ|ﺷ)" + "ṣād": {"rule": {"\u0635": "(\ufeb9|\ufeba|\ufebc|\ufebb)"}}, # "ص": "(ﺹ|ﺺ|ﺼ|ﺻ)" + "ḍād": {"rule": {"\u0636": "(\ufebd|\ufebe|\ufec0|\ufebf)"}}, # "ض": "(ﺽ|ﺾ|ﻀ|ﺿ)" + "ṭāʾ": {"rule": {"\u0637": "(\ufec1|\ufec2|\ufec4|\ufec3)"}}, # "ط": "(ﻁ|ﻂ|ﻄ|ﻃ)" + "ẓāʾ": {"rule": {"\u0638": "(\ufec5|\ufec6|\ufec8|\ufec7)"}}, # "ظ": "(ﻅ|ﻆ|ﻈ|ﻇ)" + "ʿayn": {"rule": {"\u0639": "(\ufec9|\ufeca|\ufecc|\ufecb)"}}, # "ع": "(ﻉ|ﻊ|ﻌ|ﻋ)" + "ġayn": {"rule": {"\u063a": "(\ufecd|\ufece|\ufed0|\ufecf)"}}, # "غ": "(ﻍ|ﻎ|ﻐ|ﻏ)" + "fāʾ": {"rule": {"\u0641": "(\ufed1|\ufed2|\ufed4|\ufed3)"}}, # "ف": "(ﻑ|ﻒ|ﻔ|ﻓ)" + "qāf": {"rule": {"\u0642": "(\ufed5|\ufed6|\ufed8|\ufed7)"}}, # "ق": "(ﻕ|ﻖ|ﻘ|ﻗ)" + "kāf": {"rule": {"\u0643": "(\ufed9|\ufeda|\ufedc|\ufedb)"}}, # 
@enum.unique
class ArabicUnicodeGroup(enum.Enum):
    """Unicode Groups for Arabic Characters as of Unicode 15.1."""

    Unknown = enum.auto()
    """Maybe not Arabic."""

    Arabic = enum.auto()
    """256 characters"""
    ArabicSupplement = enum.auto()
    """48 characters"""
    ArabicExtendedB = enum.auto()
    """41 characters"""
    ArabicExtendedA = enum.auto()
    """96 characters"""
    ArabicPresentationFormsA = enum.auto()
    """631 characters"""
    ArabicPresentationFormsB = enum.auto()
    """141 characters"""
    RumiNumeralSymbols = enum.auto()
    """31 characters"""
    ArabicExtendedC = enum.auto()
    """3 characters"""
    IndicSiyaqNumbers = enum.auto()
    """68 characters"""
    OttomanSiyaqNumbers = enum.auto()
    """61 characters"""
    ArabicMathematicalAlphabeticSymbols = enum.auto()
    """143 characters"""

    @classmethod
    def get_type(cls, input_char: str) -> "ArabicUnicodeGroup":
        """Return the Arabic Unicode Group that ``input_char`` falls in.

        Args:
            input_char (str): The character to classify. Bounds are compared
                as strings, so an empty string yields ``Unknown``.

        Returns:
            ArabicUnicodeGroup: The matching block, or ``Unknown`` when the
            character lies outside every block listed here.
        """
        if "\u0600" <= input_char <= "\u06ff":
            return cls.Arabic
        if "\u0750" <= input_char <= "\u077f":
            return cls.ArabicSupplement
        if "\u0870" <= input_char <= "\u089f":
            return cls.ArabicExtendedB
        if "\u08a0" <= input_char <= "\u08ff":
            return cls.ArabicExtendedA
        if "\ufb50" <= input_char <= "\ufdff":
            return cls.ArabicPresentationFormsA
        if "\ufe70" <= input_char <= "\ufeff":
            return cls.ArabicPresentationFormsB
        # Blocks beyond U+FFFF need the 8-digit \U escape. A 4-digit \u escape
        # followed by one more hex digit is a two-character string (for example
        # U+10E6 followed by "0"), which made these ranges match Georgian and
        # Vietnamese letters instead of the intended blocks.
        if "\U00010e60" <= input_char <= "\U00010e7f":
            return cls.RumiNumeralSymbols
        if "\U00010ec0" <= input_char <= "\U00010eff":
            return cls.ArabicExtendedC
        if "\U0001ec70" <= input_char <= "\U0001ecbf":
            return cls.IndicSiyaqNumbers
        if "\U0001ed00" <= input_char <= "\U0001ed4f":
            return cls.OttomanSiyaqNumbers
        if "\U0001ee00" <= input_char <= "\U0001eeff":
            return cls.ArabicMathematicalAlphabeticSymbols
        return cls.Unknown


# Groups that are treated as generally supported; everything else
# (ArabicExtendedA and both Presentation Forms blocks) is "non-general".
_GENERAL_FORM_GROUPS = frozenset(
    {
        ArabicUnicodeGroup.Unknown,
        ArabicUnicodeGroup.Arabic,
        ArabicUnicodeGroup.ArabicSupplement,
        ArabicUnicodeGroup.ArabicExtendedB,
        ArabicUnicodeGroup.ArabicExtendedC,
        ArabicUnicodeGroup.RumiNumeralSymbols,
        ArabicUnicodeGroup.IndicSiyaqNumbers,
        ArabicUnicodeGroup.OttomanSiyaqNumbers,
        ArabicUnicodeGroup.ArabicMathematicalAlphabeticSymbols,
    }
)


def is_contains_non_general_form(char: str) -> bool:
    """True if any character of ``char`` is not generally supported.

    Every character is classified individually, so a higher code point
    elsewhere in the string (for example an emoji) cannot hide a presentation
    form. An empty string returns False.
    """
    return any(
        ArabicUnicodeGroup.get_type(c) not in _GENERAL_FORM_GROUPS for c in char
    )
def load_json_source(file_or_json_str: str) -> dict[str, Any]:
    """Load a potential JSON source.

    The argument is first tried as a path to an existing file; otherwise it is
    parsed as a JSON document itself.

    Args:
        file_or_json_str (str): path to JSON file, or stringified JSON.

    Returns:
        dict[str, Any]: Loaded JSON. An empty dict (after printing a notice)
        when the string is neither an existing file nor a JSON object.

    Raises:
        json.JSONDecodeError: If the argument is an existing file whose
            content is not valid JSON.
    """
    default: dict[str, Any] = {}

    try:
        is_file = pathlib.Path(file_or_json_str).is_file()
    except (OSError, ValueError):
        # Inline JSON is frequently not a legal path at all (for example longer
        # than the OS file-name limit); before Python 3.13 `is_file()` lets
        # that OSError propagate instead of returning False.
        is_file = False

    if is_file:
        with pathlib.Path(file_or_json_str).open(encoding="utf-8") as fp:
            return typing.cast(dict[str, Any], json.load(fp))

    try:
        loaded = json.loads(file_or_json_str)
    except (TypeError, ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError subclass.
        loaded = None

    if not isinstance(loaded, dict):
        # Also reject valid JSON that is not an object: callers expect a dict.
        print("Unsupported JSON source, no custom rules applied!")
        return default
    return loaded


class HashableDict(dict):  # type: ignore[type-arg]
    """Hashable dict but not immutable.

    The hash is derived from the current keys and values, so all values must
    themselves be hashable, and mutating an instance after it has been used as
    a key (for example in an ``lru_cache``) changes its hash.
    """

    def __hash__(self) -> int:  # type: ignore[override]
        """Hash of the key set combined with the set of values."""
        return hash((frozenset(self), frozenset(self.values())))
def generic_test(
    test_file: pathlib.Path,
    custom_rules: _hook.char_map.CHAR_MAP_TYPE | None = None,
    excluded_chars: str = "",
) -> ExitCode:
    """Helper function to coordinate the running of the test.

    Args:
        test_file (pathlib.Path): test file; the hook fixes it in place.
        custom_rules (CHAR_MAP_TYPE | None): extra rules, round-tripped through
            JSON the same way the command line would supply them. ``None``
            means no custom rules.
        excluded_chars (str): regex of characters the hook must ignore.

    Returns:
        ExitCode: the result of the `.implementation()` call.
    """
    parsed_args = _hook.ArabicPresentationFormArgs(
        filenames=[test_file],
        custom_rules=load_json_source(json.dumps(custom_rules or {})),
        excluded_chars=excluded_chars,
    )
    checker = _hook.ArabicPresentationFormChecker()
    return checker.implementation(test_file, parsed_args)


@pytest.mark.parametrize(
    ("raw_arabic", "parsed_arabic", "custom_rules", "excluded_chars", "expected"),
    [
        ("ﻃَﺎﻟَﻤَﺎ", "طَالَمَا", {}, "", ExitCode.FAIL),
        ("ﻃَﺎﻟَﻤَﺎ", "ﻃَالَمَا", {"ṭāʾ": {"rule": {"\u0637": "(NOPE)"}}}, "", ExitCode.FAIL),
        ("بَابَ", "بَابَ", {}, "", ExitCode.OK),
        ("ﷺ", "ﷺ", {}, "(ﷺ)", ExitCode.OK),
    ],
    ids=["fix", "unfixable (rule override)", "no fix required", "excluded char"],
)
def test_fixer(
    raw_arabic: str,
    parsed_arabic: str,
    custom_rules: _hook.char_map.CHAR_MAP_TYPE,
    excluded_chars: str,
    expected: ExitCode,
    tmp_path: pathlib.Path,
) -> None:
    # GIVEN: a test file
    # (tmp_path is removed by pytest, so nothing leaks, and no handle is held
    # open while the hook rewrites the file.)
    test_file = tmp_path / "arabic.txt"
    test_file.write_text(raw_arabic, encoding="utf-8")
    # and the rules to test with
    # WHEN: we run against the test file
    return_code = generic_test(
        test_file,
        custom_rules=custom_rules,
        excluded_chars=excluded_chars,
    )
    # THEN: the file holds the expected text
    assert parsed_arabic in test_file.read_text(encoding="utf-8")
    # and we get the expected exit code
    assert return_code == expected