Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@ repos:
- .
args:
[--no-strict-optional, --ignore-missing-imports, --show-error-codes]
exclude: tests/
7 changes: 7 additions & 0 deletions .pre-commit-hooks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,10 @@
language: python
types: [text]
stages: [commit, push, manual]
- id: arabic-presentation-form
name: Arabic Presentation Form Normalizer
description: Replaces Arabic Presentation Forms and other contextual forms with their default characters.
entry: arabic-presentation-form
language: python
types: [text]
stages: [commit, push, manual]
7 changes: 4 additions & 3 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,22 @@
"configurations": [
{
"name": "pyModule",
"type": "python",
"type": "debugpy",
"request": "launch",
"module": "pre_commit_hooks.check_header_footer",
"console": "integratedTerminal",
"justMyCode": true
},
{
"name": "pytest",
"type": "python",
"type": "debugpy",
"request": "launch",
"module": "pytest",
"console": "integratedTerminal",
"args": [
"--no-cov", // disable as it affects breakpoints
"-vv", "-k",
"-vv",
"-k",
"" // add test function name here
],
"justMyCode": true
Expand Down
4 changes: 2 additions & 2 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"editor.wordBasedSuggestions": "off",
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.codeActionsOnSave": {
"source.fixAll": "explicit",
"source.organizeImports": "explicit"
"source.fixAll": "always",
"source.organizeImports": "always"
}
}
}
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,35 @@ repos:
Future work:
1. Support a year parameter that can be used to apply fixes.


### arabic-presentation-form

Replaces characters from the Arabic Presentation Forms blocks (A or B) with their 'default' Unicode characters.
This is useful, for example, with the 'Scheherazade New' font, which does not support these characters.

Arguments:
- `--excluded-chars`: Regex of characters to exclude from being fixed.
- `--custom-rules`: Rules that extend or override the tool's built-in configuration. Format and example below:
```json
"RuleName": {"rule": {"ReplacementCharacter(s)": "RegexOfApplicableCharacter(s)"}}
"ʾalif": {"rule": {"\u0627": "(\ufe8d|\ufe8e)"}},
```

Example that extends the applicable file types and restricts the hook to a specific folder (everything under `site/data`):

```yaml
repos:
- repo: https://github.com/adehad/pre-commit-hooks
rev: main
hooks:
- id: arabic-presentation-form
entry: arabic-presentation-form
language: python
types_or: [text, json, markdown]
args: [--excluded-chars, (ﷺ)]
files: ^site/data/
```

## Local Installation

```console
Expand Down
28 changes: 15 additions & 13 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,13 @@ build-backend = "hatchling.build"
name = "pre_commit_hooks"
description = "A selection of pre-commit hooks for pre-commit.com."
readme = "README.md"
requires-python = ">=3.8"
requires-python = ">=3.10"
license = "GPL-3.0-or-later"
keywords = ["pre-commit"]
authors = [{ name = "adehad", email = "[email protected]" }]
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: Implementation :: CPython",
Expand All @@ -30,6 +28,7 @@ Source = "https://github.com/adehad/pre-commit-hooks/"

[project.scripts]
check-header-footer = "pre_commit_hooks.check_header_footer:main"
arabic-presentation-form = "pre_commit_hooks.arabic_presentation_form:main"

[tool.hatch.build]
sources = ["src"]
Expand Down Expand Up @@ -66,18 +65,20 @@ lint = "python -m pre_commit run --color=always {args:--all-files}"
# Tests
########################################################################################
[[tool.hatch.envs.test.matrix]]
python = ["38", "39", "310", "311"]
python = ["310", "311"]

########################################################################################
# External Tool Config
########################################################################################
[tool.mypy]
python_version = 3.8
python_version = '3.10'
strict = true
ignore_missing_imports = true
namespace_packages = true
show_error_codes = true
strict_optional = true
warn_unused_configs = true
exclude = ["tests/"]

[tool.coverage.run]
branch = true
Expand All @@ -87,7 +88,7 @@ omit = ["src/pre_commit_hooks/__about__.py"]
[tool.coverage.report]
exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]

[tool.ruff]
[tool.ruff.lint]
select = [
"E", # pycodestyle
"W", # pycodestyle
Expand All @@ -99,22 +100,23 @@ select = [
]
ignore = []

# Same as Black.
line-length = 88

# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

# Assume Python 3.8. (minimum supported)
target-version = "py38"
[tool.ruff]
# Same as Black.
line-length = 88

# Assume Python 3.10. (minimum supported)
target-version = "py310"

# The source code paths to consider, e.g., when resolving first- vs. third-party imports
src = ["pre_commit_hooks", "tests"]

[tool.ruff.isort]
[tool.ruff.lint.isort]
known-first-party = ["pre_commit_hooks", "tests"]
required-imports = ["from __future__ import annotations"]

[tool.ruff.pydocstyle]
[tool.ruff.lint.pydocstyle]
# Use Google-style docstrings.
convention = "google"
185 changes: 185 additions & 0 deletions src/pre_commit_hooks/arabic_presentation_form/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
"""Arabic Presentation Form."""

from __future__ import annotations

import functools
import pathlib
import re
import sys
from collections.abc import Sequence
from typing import Any

from ..util import (
ABCArgs,
ABCHook,
ExitCode,
HashableDict,
load_json_source,
)
from . import char_map

# Import-time side effect: switch stdout to UTF-8 so the report lines printed by
# this module (which embed the processed characters) are encoded consistently.
sys.stdout.reconfigure(  # type: ignore[attr-defined]
    encoding="utf-8"  # For Windows: we want to be sure to use UTF-8
)
# Compiled regex pattern -> replacement text, as produced by get_rules().
RulesDict = dict[re.Pattern[Any], str]


def apply_rules_to_lines(
    line: str,
    rules: RulesDict,
    exclude: re.Pattern[str],
    file_name: pathlib.Path | str,
    line_no: str | int,
) -> tuple[ExitCode, str]:
    """Apply the replacement rules to each character of a single line.

    Text matched by ``exclude`` is ignored: it does not trigger the check and
    it is copied to the output unchanged. Every other character is passed
    through ``apply_rule``; a ``[Fixed]`` or ``[Not Fixed]`` report line is
    printed for each character that was replaced or that
    ``char_map.is_contains_non_general_form`` still flags.

    Args:
        line (str): Line to check the rules against.
        rules (RulesDict): Compiled pattern -> replacement rules to apply.
        exclude (re.Pattern): Pattern of text to exclude from check and fix.
        file_name (pathlib.Path | str): File being checked (for reporting).
        line_no (str | int): Line number being checked (for reporting).

    Returns:
        (ExitCode, str): (The PASS/FAIL state, The new line). ``ExitCode.OK``
        returns ``line`` untouched; ``ExitCode.FAIL`` is returned whenever the
        non-excluded text is flagged, even if every character was fixed.
    """
    # Replace with space to not affect col numbers.
    masked_line = exclude.sub(" ", line)

    # ``max`` raises ValueError on an empty string: an empty line is clean.
    if not masked_line:
        return ExitCode.OK, line
    # The check is made on the highest code point of the non-excluded text.
    if not char_map.is_contains_non_general_form(max(masked_line)):
        return ExitCode.OK, line

    # 0-based indices covered by an exclusion match; these stay untouched.
    # Zero-width matches (e.g. the default empty pattern) contribute nothing.
    excluded_cols = {
        idx for match in exclude.finditer(line) for idx in range(*match.span())
    }
    # Built once per line: apply_rule is lru_cached and needs a hashable key.
    hashable_rules = HashableDict(rules)

    new_chars: list[str] = []
    for col_no, c in enumerate(line, start=1):
        if (col_no - 1) in excluded_cols:
            new_chars.append(c)
            continue

        new_c = apply_rule(rules=hashable_rules, character=c)
        if c != new_c:
            status = "[Fixed]"
        elif char_map.is_contains_non_general_form(new_c):
            status = "[Not Fixed]"
        else:
            status = ""

        if status:
            # Only format the location when there is something to report.
            new_c_as_unicode_hex = [f"\\u{ord(ch):04x}" for ch in new_c]
            print(
                f"{status} {file_name}:{line_no}:{col_no} "
                f"[{new_c} ({new_c_as_unicode_hex})]"
            )

        new_chars.append(new_c)

    return ExitCode.FAIL, "".join(new_chars)


def get_rules(custom_rules: char_map.CHAR_MAP_TYPE) -> RulesDict:
    """Build the compiled replacement rules.

    The built-in ``char_map.CHAR_MAP`` is taken as the base and
    ``custom_rules`` is layered on top, so a custom entry with the same rule
    name replaces the built-in one.

    Args:
        custom_rules (char_map.CHAR_MAP_TYPE): Any additional rules to apply.

    Returns:
        RulesDict: The compiled rules, keyed by compiled regex with the
        replacement text as value.

    """
    merged_rules: char_map.CHAR_MAP_TYPE = dict(char_map.CHAR_MAP)
    merged_rules |= custom_rules
    # Each entry's "rule" maps replacement text -> regex source; invert it.
    return {
        re.compile(source_regex): replacement
        for rule_entry in merged_rules.values()
        for replacement, source_regex in rule_entry["rule"].items()
    }


@functools.lru_cache
def apply_rule(rules: RulesDict, character: str) -> str:
    """Return ``character`` after applying the first rule that matches it.

    Results are memoised with ``functools.lru_cache``, so ``rules`` must be
    hashable.

    Args:
        rules (RulesDict): rules to apply for the character.
        character (str): The letter/character to check against.

    Returns:
        str: The substituted text for the first matching rule, or the
        character unchanged when no rule matches.
    """
    for pattern, replacement in rules.items():
        if pattern.match(character) is None:
            continue
        # Only the first matching rule is used.
        return pattern.sub(replacement, character)
    return character


class ArabicPresentationFormArgs(ABCArgs):
    """Parsed command-line arguments for the Arabic presentation form hook."""

    # Regex source from --excluded-chars; the checker compiles it with re.compile.
    excluded_chars: str
    # Rules from --custom-rules; get_rules() merges them over char_map.CHAR_MAP.
    custom_rules: char_map.CHAR_MAP_TYPE


class ArabicPresentationFormChecker(ABCHook):
    """Checker that replaces Arabic presentation-form characters in files."""

    def setup_parser(self) -> None:
        """Register the hook-specific command-line arguments."""
        self.parser.add_argument(
            "--excluded-chars",
            type=str,
            default="",
            metavar="exclude-char-regex",
            help="Regex for characters to exclude. e.g. (ﷺ)",
        )
        self.parser.add_argument(
            "--custom-rules",
            type=load_json_source,
            default={},
            metavar="Path-OR-JSON-String",
            help=(
                '"RuleName": {"rule": {"ReplacementCharacter(s)": "RegexOfApplicableCharacter(s)"}}'  # noqa: E501
                '. e.g. "ʾalif": {"rule": {"\u0627": "(\ufe8d|\ufe8e)"}},'  # noqa: RUF001
                + ". To exclude a unicode character, overwrite its default entry."
            ),
        )

    def implementation(
        self,
        file_name: pathlib.Path,
        args: ArabicPresentationFormArgs,
    ) -> ExitCode:
        """Apply the rules to every line of ``file_name``, rewriting it if needed.

        Args:
            file_name (pathlib.Path): The UTF-8 text file to check and fix.
            args (ArabicPresentationFormArgs): Parsed hook arguments.

        Returns:
            ExitCode: The per-line exit codes OR-ed together.
        """
        exit_code = int(ExitCode.OK)
        exclude_regex = re.compile(args.excluded_chars)
        # Loop-invariant: build the compiled rules once per file, not per line.
        rules = get_rules(args.custom_rules)

        # newline="" disables newline translation so each line keeps its
        # original terminator and the rewrite does not alter line endings.
        with file_name.open("r", encoding="utf-8", newline="") as f:
            original_lines = f.readlines()

        new_file_lines: list[str] = []
        for line_no, line in enumerate(original_lines, start=1):
            intermediate_exit_code, new_line = apply_rules_to_lines(
                line=line,
                line_no=line_no,
                file_name=file_name,
                rules=rules,
                exclude=exclude_regex,
            )
            exit_code |= intermediate_exit_code

            # Re-check the fixed line, ignoring excluded text; the `or " "`
            # keeps max() from failing when nothing is left.
            if char_map.is_contains_non_general_form(
                max(exclude_regex.sub("", new_line) or " ")
            ):
                print(f"Incomplete Fixes Applied: {file_name}:{line_no}")

            new_file_lines.append(new_line)

        # Leave untouched files alone instead of rewriting identical content.
        if new_file_lines != original_lines:
            with file_name.open("w", encoding="utf-8", newline="") as f:
                f.writelines(new_file_lines)
        return ExitCode(exit_code)


def main(argv: Sequence[str] | None = None) -> int:
    """Run the Arabic presentation form checker.

    Args:
        argv: Arguments forwarded as-is to the checker's ``run`` method.

    Returns:
        int: The value returned by ``run``.
    """
    return ArabicPresentationFormChecker().run(argv=argv)
8 changes: 8 additions & 0 deletions src/pre_commit_hooks/arabic_presentation_form/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Arabic Presentation Form Hook."""

from __future__ import annotations

from . import main

# Executed via `python -m pre_commit_hooks.arabic_presentation_form`:
# run the hook and use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Loading