From 9e035823de872dd999a3736bfa23ff3676996998 Mon Sep 17 00:00:00 2001
From: Stefano Braghin <527806+stefano81@users.noreply.github.com>
Date: Sat, 16 May 2026 20:20:27 +0100
Subject: [PATCH 1/9] fix: address "Module is imported with `import` and
 `import from`" issue from CodeQL

Signed-off-by: Stefano Braghin <527806+stefano81@users.noreply.github.com>
---
 .../accounts_office_reference_number.py       |  3 +--
 .../american_bankers_association.py           |  3 +--
 .../classification/identifiers/date.py        | 19 +++++++------
 .../identifiers/french_postal_code.py         |  3 +--
 .../identifiers/japan_address.py              | 27 +++++++++----------
 .../classification/identifiers/vehicle.py     |  5 ++--
 6 files changed, 27 insertions(+), 33 deletions(-)

diff --git a/src/risk_assessment/classification/identifiers/accounts_office_reference_number.py b/src/risk_assessment/classification/identifiers/accounts_office_reference_number.py
index 3a64ad3..d96de3c 100644
--- a/src/risk_assessment/classification/identifiers/accounts_office_reference_number.py
+++ b/src/risk_assessment/classification/identifiers/accounts_office_reference_number.py
@@ -1,5 +1,4 @@
 import re
-from re import Pattern
 
 from risk_assessment.classification.identifiers import Identifier
 
@@ -20,7 +19,7 @@
 
 
 class AccountsOfficeReferenceNumber(Identifier):
-    pattern: Pattern[str] = re.compile(r"^\d{3}P[a-z]\d{7}(?:\d|X)(\d{4})?$", re.I)  # 13 or 16 characters
+    pattern: re.Pattern[str] = re.compile(r"^\d{3}P[a-z]\d{7}(?:\d|X)(\d{4})?$", re.I)  # 13 or 16 characters
 
     def is_of_this_type(self, text: str) -> bool:
         match = self.pattern.match(text)
diff --git a/src/risk_assessment/classification/identifiers/american_bankers_association.py b/src/risk_assessment/classification/identifiers/american_bankers_association.py
index 24d01f6..53df487 100644
--- a/src/risk_assessment/classification/identifiers/american_bankers_association.py
+++ b/src/risk_assessment/classification/identifiers/american_bankers_association.py
@@ -1,5 +1,4 @@
 import re
-from re import Pattern
 
 from risk_assessment.classification.identifiers import Identifier
 
@@ -57,7 +56,7 @@ def _validate_checksum(federal_reserve_routing: str, aba_institution: str, check
 
 
 class AmericanBankersAssociationNumber(Identifier):
-    pattern: Pattern[str] = re.compile(r"^(\d{4})(\d{4})(\d)$")
+    pattern: re.Pattern[str] = re.compile(r"^(\d{4})(\d{4})(\d)$")
 
     def is_of_this_type(self, text: str) -> bool:
         match = self.pattern.match(text)
diff --git a/src/risk_assessment/classification/identifiers/date.py b/src/risk_assessment/classification/identifiers/date.py
index fbfb410..93e6aec 100644
--- a/src/risk_assessment/classification/identifiers/date.py
+++ b/src/risk_assessment/classification/identifiers/date.py
@@ -8,7 +8,6 @@
 import re
 from collections.abc import Callable, Iterable
 from datetime import datetime
-from re import Match, Pattern
 from typing import Any
 
 import re2
@@ -17,9 +16,9 @@
 
 
 def _compute_unique_patterns(
-    patterns: dict[str, Pattern[str]],
-    ampm_patterns: dict[str, Pattern[str]],
-    patterns_with_processing: dict[str, tuple[Pattern[str], Callable[[Match[str]], str]]],
+    patterns: dict[str, re.Pattern[str]],
+    ampm_patterns: dict[str, re.Pattern[str]],
+    patterns_with_processing: dict[str, tuple[re.Pattern[str], Callable[[re.Match[str]], str]]],
 ) -> str:
     """Compute unique regex patterns from multiple pattern dictionaries.
 
@@ -43,7 +42,7 @@ def _compute_unique_patterns(
     return "|".join(unique_patterns)
 
 
-_RePatternLike = Pattern[str] | Any
+_RePatternLike = re.Pattern[str] | Any
 
 
 class DateTime(Identifier):
@@ -77,7 +76,7 @@ class DateTime(Identifier):
         True
     """
 
-    patterns: dict[str, Pattern[str]] = {
+    patterns: dict[str, re.Pattern[str]] = {
         r"%d %b %Y %H:%M:%S %z": re.compile(
             r"^\d{1,2} \w{3} \d{4} \d{1,2}:\d{1,2}:\d{1,2} [+-]?\d{2}\d{2}(?:\d{2}(?:\.\d{6})?)?$", re.I | re.U
         ),
@@ -158,7 +157,7 @@ class DateTime(Identifier):
         r"%y年%m・%d": re.compile(r"^\d{2}年\d{1,2}・\d{1,2}$", re.I | re.U),
         r"%y年%m": re.compile(r"^\d{2}年\d{1,2}$", re.I | re.U),
     }
-    ampm_patterns: dict[str, Pattern[str]] = {
+    ampm_patterns: dict[str, re.Pattern[str]] = {
         r"%B %d, %Y %I:%M %p": re.compile(r"^\w{4,} \d{1,2}, \d{4} \d{1,2}:\d{1,2} [AP]M$", re.I | re.U),
         r"%a %b %d, %Y %I:%M %p": re.compile(r"^\w{3} \w{3} \d{1,2}, \d{4} \d{1,2}:\d{1,2} [AP]M$", re.I | re.U),
         r"%d/%m/%Y %I:%M %p": re.compile(r"^\d{1,2}/\d{1,2}/\d{4} \d{1,2}:\d{1,2} [AP]M$", re.I | re.U),
@@ -180,7 +179,7 @@ class DateTime(Identifier):
             r"^\d{4}/\d{1,2}/\d{1,2} \d{1,2}:\d{1,2}:\d{1,2} [AP]M GMT[+-]\d{1,2}$", re.I | re.U
         ),
     }
-    patterns_with_processing: dict[str, tuple[Pattern[str], Callable[[Match[str]], str]]] = {
+    patterns_with_processing: dict[str, tuple[re.Pattern[str], Callable[[re.Match[str]], str]]] = {
         r"%Y/%m/%d %I:%M:%S %p %Z": (
             re.compile(r"^(\d{4}/\d{1,2}/\d{1,2} \d{1,2}:\d{1,2}:\d{1,2} [AP]M (?:\w{3}))[\+-]\d+$", re.I | re.U),
             lambda m: m.group(1),
@@ -388,7 +387,7 @@ def is_of_this_type(self, text: str) -> bool:
 
 
 def _match_patterns_with_code(
-    patterns: Iterable[tuple[str, tuple[Pattern[str], Callable[[Match[str]], str]]]], text: str
+    patterns: Iterable[tuple[str, tuple[re.Pattern[str], Callable[[re.Match[str]], str]]]], text: str
 ) -> bool:
     """Match text against patterns that require preprocessing.
 
@@ -439,7 +438,7 @@ def _match_format(format: str, text: str) -> bool:
         return False
 
 
-def _match_patterns(patterns: Iterable[tuple[str, Pattern[str]]], text: str) -> bool:
+def _match_patterns(patterns: Iterable[tuple[str, re.Pattern[str]]], text: str) -> bool:
     """Match text against multiple datetime patterns.
 
     Args:
diff --git a/src/risk_assessment/classification/identifiers/french_postal_code.py b/src/risk_assessment/classification/identifiers/french_postal_code.py
index aa10323..0e253a8 100644
--- a/src/risk_assessment/classification/identifiers/french_postal_code.py
+++ b/src/risk_assessment/classification/identifiers/french_postal_code.py
@@ -1,6 +1,5 @@
 import re
 from pathlib import Path
-from re import Pattern
 
 from risk_assessment.classification.identifiers import Identifier
 
@@ -20,7 +19,7 @@ def _load_valid_zipcodes() -> dict[str, set[str]]:
 
 
 class FrenchPostalCode(Identifier):
-    pattern: Pattern[str] = re.compile(r"^(\d{2})(\d{3})$")
+    pattern: re.Pattern[str] = re.compile(r"^(\d{2})(\d{3})$")
     departments: dict[str, set[str]] = _load_valid_zipcodes()
 
     def is_of_this_type(self, text: str) -> bool:
diff --git a/src/risk_assessment/classification/identifiers/japan_address.py b/src/risk_assessment/classification/identifiers/japan_address.py
index 356cd37..59fdb76 100644
--- a/src/risk_assessment/classification/identifiers/japan_address.py
+++ b/src/risk_assessment/classification/identifiers/japan_address.py
@@ -1,32 +1,31 @@
-import re
-from re import Pattern
+from re import I, Pattern, U, compile
 
 from risk_assessment.classification.identifiers import Identifier
 
 
 class JapanAddress(Identifier):
     patterns: list[Pattern[str]] = [
-        re.compile(
-            r"^\d+\s+\w+\s+\w{3,}-\w{2,5}\s+\w{3,}(?:-\w{2,3})?,\s+\w{3,}\s+(?:〒\s*)?\d{3}-\d{4}\s+JAPAN$", re.I | re.U
+        compile(
+            r"^\d+\s+\w+\s+\w{3,}-\w{2,5}\s+\w{3,}(?:-\w{2,3})?,\s+\w{3,}\s+(?:〒\s*)?\d{3}-\d{4}\s+JAPAN$", I | U
         ),  # rural
-        re.compile(
+        compile(
             r"^\d+-\d+,\s+\w{3,}\s+\d+-chome\s+\w{3,}(?:-\w{3,})*-(?:shi|gun|ku|machi|cho),\s+\w+(?:-(?:ken|fu|to))?\s+(?:〒\s*)?\d{3}-\d{4}\s+JAPAN$",
-            re.I | re.U,
+            I | U,
         ),  # city
-        re.compile(
+        compile(
             r"^\d+-\d+-\d+,\s+\w{3,}\s+\w{3,}(?:-\w{3,})*-(?:shi|gun|ku|machi|cho),\s+\w+(?:-(?:ken|fu|to))?\s+(?:〒\s*)?\d{3}-\d{4}\s+JAPAN$",
-            re.I | re.U,
-        ),  # city, compressed       # re.compile(r"", re.I | re.U),  # city
-        re.compile(
+            I | U,
+        ),  # city, compressed
+        compile(
             r"^\d+-\d+,\s+\w{3,}\s+\w{3,}(?:-\w{3,})*-(?:shi|gun|ku|machi|cho),\s+\w+(?:-(?:ken|fu|to))?\s+(?:〒\s*)?\d{3}-\d{4}\s+JAPAN$$",
-            re.I | re.U,
+            I | U,
         ),  # city as prefecture
-        re.compile(
+        compile(
             r"^JAPAN\s+(?:〒\s*)?\d{3}-\d{4}\s+\w+(?:-(?:ken|fu|to))?\s+\w{3,}(?:-\w{3,})*-(?:shi|gun|ku|machi|cho)\s+\w{3,}\s+\d+(?:-chome)?(?:\s+|-)\d+-\d+$",
-            re.I | re.U,
+            I | U,
         ),  # oneliner
         # from RWD
-        re.compile(r"^〒?(:?\d+-\d+)\s+\w+\s*\d+$"),
+        compile(r"^〒?(:?\d+-\d+)\s+\w+\s*\d+$"),
     ]
 
     def is_of_this_type(self, text: str) -> bool:
diff --git a/src/risk_assessment/classification/identifiers/vehicle.py b/src/risk_assessment/classification/identifiers/vehicle.py
index 50e0a06..7d7f55a 100644
--- a/src/risk_assessment/classification/identifiers/vehicle.py
+++ b/src/risk_assessment/classification/identifiers/vehicle.py
@@ -4,8 +4,7 @@
 with checksum verification and World Manufacturer Identifier validation.
 """
 
-import re
-from re import Pattern
+from re import Pattern, compile
 
 from risk_assessment.classification.identifiers import Identifier
 
@@ -32,7 +31,7 @@ class VehicleIdentificationNumber(Identifier):
         True
     """
 
-    pattern: Pattern[str] = re.compile(
+    pattern: Pattern[str] = compile(
         r"^([ABCDEFGHJKLMNPRSTUVWXYZ0-9]{3})([ABCDEFGHJKLMNPRSTUVWXYZ0-9]{6})([ABCDEFGHJKLMNPRSTUVWXYZ0-9]{8})$"
     )
 

From fd3e5841b1c2f4df65e044e522b6be766fb39856 Mon Sep 17 00:00:00 2001
From: Stefano Braghin <527806+stefano81@users.noreply.github.com>
Date: Sat, 16 May 2026 20:28:01 +0100
Subject: [PATCH 2/9] fix: address "Empty except" issue from CodeQL

Signed-off-by: Stefano Braghin <527806+stefano81@users.noreply.github.com>
---
 .../classification/identifiers/age.py         |  5 ++---
 .../classification/identifiers/geography.py   |  5 ++---
 .../classification/identifiers/network.py     | 21 +++++++------------
 3 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/src/risk_assessment/classification/identifiers/age.py b/src/risk_assessment/classification/identifiers/age.py
index 2732119..26a3f91 100644
--- a/src/risk_assessment/classification/identifiers/age.py
+++ b/src/risk_assessment/classification/identifiers/age.py
@@ -4,6 +4,7 @@
 age expressions with units (years, months, weeks), and age-related phrases.
 """
 
+from contextlib import suppress
 from re import I, Pattern, U, compile
 
 from word2number.w2n import word_to_num
@@ -38,13 +39,11 @@ def is_of_this_type(self, text: str | int) -> bool:
         int_value: int = 10_000_000
 
         if isinstance(text, str):
-            try:
+            with suppress(ValueError):
                 int_value = int(text, base=10)
 
                 if text != str(int_value):
                     return False
-            except ValueError:
-                pass
 
         elif isinstance(text, int):
             int_value = text
diff --git a/src/risk_assessment/classification/identifiers/geography.py b/src/risk_assessment/classification/identifiers/geography.py
index 2193d7e..7f844a8 100644
--- a/src/risk_assessment/classification/identifiers/geography.py
+++ b/src/risk_assessment/classification/identifiers/geography.py
@@ -8,6 +8,7 @@
 import logging
 import re
 from collections.abc import Callable, Iterable
+from contextlib import suppress
 from pathlib import Path
 
 from risk_assessment.classification.identifiers import DictionaryIdentifier, Identifier
@@ -512,13 +513,11 @@ def is_of_this_type(self, text: str) -> bool:
         """
         text = text.strip()
         if len(text) == 5:
-            try:
+            with suppress(ValueError):
                 int_code = int(text, base=10)
                 for _, (m, M) in self.valid_codes.items():
                     if m <= int_code <= M:
                         return True
-            except ValueError:
-                pass
 
         return False
 
diff --git a/src/risk_assessment/classification/identifiers/network.py b/src/risk_assessment/classification/identifiers/network.py
index f92e0b3..c004cdf 100644
--- a/src/risk_assessment/classification/identifiers/network.py
+++ b/src/risk_assessment/classification/identifiers/network.py
@@ -4,6 +4,7 @@
 IP addresses (both versions), and URIs/URLs.
 """
 
+from contextlib import suppress
 from ipaddress import AddressValueError, IPv4Address, IPv6Address
 from logging import getLogger
 from pathlib import Path
@@ -63,11 +64,9 @@ def _valid_ipv6_hostname(text: str) -> bool:
     Returns:
         True if text is a valid IPv6 address, False otherwise.
     """
-    try:
+    with suppress(AddressValueError):
         if IPv6Address(text) is not None:
             return True
-    except AddressValueError:
-        pass
     return False
 
 
@@ -106,11 +105,9 @@ def is_of_this_type(self, text: str) -> bool:
         Returns:
             True if text is a valid IPv4 address, False otherwise.
         """
-        try:
+        with suppress(AddressValueError):
             if IPv4Address(text) is not None:
                 return True
-        except AddressValueError:
-            pass
 
         return False
 
@@ -136,13 +133,11 @@ def is_of_this_type(self, text: str, allow_double_colon: bool = True) -> bool:
         Returns:
             True if text is a valid IPv6 address, False otherwise.
         """
-        try:
+        with suppress(AddressValueError):
             if IPv6Address(text) is not None:
                 if text == "::" and not allow_double_colon:
                     return False
                 return True
-        except AddressValueError:
-            pass
 
         return False
 
@@ -211,7 +206,8 @@ def is_of_this_type(self, text: str) -> bool:
         """
         if len(text.strip()) != len(text):
             return False
-        try:
+
+        with suppress(Exception):
             result = urlparse(text)
 
             if result is not None:
@@ -223,7 +219,4 @@ def is_of_this_type(self, text: str) -> bool:
                     if text.startswith("www.") or text.startswith("mail."):
                         return self.is_of_this_type(f"http://{text}")
 
-        except Exception:
-            return False
-
         return False

From b4dca85cf8bdd150c5b5987f2ca246d3d869d642 Mon Sep 17 00:00:00 2001
From: Stefano Braghin <527806+stefano81@users.noreply.github.com>
Date: Sat, 16 May 2026 20:29:44 +0100
Subject: [PATCH 3/9] fix: address "Unused import" from CodeQL

Signed-off-by: Stefano Braghin <527806+stefano81@users.noreply.github.com>
---
 tests/classification/identifiers/test_credit_card.py | 3 ---
 tests/classification/identifiers/test_us_address.py  | 4 ----
 tests/classification/unstructured/test_utility.py    | 2 --
 3 files changed, 9 deletions(-)

diff --git a/tests/classification/identifiers/test_credit_card.py b/tests/classification/identifiers/test_credit_card.py
index c7f0638..c00e7bb 100644
--- a/tests/classification/identifiers/test_credit_card.py
+++ b/tests/classification/identifiers/test_credit_card.py
@@ -1,6 +1,3 @@
-import json
-from pathlib import Path
-
 import pytest
 
 from risk_assessment.classification.identifiers import CreditCard
diff --git a/tests/classification/identifiers/test_us_address.py b/tests/classification/identifiers/test_us_address.py
index 92c405a..9332331 100644
--- a/tests/classification/identifiers/test_us_address.py
+++ b/tests/classification/identifiers/test_us_address.py
@@ -1,7 +1,3 @@
-import datetime
-
-import pytest
-
 from risk_assessment.classification.identifiers import USPostalAddress
 
 
diff --git a/tests/classification/unstructured/test_utility.py b/tests/classification/unstructured/test_utility.py
index 4844a3a..ffa266d 100644
--- a/tests/classification/unstructured/test_utility.py
+++ b/tests/classification/unstructured/test_utility.py
@@ -1,8 +1,6 @@
 import json
 from pathlib import Path
 
-import pytest
-
 from risk_assessment.classification.unstructured import Entity
 from risk_assessment.classification.unstructured.utility import AnnotationWriter
 

From 139c0b8c7edd0bb36435990bc75dc6b5c5976ad8 Mon Sep 17 00:00:00 2001
From: Stefano Braghin <527806+stefano81@users.noreply.github.com>
Date: Sat, 16 May 2026 20:34:28 +0100
Subject: [PATCH 4/9] fix: address "Unmatchable caret in regular expression"
 from CodeQL

Signed-off-by: Stefano Braghin <527806+stefano81@users.noreply.github.com>
---
 .../classification/identifiers/national_identifier.py     | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/risk_assessment/classification/identifiers/national_identifier.py b/src/risk_assessment/classification/identifiers/national_identifier.py
index 7184090..7596c1c 100644
--- a/src/risk_assessment/classification/identifiers/national_identifier.py
+++ b/src/risk_assessment/classification/identifiers/national_identifier.py
@@ -81,7 +81,7 @@ class NIRFrance(Identifier):
     """
 
     pattern = re.compile(
-        r"^([7182]\d{14})$|^([7182])( )(\d{2})( )(\d{2})( )(\d{2})( )(\d{3})( )(\d{3})( )(\d{2})$|^([7128]\d{12} \d{2})$|^([7182]\d{4}2[AB]\d{8})$|^([7182])( )(\d{2})( )(\d{2})( )(2[AB])( )(\d{3})( )(\d{3})( )(\d{2})$"
+        r"^(?:([7182]\d{14})|([7182])( )(\d{2})( )(\d{2})( )(\d{2})( )(\d{3})( )(\d{3})( )(\d{2})|([7128]\d{12} \d{2})|([7182]\d{4}2[AB]\d{8})|([7182])( )(\d{2})( )(\d{2})( )(2[AB])( )(\d{3})( )(\d{3})( )(\d{2}))$"
     )
 
     def is_of_this_type(self, text: str) -> bool:
@@ -117,7 +117,7 @@ class TINGermany(Identifier):
     """
 
     pattern = re.compile(
-        r"^([1-9]\d{11})$|^([1-9]\d \d{3} \d{3} \d{3})$|^([1-9][0-9])([,])(d{3})([,])(\d{3})([,])(\d{3})$|^([1-9][0-9])([.])(d{3})([.])(\d{3})([.])(\d{3})$|^([1-9][0-9])([\/])(d{3})([\/])(\d{3})([\/])(\d{3})$|^([1-9]\d{10})$"
+        r"^(?:([1-9]\d{11})|([1-9]\d \d{3} \d{3} \d{3})|([1-9][0-9])([,])(d{3})([,])(\d{3})([,])(\d{3})|([1-9][0-9])([.])(d{3})([.])(\d{3})([.])(\d{3})|([1-9][0-9])([\/])(d{3})([\/])(\d{3})([\/])(\d{3})|([1-9]\d{10}))$"
     )  # noqa
 
     def check_last_digit(self, first_ten_digits: list[str], check_digit: str) -> bool:
@@ -182,7 +182,7 @@ def is_of_this_type(self, text: str) -> bool:
 
 
 class AadhaarNumber(Identifier):
-    pattern = re.compile(r"^([2-9]\d{3} \d{4} \d{4})$|^([2-9]\d{11})$")
+    pattern = re.compile(r"^(?:([2-9]\d{3} \d{4} \d{4})|([2-9]\d{11}))$")
 
     def is_of_this_type(self, text: str) -> bool:
         # https://en.wikipedia.org/wiki/Aadhaar
@@ -312,7 +312,7 @@ def validate_checksum(self, text: str) -> bool:
 
 class MyNumberJapan(Identifier):
     pattern = re.compile(
-        r"^(\d{4}.\d{4}.\d{4})$|^(\d{4},\d{4},\d{4})$|^(\d{4}-\d{4}-\d{4})|(\d{4} \d{4} \d{4})$|^(\d{12})$"
+        r"^(?:(\d{4}.\d{4}.\d{4})|(\d{4},\d{4},\d{4})|(\d{4}-\d{4}-\d{4})|(\d{4} \d{4} \d{4})|(\d{12}))$"
     )
 
     def is_of_this_type(self, text: str) -> bool:

From 73d61a39f27c190b09122c3fb0db031da73bdaca Mon Sep 17 00:00:00 2001
From: Stefano Braghin <527806+stefano81@users.noreply.github.com>
Date: Sat, 16 May 2026 20:35:31 +0100
Subject: [PATCH 5/9] fix: address "Commented-out code" from CodeQL

Signed-off-by: Stefano Braghin <527806+stefano81@users.noreply.github.com>
---
 .../classification/unstructured/aggregator.py               | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/risk_assessment/classification/unstructured/aggregator.py b/src/risk_assessment/classification/unstructured/aggregator.py
index 02456d2..b7599be 100644
--- a/src/risk_assessment/classification/unstructured/aggregator.py
+++ b/src/risk_assessment/classification/unstructured/aggregator.py
@@ -335,12 +335,6 @@ def find_split_point(self, entities: list[Entity]) -> list[int]:
         return sorted(split_points)
 
     def validate_entities(self, entity_list: list[Entity], text: str) -> list[Entity]:
-        # validated_entitites: list[Entity] = []
-        # for entity in entity_list:
-        #     if self.validate_entity(entity, text):
-        #         validated_entitites.append(entity)
-        # return validated_entitites
-
         return [entity for entity in entity_list if self.validate_entity(entity, text)]
 
     def validate_entity(self, entity: Entity, text: str) -> bool:

From b0131fdfd8ee2a9fba30b24f6b5954f99cc46312 Mon Sep 17 00:00:00 2001
From: Stefano Braghin <527806+stefano81@users.noreply.github.com>
Date: Sat, 16 May 2026 20:39:42 +0100
Subject: [PATCH 6/9] fix: address "Non-callable called" from CodeQL

Signed-off-by: Stefano Braghin <527806+stefano81@users.noreply.github.com>
---
 .../classification/__init__.py                | 21 +++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/risk_assessment/classification/__init__.py b/src/risk_assessment/classification/__init__.py
index 58e571e..60ebdc0 100644
--- a/src/risk_assessment/classification/__init__.py
+++ b/src/risk_assessment/classification/__init__.py
@@ -45,12 +45,27 @@ def create_instance(identifier_fqn: str) -> Identifier:
     for comp in parts[1:]:
         module = getattr(module, comp)
 
-    if type(module) is type(Identifier):
-        m = module()
-        return cast(Identifier, m)
+    # Only classes can be instantiated; reject modules, functions and other attributes up front.
+    if not isinstance(module, type):
+        raise ValueError(
+            f"{identifier_fqn} is not a class. "
+            "Expected a subclass of `risk_assessment.classification.identifiers.Identifier`, "
+            f"but got {type(module).__name__}"
+        )
+
+    # Keep the try body limited to the subclass check so that a TypeError raised by the
+    # constructor (e.g. an abstract class or missing arguments) is not misreported.
+    try:
+        is_identifier = issubclass(module, Identifier)
+    except TypeError as e:
+        raise ValueError(f"{identifier_fqn} cannot be checked as a subclass: {e}") from e
+
+    if is_identifier:
+        return module()
 
     raise ValueError(
-        f"{identifier_fqn} does not exists or is not a subclass of `risk_assessment.classification.identifiers.Identifier`"
+        f"{identifier_fqn} is not a subclass of `risk_assessment.classification.identifiers.Identifier`. "
+        f"Found class: {module.__name__}"
     )
 
 

From c7000f3846886a13b392bea10e5b3b7813c4a35b Mon Sep 17 00:00:00 2001
From: Stefano Braghin <527806+stefano81@users.noreply.github.com>
Date: Sat, 16 May 2026 20:41:49 +0100
Subject: [PATCH 7/9] fix: address "Redundant comparison" from CodeQL

Signed-off-by: Stefano Braghin <527806+stefano81@users.noreply.github.com>
---
 .../identifiers/us_postal_address.py          | 36 +++++++++++++------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/risk_assessment/classification/identifiers/us_postal_address.py b/src/risk_assessment/classification/identifiers/us_postal_address.py
index 19b5d9d..e829c4e 100644
--- a/src/risk_assessment/classification/identifiers/us_postal_address.py
+++ b/src/risk_assessment/classification/identifiers/us_postal_address.py
@@ -758,24 +758,40 @@ def _quick_check_there_are_multiple_tokens(text: str) -> bool:
 
 
 def _check_that_case_is_consistent(text: str) -> bool:
-    tokens = text.split(r"[\s+|,]")
+    """Check that the tokens of ``text`` do not mix upper- and lower-case initial letters.
+
+    Args:
+        text: The text to check; it is split into tokens on runs of whitespace and commas.
+
+    Returns:
+        True if the alphabetic initial letters of the tokens are all upper case or all
+        lower case (or there are none), False if both cases occur.
+    """
+    import re
+
+    # Split on runs of whitespace and/or commas (str.split would treat the pattern as a literal)
+    tokens = re.split(r"[\s,]+", text)
 
     upper_count = 0
     lower_count = 0
 
     for token in tokens:
-        if len(token.strip()) == 0:
+        token = token.strip()
+        if not token:
             continue
 
-        begin = token[0]
+        # Only the initial character of each token is inspected
+        first_char = token[0]
 
-        if begin.isalpha():
-            if begin.islower():
+        if first_char.isalpha():
+            if first_char.islower():
                 lower_count += 1
-            elif begin.isupper():
+            elif first_char.isupper():
                 upper_count += 1
-            else:
-                # raise ValueError()
-                return False
+            # Note: caseless alphabetic characters (e.g. CJK ideographs) are neither upper
+            # nor lower; they are ignored rather than causing the function to return False
 
-    return not (lower_count > 0 and lower_count > 0)  # either all upper or all lower
+    # The case is consistent unless both an upper- and a lower-case initial were seen.
+    # Text without any alphabetic initial is considered consistent (returns True).
+    has_mixed_case = lower_count > 0 and upper_count > 0
+    return not has_mixed_case

From 66c22eb21d9eab235d361c524a076e12a2a32ab2 Mon Sep 17 00:00:00 2001
From: Stefano Braghin <527806+stefano81@users.noreply.github.com>
Date: Sat, 16 May 2026 20:45:45 +0100
Subject: [PATCH 8/9] fix: tests were not passing "correctly", re-classified
 examples

Signed-off-by: Stefano Braghin <527806+stefano81@users.noreply.github.com>
---
 tests/classification/identifiers/test_us_address.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tests/classification/identifiers/test_us_address.py b/tests/classification/identifiers/test_us_address.py
index 9332331..88dcb07 100644
--- a/tests/classification/identifiers/test_us_address.py
+++ b/tests/classification/identifiers/test_us_address.py
@@ -55,24 +55,22 @@ def test_from_rwd():
 
     assert identifier.is_of_this_type("1160 South Main Street 322, Middletown, Connecticut, U.S.A. 06457-5044")
     assert identifier.is_of_this_type("12489 W 84Th DR, ARVADA, Jefferson County, CO")
-    assert identifier.is_of_this_type("12489 W 84Th DR, ARVADA, Jefferson County, CO")
-    assert identifier.is_of_this_type("13992 E 107Th AVE, COMMERCE CITY, Adams County, CO")
-    assert identifier.is_of_this_type("13992 E 107Th AVE, COMMERCE CITY, Adams County, CO")
     assert identifier.is_of_this_type("13992 E 107Th AVE, COMMERCE CITY, Adams County, CO")
     assert identifier.is_of_this_type("16101 Road J, CORTEZ, Montezuma County, CO")
     assert identifier.is_of_this_type("16122 W 70Th AVE, ARVADA, Jefferson County, CO")
+    assert identifier.is_of_this_type("1824 Alto Ln, Lutz, FL 33558")
     assert identifier.is_of_this_type("1824 Alto Ln, Lutz")
-    assert identifier.is_of_this_type("1824 Alto Ln, Lutz, FL 33558 Republican Party of Florida")
     assert identifier.is_of_this_type("415 W. Route 66, 201")
     assert identifier.is_of_this_type("52 Canyon Cove LN, DRAKE, Larimer County, CO")
     assert identifier.is_of_this_type("646 Riverview Trace Ct, Fort Myers, Florida 33916")
-    assert identifier.is_of_this_type("Lutz, FL 33558 Republican Party of Florida")
     assert not identifier.is_of_this_type("1160 South Main Street 322, Middletown, Connecticut, U.S.A. 06457-5044.")
     assert not identifier.is_of_this_type("1824 Alto Ln, Lutz ")
     assert not identifier.is_of_this_type("1824 Alto Ln, Lutz, FL 33558 Republican Party of Florida.")
+    assert not identifier.is_of_this_type("1824 Alto Ln, Lutz, FL 33558 Republican Party of Florida")
     assert not identifier.is_of_this_type("Camera LensesSimply put, the better you understand different")
     assert not identifier.is_of_this_type("Dec 2018 00:00:00 -0000Template - Content graphicsAlex")
     assert not identifier.is_of_this_type("Lutz, FL 33558 Republican Party of Florida.")
+    assert not identifier.is_of_this_type("Lutz, FL 33558 Republican Party of Florida")
 
 
 def test_invalid():

From 6466d3460c83df026f4078e9b03fe556e5161e66 Mon Sep 17 00:00:00 2001
From: Stefano Braghin <527806+stefano81@users.noreply.github.com>
Date: Sat, 16 May 2026 21:54:59 +0100
Subject: [PATCH 9/9] fix: fix "Unused local variable" from CodeQL, and make
 the Mexican CURP work properly

Signed-off-by: Stefano Braghin <527806+stefano81@users.noreply.github.com>
---
 .secrets.baseline                             |  8 +--
 .../identifiers/national_identifier.py        | 71 +++++++++++--------
 .../identifiers/test_national_id.py           | 10 ++-
 3 files changed, 51 insertions(+), 38 deletions(-)

diff --git a/.secrets.baseline b/.secrets.baseline
index d309f75..22df845 100644
--- a/.secrets.baseline
+++ b/.secrets.baseline
@@ -3,7 +3,7 @@
     "files": "^.secrets.baseline$",
     "lines": null
   },
-  "generated_at": "2026-05-15T10:59:03Z",
+  "generated_at": "2026-05-16T20:54:57Z",
   "plugins_used": [
     {
       "name": "AWSKeyDetector"
@@ -82,7 +82,7 @@
         "hashed_secret": "f3e0d184814b86dc1c4eb623edde7610cf212567",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 845,
+        "line_number": 860,
         "type": "Hex High Entropy String",
         "verified_result": null
       }
@@ -188,7 +188,7 @@
         "hashed_secret": "f3e0d184814b86dc1c4eb623edde7610cf212567",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 163,
+        "line_number": 161,
         "type": "Hex High Entropy String",
         "verified_result": null
       },
@@ -196,7 +196,7 @@
         "hashed_secret": "804ec071803318791b835cffd6e509c8d32239db",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 165,
+        "line_number": 163,
         "type": "Hex High Entropy String",
         "verified_result": null
       }
diff --git a/src/risk_assessment/classification/identifiers/national_identifier.py b/src/risk_assessment/classification/identifiers/national_identifier.py
index 7596c1c..bb4c364 100644
--- a/src/risk_assessment/classification/identifiers/national_identifier.py
+++ b/src/risk_assessment/classification/identifiers/national_identifier.py
@@ -364,46 +364,44 @@ def is_of_this_type(self, text: str) -> bool:
 
 
 class MexicoCURP(Identifier):
-    pattern = re.compile(
-        r"^[A-Z][AEIOU][A-Z]{2}(\d{2})(\d{2})(\d{2})[HMX]([A-Z]{2})[BCDFGHJKLMNPQRSTVWXYZ]{3}([A-Z0-9])(\d)"
-    )
+    pattern = re.compile(r"^[A-Z][AEIOU][A-Z]{2}(\d{2})(\d{2})(\d{2})[HMX]([A-Z]{2})[A-Z]{3}([A-Z0-9])(\d)$")
     #  http://www.statoids.com/umx.html
     states: set[str] = {
-        "AG",
-        "BN",
+        "AS",
+        "BC",
         "BS",
-        "CA",
+        "CC",
+        "CS",
         "CH",
+        "DF",
         "CL",
         "CM",
-        "COCP",
-        "DF",
-        "DU",
-        "GJ",
+        "DG",
+        "GT",
         "GR",
-        "HI",
-        "JA",
+        "HG",
+        "JC",
         "MC",
-        "MR",
-        "MX",
-        "NA",
+        "MN",
+        "MS",
+        "NT",
         "NL",
-        "OA",
-        "PU",
-        "QE",
+        "OC",
+        "PL",
+        "QO",
         "QR",
-        "SI",
+        "SP",
         "SL",
-        "SO",
-        "TB",
+        "SR",
+        "TC",
+        "TS",
         "TL",
-        "TM",
-        "VE",
         "VZ",
-        "YU",
-        "ZA",
+        "YN",
+        "ZS",
         "NE",  # code for people born abroad
     }
+    CURP_CHARACTERS = "0123456789ABCDEFGHIJKLMNÑOPQRSTUVWXYZ"
 
     def is_of_this_type(self, text: str) -> bool:
         match = MexicoCURP.pattern.match(text)
@@ -414,19 +412,45 @@ def is_of_this_type(self, text: str) -> bool:
             day = int(match.group(3), base=10)
             state = match.group(4)
             century_flag = match.group(5)
-            parity = match.group(6)  # noqa
 
-            if all(c.isdigit() for c in century_flag):
-                year = int(f"20{year_2d}")
-            else:
-                year = int(f"19{year_2d}")
+            # The 17th character is a digit for births up to 1999 and a letter from 2000 onwards.
+            century = "19" if century_flag.isdigit() else "20"
+            year = int(f"{century}{year_2d}")
 
-            if state in self.states and _valid_birth_date(day, month, year):
+            if state in self.states and _valid_birth_date(day, month, year) and _valid_curp_parity(text):
                 return True
 
         return False
 
 
+def _valid_curp_parity(text: str) -> bool:
+    """Validate the check digit (18th character) of a Mexican CURP.
+
+    Each of the first 17 characters is replaced by its index in
+    ``MexicoCURP.CURP_CHARACTERS`` and weighted 18, 17, ..., 2; the check digit
+    is the value that completes the weighted sum to a multiple of 10.
+
+    Args:
+        text: The CURP string to validate.
+
+    Returns:
+        True if the check digit is valid, False otherwise (including when
+        ``text`` is not 18 characters long or contains a character outside
+        the CURP alphabet).
+    """
+    if len(text) != 18:
+        return False
+
+    weighted_sum = 0
+    for position, character in enumerate(text[:17]):
+        value = MexicoCURP.CURP_CHARACTERS.find(character)
+        if value < 0:
+            return False
+        weighted_sum += (18 - position) * value
+
+    return text[-1] == str(-weighted_sum % 10)
+
+
 class CanadaSIN(LuhnIdentifier):
     _pattern = re.compile(r"^\d{3}[\- ]?\d{3}[\- ]?\d{3}$")
 
diff --git a/tests/classification/identifiers/test_national_id.py b/tests/classification/identifiers/test_national_id.py
index 5a2a313..e870f14 100644
--- a/tests/classification/identifiers/test_national_id.py
+++ b/tests/classification/identifiers/test_national_id.py
@@ -1,4 +1,5 @@
 import pytest
+from faker import Faker
 
 from risk_assessment.classification.identifiers import IsraelID
 from risk_assessment.classification.identifiers.national_identifier import (
@@ -39,14 +40,16 @@ def test_israel_national_id(faker):
         assert identifier.is_of_this_type(example), example
 
 
-def test_mexican_curb():
-    examples = [
-        "HEGG560427MVZRRL04",
-    ]
-
+def test_mexican_curp():
     identifier = MexicoCURP()
 
-    for example in examples:
+    # Fixed regression example, plus the same CURP with a corrupted check digit.
+    assert identifier.is_of_this_type("HEGG560427MVZRRL04")
+    assert not identifier.is_of_this_type("HEGG560427MVZRRL05")
+
+    fake = Faker("es_MX")
+    fake.seed_instance(0)  # make the generated examples reproducible
+    for example in [fake.curp() for _ in range(100)]:
         assert identifier.is_of_this_type(example), example