#import "OAArabicNormalizer.h"

#import <Foundation/Foundation.h>

@implementation OAArabicNormalizer

// Arabic-Indic digits U+0660–U+0669. Index i here maps to index i in
// digitsReplacement, so the two strings must stay the same length.
static NSString *const arabicDigits = @"٠١٢٣٤٥٦٧٨٩";
static NSString *const digitsReplacement = @"0123456789";
// U+0640 (tatweel / kashida), the elongation character stripped by +normalize:.
static NSString *const kashida = @"\u0640";

/// Returns the shared regex matching one code unit in U+064B–U+0652 (the same
/// range +isDiacritic: tests), or nil if the pattern failed to compile.
/// Built lazily and exactly once; unlike +initialize this is not re-run when a
/// subclass receives its first message.
static NSRegularExpression *OADiacriticRegex(void) {
    static NSRegularExpression *regex;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        NSError *error = nil;
        regex = [NSRegularExpression regularExpressionWithPattern:@"[\u064B-\u0652]"
                                                          options:0
                                                            error:&error];
        // Check the return value, not the error pointer, per Cocoa convention.
        if (!regex) {
            NSLog(@"Error initializing regex: %@", error.localizedDescription);
        }
    });
    return regex;
}

/// Tells whether `text` would be changed by the Arabic-specific normalization.
/// @param text String to inspect; may be nil.
/// @return YES when the first UTF-16 unit lies in U+0600–U+06FF and any unit of
///         the string is a diacritic, an Arabic-Indic digit or a kashida;
///         NO otherwise, including for nil or empty input.
+ (BOOL)isSpecialArabic:(NSString *)text {
    // Messaging nil yields 0, so this covers both nil and @"".
    if (text.length == 0) {
        return NO;
    }

    if (![self isArabicCharacter:[text characterAtIndex:0]]) {
        return NO;
    }

    NSUInteger length = text.length;
    for (NSUInteger i = 0; i < length; i++) {
        unichar c = [text characterAtIndex:i];
        if ([self isDiacritic:c] || [self isArabicDigit:c] || [self isKashida:c]) {
            return YES;
        }
    }
    return NO;
}

/// Removes diacritics (U+064B–U+0652) and kashida (U+0640) from `text`, then
/// passes the result through +replaceDigits:.
/// @param text String to normalize; may be nil.
/// @return The normalized string. nil or empty input is returned unchanged.
+ (NSString *)normalize:(NSString *)text {
    if (text.length == 0) {
        return text;
    }

    // Remove diacritics. If the regex could not be built, skip this step
    // instead of messaging nil (which would turn the whole result into nil).
    NSString *result = text;
    NSRegularExpression *diacriticRegex = OADiacriticRegex();
    if (diacriticRegex) {
        result = [diacriticRegex stringByReplacingMatchesInString:result
                                                          options:0
                                                            range:NSMakeRange(0, result.length)
                                                     withTemplate:@""];
    }

    // Remove kashida.
    result = [result stringByReplacingOccurrencesOfString:kashida withString:@""];

    // `result` may be empty by now (e.g. the input was only kashida);
    // +replaceDigits: handles that case.
    return [self replaceDigits:result];
}

/// Replaces Arabic-Indic digits with ASCII digits, but only when the string
/// starts with a character in U+0600–U+06FF.
/// @param text String to convert; may be nil.
/// @return The converted string; `text` itself when it is nil, empty, or does
///         not start with an Arabic character.
+ (NSString *)replaceDigits:(NSString *)text {
    // An empty string has no first character: -characterAtIndex: would raise
    // NSRangeException, so bail out before looking at index 0.
    if (text.length == 0) {
        return text;
    }

    if (![self isArabicCharacter:[text characterAtIndex:0]]) {
        return text;
    }

    NSMutableString *mutableText = [text mutableCopy];
    for (NSUInteger i = 0; i < arabicDigits.length; i++) {
        NSRange digitRange = NSMakeRange(i, 1);
        [mutableText replaceOccurrencesOfString:[arabicDigits substringWithRange:digitRange]
                                     withString:[digitsReplacement substringWithRange:digitRange]
                                        options:0
                                          range:NSMakeRange(0, mutableText.length)];
    }

    // Hand back an immutable snapshot rather than the working buffer.
    return [mutableText copy];
}

/// YES when `c` is in U+064B–U+0652.
+ (BOOL)isDiacritic:(unichar)c {
    return (c >= 0x064B && c <= 0x0652);
}

/// YES when `c` is an Arabic-Indic digit, U+0660–U+0669.
+ (BOOL)isArabicDigit:(unichar)c {
    return (c >= 0x0660 && c <= 0x0669);
}

/// YES when `c` is U+0640 (kashida / tatweel).
+ (BOOL)isKashida:(unichar)c {
    return (c == 0x0640);
}

/// YES when `c` is in the Arabic block U+0600–U+06FF.
+ (BOOL)isArabicCharacter:(unichar)c {
    return (c >= 0x0600 && c <= 0x06FF);
}

@end
0 commit comments