Skip to content

Commit 19c803f

Browse files
authored
Merge pull request #4349 from osmandapp/arabic_norm
Arabic normalizer
2 parents a488913 + e8b04d0 commit 19c803f

File tree

5 files changed

+130
-0
lines changed

5 files changed

+130
-0
lines changed

OsmAnd.xcodeproj/project.pbxproj

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@
211211
15CD7EA725B1C1A700BCB36A /* ic_small_time_start@2x.png in Resources */ = {isa = PBXBuildFile; fileRef = 15CD7EA425B1C1A700BCB36A /* ic_small_time_start@2x.png */; };
212212
15CD7EAD25B1C1B900BCB36A /* ic_small_waypoints@3x.png in Resources */ = {isa = PBXBuildFile; fileRef = 15CD7EAA25B1C1B700BCB36A /* ic_small_waypoints@3x.png */; };
213213
15CD7EAF25B1C1B900BCB36A /* ic_small_waypoints@2x.png in Resources */ = {isa = PBXBuildFile; fileRef = 15CD7EAC25B1C1B800BCB36A /* ic_small_waypoints@2x.png */; };
214+
2C8E8A562D5104E600746A69 /* OAArabicNormalizer.mm in Sources */ = {isa = PBXBuildFile; fileRef = 2C8E8A552D5104E600746A69 /* OAArabicNormalizer.mm */; };
214215
320076142BFC775100CDDDAF /* SpeedLimitWarningViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 320076132BFC775100CDDDAF /* SpeedLimitWarningViewController.swift */; };
215216
320427D62BF4D5250085DCA1 /* SpeedometerWidgetSettingsViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 320427D52BF4D5250085DCA1 /* SpeedometerWidgetSettingsViewController.swift */; };
216217
32048FC52A25E70200AA4B71 /* BaseOAuthHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 32048FC42A25E70200AA4B71 /* BaseOAuthHelper.swift */; };
@@ -3474,6 +3475,8 @@
34743475
15CD7EAA25B1C1B700BCB36A /* ic_small_waypoints@3x.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = "ic_small_waypoints@3x.png"; path = "Resources/Icons/ic_small_waypoints@3x.png"; sourceTree = "<group>"; };
34753476
15CD7EAC25B1C1B800BCB36A /* ic_small_waypoints@2x.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = "ic_small_waypoints@2x.png"; path = "Resources/Icons/ic_small_waypoints@2x.png"; sourceTree = "<group>"; };
34763477
2503BB823D105783DC4982C8 /* Pods-OsmAnd Maps.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-OsmAnd Maps.debug.xcconfig"; path = "Target Support Files/Pods-OsmAnd Maps/Pods-OsmAnd Maps.debug.xcconfig"; sourceTree = "<group>"; };
3478+
2C8E8A542D5104E600746A69 /* OAArabicNormalizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = OAArabicNormalizer.h; sourceTree = "<group>"; };
3479+
2C8E8A552D5104E600746A69 /* OAArabicNormalizer.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = OAArabicNormalizer.mm; sourceTree = "<group>"; };
34773480
320076132BFC775100CDDDAF /* SpeedLimitWarningViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SpeedLimitWarningViewController.swift; sourceTree = "<group>"; };
34783481
320427D52BF4D5250085DCA1 /* SpeedometerWidgetSettingsViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SpeedometerWidgetSettingsViewController.swift; sourceTree = "<group>"; };
34793482
32048FC42A25E70200AA4B71 /* BaseOAuthHelper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BaseOAuthHelper.swift; sourceTree = "<group>"; };
@@ -13075,6 +13078,8 @@
1307513078
DA5A808E26C563A500F274C7 /* Helpers */ = {
1307613079
isa = PBXGroup;
1307713080
children = (
13081+
2C8E8A542D5104E600746A69 /* OAArabicNormalizer.h */,
13082+
2C8E8A552D5104E600746A69 /* OAArabicNormalizer.mm */,
1307813083
FA5D71E02CE24C4E0062BC4D /* LockHelper.swift */,
1307913084
FA45851C2C946046003A5AD7 /* SharedLibHelpers */,
1308013085
32AB48672C9C50A1005EF1D4 /* DownloadingListHelper */,
@@ -16252,6 +16257,7 @@
1625216257
4656BD362C4855D200B69928 /* ColorizationType.swift in Sources */,
1625316258
DA5A816026C563A700F274C7 /* OAParkingPositionPlugin.mm in Sources */,
1625416259
DA5A816826C563A700F274C7 /* OAWikiImageCard.mm in Sources */,
16260+
2C8E8A562D5104E600746A69 /* OAArabicNormalizer.mm in Sources */,
1625516261
DA5A843226C563A800F274C7 /* OATransportDetailsTableViewController.mm in Sources */,
1625616262
DAC849B426CF967C00018091 /* OADownloadMapWidget.mm in Sources */,
1625716263
322B53952C78DD14006B48B0 /* OAIconsPaletteCell.m in Sources */,

Sources/Common/OACollatorStringMatcher.m

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#import "OACollatorStringMatcher.h"
1010
#import "OAUtilities.h"
11+
#import "OAArabicNormalizer.h"
1112

1213
static NSStringCompareOptions comparisonOptions = NSCaseInsensitiveSearch | NSWidthInsensitiveSearch | NSDiacriticInsensitiveSearch;
1314

@@ -46,6 +47,13 @@ - (BOOL) matches:(NSString *)name
4647

4748
+ (BOOL) cmatches:(NSString *)fullName part:(NSString *)part mode:(StringMatcherMode)mode
4849
{
50+
if ([OAArabicNormalizer isSpecialArabic:fullName]) {
51+
fullName = [OAArabicNormalizer normalize:fullName];
52+
}
53+
54+
if ([OAArabicNormalizer isSpecialArabic:part]) {
55+
part = [OAArabicNormalizer normalize:part];
56+
}
4957
switch (mode)
5058
{
5159
case CHECK_CONTAINS:
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#import <Foundation/Foundation.h>
2+
3+
/// Helpers for normalizing Arabic text before string matching / search.
///
/// Every pointer is annotated explicitly: both methods tolerate a nil argument,
/// and `normalize:` passes a nil argument straight through as its result.
@interface OAArabicNormalizer : NSObject

/// Tells whether `text` needs normalization.
/// @param text The string to inspect; may be nil or empty.
/// @return YES when the first UTF-16 unit of `text` is in U+0600–U+06FF and the
///         string contains at least one Arabic diacritic (U+064B–U+0652),
///         Arabic-Indic digit (U+0660–U+0669) or kashida (U+0640); otherwise NO.
+ (BOOL)isSpecialArabic:(nullable NSString *)text;

/// Removes Arabic diacritics (U+064B–U+0652) and kashida (U+0640) from `text`
/// and converts Arabic-Indic digits (U+0660–U+0669) to ASCII digits.
/// Digits are converted only when the text left after stripping starts with a
/// character in U+0600–U+06FF.
/// @param text The string to normalize; may be nil or empty.
/// @return The normalized string; nil when `text` is nil.
+ (nullable NSString *)normalize:(nullable NSString *)text;

@end
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
#import "OAArabicNormalizer.h"
2+
3+
@implementation OAArabicNormalizer

// Code points used by the normalizer. All of them are in the Basic Multilingual
// Plane, so each one occupies exactly one UTF-16 unit (unichar) in an NSString.
static const unichar kArabicBlockFirst = 0x0600;
static const unichar kArabicBlockLast = 0x06FF;
static const unichar kDiacriticFirst = 0x064B;
static const unichar kDiacriticLast = 0x0652;
static const unichar kKashida = 0x0640;
static const unichar kArabicDigitZero = 0x0660;
static const unichar kArabicDigitNine = 0x0669;

#pragma mark - Public

/// Returns YES when `text` starts with an Arabic-block character and contains
/// at least one diacritic, Arabic-Indic digit or kashida, i.e. when
/// `normalize:` may change it. Returns NO for nil or empty input.
+ (BOOL)isSpecialArabic:(NSString *)text {
    NSUInteger length = text.length; // 0 for nil as well
    if (length == 0) {
        return NO;
    }

    // Only strings that begin with an Arabic-block character are candidates.
    if (![self isArabicCharacter:[text characterAtIndex:0]]) {
        return NO;
    }

    for (NSUInteger i = 0; i < length; i++) {
        unichar c = [text characterAtIndex:i];
        if ([self isDiacritic:c] || [self isArabicDigit:c] || [self isKashida:c]) {
            return YES;
        }
    }

    return NO;
}

/// Strips diacritics and kashida from `text`, then converts Arabic-Indic digits
/// to ASCII digits via `replaceDigits:`.
///
/// nil or empty input is returned unchanged. A non-empty input that consists
/// only of diacritics / kashida yields an empty string (never nil), so callers
/// can keep messaging the result safely.
+ (NSString *)normalize:(NSString *)text {
    NSUInteger length = text.length;
    if (length == 0) {
        return text;
    }

    // Work on a scratch UTF-16 buffer owned by an NSMutableData so ARC releases it.
    NSMutableData *storage = [NSMutableData dataWithLength:length * sizeof(unichar)];
    unichar *chars = (unichar *)storage.mutableBytes;
    [text getCharacters:chars range:NSMakeRange(0, length)];

    // Compact the buffer in place, dropping diacritics and kashida.
    NSUInteger kept = 0;
    for (NSUInteger i = 0; i < length; i++) {
        unichar c = chars[i];
        if ([self isDiacritic:c] || [self isKashida:c]) {
            continue;
        }
        chars[kept++] = c;
    }

    NSString *stripped = [NSString stringWithCharacters:chars length:kept];
    return [self replaceDigits:stripped];
}

#pragma mark - Private

/// Converts Arabic-Indic digits (U+0660–U+0669) in `text` to ASCII '0'–'9'.
///
/// The conversion is applied only when the first character of `text` is in the
/// Arabic block; otherwise `text` is returned as is. nil or empty input is
/// returned unchanged (an empty string stays an empty string).
+ (NSString *)replaceDigits:(NSString *)text {
    NSUInteger length = text.length;
    if (length == 0) {
        return text;
    }

    if (![self isArabicCharacter:[text characterAtIndex:0]]) {
        return text;
    }

    NSMutableData *storage = [NSMutableData dataWithLength:length * sizeof(unichar)];
    unichar *chars = (unichar *)storage.mutableBytes;
    [text getCharacters:chars range:NSMakeRange(0, length)];

    for (NSUInteger i = 0; i < length; i++) {
        if ([self isArabicDigit:chars[i]]) {
            // The ten Arabic-Indic digits are contiguous, so the offset from
            // U+0660 is the digit's numeric value.
            chars[i] = (unichar)('0' + (chars[i] - kArabicDigitZero));
        }
    }

    return [NSString stringWithCharacters:chars length:length];
}

/// YES for the combining marks U+064B–U+0652.
+ (BOOL)isDiacritic:(unichar)c {
    return (c >= kDiacriticFirst && c <= kDiacriticLast);
}

/// YES for the Arabic-Indic digits U+0660–U+0669.
+ (BOOL)isArabicDigit:(unichar)c {
    return (c >= kArabicDigitZero && c <= kArabicDigitNine);
}

/// YES for the kashida (tatweel) elongation character U+0640.
+ (BOOL)isKashida:(unichar)c {
    return (c == kKashida);
}

/// YES for any code unit in the Arabic block U+0600–U+06FF.
+ (BOOL)isArabicCharacter:(unichar)c {
    return (c >= kArabicBlockFirst && c <= kArabicBlockLast);
}

@end

Sources/Search/OASearchCoreFactory.mm

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#import "OAResultMatcher.h"
3939
#import "OATopIndexFilter.h"
4040
#import "OACollatorStringMatcher.h"
41+
#import "OAArabicNormalizer.h"
4142

4243
#include <OsmAndCore.h>
4344
#include <OsmAndCore/IObfsCollection.h>
@@ -478,6 +479,10 @@ - (void) searchByName:(OASearchPhrase *)phrase resultMatcher:(OASearchResultMatc
478479
if ([phrase getRadiusLevel] > 1 || [phrase getUnknownWordToSearch].length > 3 || [phrase hasMoreThanOneUnknownSearchWord] || [phrase isSearchTypeAllowed:POSTCODE exclusive:YES])
479480
{
480481
NSString *wordToSearch = [phrase getUnknownWordToSearch];
482+
if ([OAArabicNormalizer isSpecialArabic:wordToSearch]) {
483+
wordToSearch = [OAArabicNormalizer normalize:wordToSearch];
484+
}
485+
481486
if (wordToSearch.length == 0)
482487
return;
483488

@@ -721,6 +726,9 @@ - (BOOL) search:(OASearchPhrase *)phrase resultMatcher:(OASearchResultMatcher *)
721726
NSMutableSet<NSString *> *ids = [NSMutableSet new];
722727

723728
NSString *searchWord = [phrase getUnknownWordToSearch];
729+
if ([OAArabicNormalizer isSpecialArabic:searchWord]) {
730+
searchWord = [OAArabicNormalizer normalize:searchWord];
731+
}
724732
OANameStringMatcher *nm = [phrase getMainUnknownNameStringMatcher];
725733

726734
QuadRect *bbox = [phrase getFileId] != nil ? [phrase getRadiusBBox31ToSearch:BBOX_RADIUS_POI_IN_CITY] : [phrase getRadiusBBox31ToSearch:BBOX_RADIUS_INSIDE];

0 commit comments

Comments
 (0)