Skip to content

Commit 2b4465e

Browse files
committed
New mode for Harakat
+ Updated letters + Updated ligatures + New mode for Harakat to shift Harakat position for bidi get_display
1 parent 0839a95 commit 2b4465e

File tree

6 files changed

+51
-16
lines changed

6 files changed

+51
-16
lines changed

arabic_reshaper/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '2.0.11'
1+
__version__ = '2.0.12'

arabic_reshaper/arabic_reshaper.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,15 +167,24 @@ def reshape(self, text):
167167
delete_harakat = self.configuration.getboolean('delete_harakat')
168168
delete_tatweel = self.configuration.getboolean('delete_tatweel')
169169
support_zwj = self.configuration.getboolean('support_zwj')
170+
shift_harakat_position = self.configuration.getboolean(
171+
'shift_harakat_position'
172+
)
173+
170174
positions_harakat = {}
171175

172176
for letter in text:
173177
if HARAKAT_RE.match(letter):
174178
if not delete_harakat:
175179
position = len(output) - 1
180+
if shift_harakat_position:
181+
position -= 1
176182
if position not in positions_harakat:
177183
positions_harakat[position] = []
178-
positions_harakat[position].append(letter)
184+
if shift_harakat_position:
185+
positions_harakat[position].insert(0, letter)
186+
else:
187+
positions_harakat[position].append(letter)
179188
elif letter == TATWEEL and delete_tatweel:
180189
pass
181190
elif letter == ZWJ and not support_zwj:

arabic_reshaper/default-config.ini

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ language = Arabic
66
# Whether to delete the Harakat (Tashkeel) before reshaping or not.
77
delete_harakat = yes
88

9+
# Whether to shift the Harakat (Tashkeel) one position so that they appear correctly when the string is reversed.
10+
shift_harakat_position = no
11+
912
# Whether to delete the Tatweel (U+0640) before reshaping or not.
1013
delete_tatweel = no
1114

@@ -189,6 +192,7 @@ ARABIC LIGATURE QAF WITH MEEM WITH HAH = no
189192
ARABIC LIGATURE QAF WITH MEEM WITH MEEM = no
190193
ARABIC LIGATURE QAF WITH MEEM WITH YEH = no
191194
ARABIC LIGATURE QAF WITH YEH = no
195+
ARABIC LIGATURE QALA USED AS KORANIC STOP SIGN = no
192196
ARABIC LIGATURE REH WITH SUPERSCRIPT ALEF = no
193197
ARABIC LIGATURE SAD WITH ALEF MAKSURA = no
194198
ARABIC LIGATURE SAD WITH HAH = no
@@ -199,6 +203,7 @@ ARABIC LIGATURE SAD WITH MEEM = no
199203
ARABIC LIGATURE SAD WITH MEEM WITH MEEM = no
200204
ARABIC LIGATURE SAD WITH REH = no
201205
ARABIC LIGATURE SAD WITH YEH = no
206+
ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN = no
202207
ARABIC LIGATURE SEEN WITH ALEF MAKSURA = no
203208
ARABIC LIGATURE SEEN WITH HAH = no
204209
ARABIC LIGATURE SEEN WITH HAH WITH JEEM = no
@@ -215,18 +220,19 @@ ARABIC LIGATURE SEEN WITH MEEM WITH JEEM = no
215220
ARABIC LIGATURE SEEN WITH MEEM WITH MEEM = no
216221
ARABIC LIGATURE SEEN WITH REH = no
217222
ARABIC LIGATURE SEEN WITH YEH = no
218-
ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM = no
219-
ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM = no
220-
ARABIC LIGATURE SHADDA WITH FATHA ISOLATED FORM = no
223+
ARABIC LIGATURE SHADDA WITH DAMMA = no
221224
ARABIC LIGATURE SHADDA WITH DAMMA ISOLATED FORM = no
222-
ARABIC LIGATURE SHADDA WITH KASRA ISOLATED FORM = no
223-
ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF ISOLATED FORM = no
224-
ARABIC LIGATURE SHADDA WITH FATHA MEDIAL FORM = no
225225
ARABIC LIGATURE SHADDA WITH DAMMA MEDIAL FORM = no
226-
ARABIC LIGATURE SHADDA WITH KASRA MEDIAL FORM = no
226+
ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM = no
227227
ARABIC LIGATURE SHADDA WITH FATHA = no
228-
ARABIC LIGATURE SHADDA WITH DAMMA = no
228+
ARABIC LIGATURE SHADDA WITH FATHA ISOLATED FORM = no
229+
ARABIC LIGATURE SHADDA WITH FATHA MEDIAL FORM = no
229230
ARABIC LIGATURE SHADDA WITH KASRA = no
231+
ARABIC LIGATURE SHADDA WITH KASRA ISOLATED FORM = no
232+
ARABIC LIGATURE SHADDA WITH KASRA MEDIAL FORM = no
233+
ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM = no
234+
ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF = no
235+
ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF ISOLATED FORM = no
230236
ARABIC LIGATURE SHEEN WITH ALEF MAKSURA = no
231237
ARABIC LIGATURE SHEEN WITH HAH = no
232238
ARABIC LIGATURE SHEEN WITH HAH WITH MEEM = no

arabic_reshaper/letters.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@
9494
'\u0647': ('\uFEE9', '\uFEEB', '\uFEEC', '\uFEEA'),
9595
# ARABIC LETTER WAW
9696
'\u0648': ('\uFEED', '', '', '\uFEEE'),
97-
# ARABIC LETTER ALEF MAKSURA
98-
'\u0649': ('\uFEEF', '', '', '\uFEF0'),
97+
# ARABIC LETTER (UIGHUR KAZAKH KIRGHIZ)? ALEF MAKSURA
98+
'\u0649': ('\uFEEF', '\uFBE8', '\uFBE9', '\uFEF0'),
9999
# ARABIC LETTER YEH
100100
'\u064A': ('\uFEF1', '\uFEF3', '\uFEF4', '\uFEF2'),
101101
# ARABIC LETTER ALEF WASLA

arabic_reshaper/ligatures.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,9 @@
583583
('ARABIC LIGATURE QAF WITH YEH', (
584584
'\u0642\u064A', ('\uFC36', '', '', '\uFC7F'),
585585
)),
586+
('ARABIC LIGATURE QALA USED AS KORANIC STOP SIGN', (
587+
'\u0642\u0644\u06D2', ('\uFDF1', '', '', ''),
588+
)),
586589
('ARABIC LIGATURE REH WITH SUPERSCRIPT ALEF', (
587590
'\u0631\u0670', ('\uFC5C', '', '', ''),
588591
)),
@@ -613,6 +616,9 @@
613616
('ARABIC LIGATURE SAD WITH YEH', (
614617
'\u0635\u064A', ('\uFD06', '', '', '\uFD22'),
615618
)),
619+
('ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN', (
620+
'\u0635\u0644\u06D2', ('\uFDF0', '', '', ''),
621+
)),
616622
('ARABIC LIGATURE SEEN WITH ALEF MAKSURA', (
617623
'\u0633\u0649', ('\uFCFB', '', '', '\uFD17'),
618624
)),
@@ -688,10 +694,8 @@
688694

689695
('\uFC62', '\uFC62', '\uFC62', '\uFC62'),
690696
)),
691-
('ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF ISOLATED FORM', (
692-
'(?:\u0670\u0651|\u0651\u0670)',
693-
694-
('\uFC63', '\uFC63', '\uFC63', '\uFC63'),
697+
('ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF', (
698+
'(?:\u0651\u0670|\u0670\u0651)', ('\uFC63', '', '', ''),
695699
)),
696700

697701
# There is a special case when they are with Tatweel

arabic_reshaper/tests/test_002_reshaping.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,22 @@ def test_reshaping(self):
177177
_reshaping_test(self)
178178

179179

180+
class TestReshapingWithShiftedHarakatWithoutLigatures(unittest.TestCase):
181+
def setUp(self):
182+
self.reshaper = arabic_reshaper.ArabicReshaper({
183+
'delete_harakat': False,
184+
'support_ligatures': False,
185+
'shift_harakat_position': True,
186+
})
187+
self.cases = (
188+
('فُعِلَ', 'ُﻓِﻌَﻞ'),
189+
('فُعِّلَ', 'ُﻓِّﻌَﻞ'),
190+
)
191+
192+
def test_reshaping(self):
193+
_reshaping_test(self)
194+
195+
180196
class TestReshapingSomeLigatures(unittest.TestCase):
181197
def setUp(self):
182198
self.reshaper = arabic_reshaper.ArabicReshaper({

0 commit comments

Comments
 (0)