@@ -166,9 +166,13 @@ def reshape(self, text):
166166
167167 delete_harakat = self .configuration .getboolean ('delete_harakat' )
168168 delete_tatweel = self .configuration .getboolean ('delete_tatweel' )
169+ support_zwj = self .configuration .getboolean ('support_zwj' )
169170 positions_harakat = {}
170171
171- for letter in text :
172+ arabic_word_start = - 1
173+ zwjs = []
174+
175+ for i , letter in enumerate (text ):
172176 if HARAKAT_RE .match (letter ):
173177 if not delete_harakat :
174178 position = len (output ) - 1
@@ -177,13 +181,51 @@ def reshape(self, text):
177181 positions_harakat [position ].append (letter )
178182 elif letter == TATWEEL and delete_tatweel :
179183 pass
184+ elif letter == ZWJ and support_zwj :
185+ zwjs .append (i )
186+
187+ if arabic_word_start != - 1 :
188+ # Handle three consecutive ZWJs or more
189+ if (
190+ len (zwjs ) > 2 and
191+ zwjs [- 2 ] == i - 1 and
192+ zwjs [- 3 ] == i - 2
193+ ):
194+ arabic_word_start = - 1
195+ # Handle when previous letter is not ZWJ
196+ elif (
197+ output and
198+ len (zwjs ) == 1 or (len (zwjs ) > 1 and zwjs [- 2 ] != i - 1 )
199+ ):
200+ previous_letter = output [- 1 ]
201+ if connects_with_letter_after (previous_letter [LETTER ]):
202+ if previous_letter [FORM ] == ISOLATED :
203+ output [- 1 ] = (
204+ previous_letter [LETTER ],
205+ INITIAL
206+ )
207+ else :
208+ output [- 1 ] = (
209+ previous_letter [LETTER ],
210+ MEDIAL
211+ )
180212 elif letter not in LETTERS :
213+ arabic_word_start = - 1
181214 output .append ((letter , NOT_SUPPORTED ))
182- elif not output :
215+ elif not output : # first letter
216+ arabic_word_start = i
183217 output .append ((letter , ISOLATED ))
184218 else :
219+ if arabic_word_start == - 1 :
220+ arabic_word_start = i
185221 previous_letter = output [- 1 ]
186- if previous_letter [FORM ] == NOT_SUPPORTED :
222+ if (
223+ arabic_word_start != i and
224+ zwjs and
225+ connects_with_letter_before (letter )
226+ ):
227+ output .append ((letter , FINAL ))
228+ elif previous_letter [FORM ] == NOT_SUPPORTED :
187229 output .append ((letter , ISOLATED ))
188230 elif not connects_with_letter_before (letter ):
189231 output .append ((letter , ISOLATED ))
@@ -211,6 +253,10 @@ def reshape(self, text):
211253 )
212254 output .append ((letter , FINAL ))
213255
256+ # clear ZWJs
257+ if zwjs and letter != ZWJ :
258+ zwjs = []
259+
214260 if self .configuration .getboolean ('support_ligatures' ):
215261 # Clean text from Harakat to be able to find ligatures
216262 text = HARAKAT_RE .sub ('' , text )
0 commit comments