Skip to content
This repository was archived by the owner on Oct 10, 2025. It is now read-only.

Commit 2661cb1

Browse files
committed
Cornelis's work and Dirk's work
Merge branch 'master' of https://github.com/among/fusus
2 parents fba7820 + 663ade5 commit 2661cb1

18 files changed

+221295
-44509
lines changed

fusust-text-laboratory/FixingBadSplits.ipynb

Lines changed: 3696 additions & 0 deletions
Large diffs are not rendered by default.

fusust-text-laboratory/FususReworking.ipynb

Lines changed: 4978 additions & 2967 deletions
Large diffs are not rendered by default.

fusust-text-laboratory/SplittingWords.ipynb

Lines changed: 3915 additions & 0 deletions
Large diffs are not rendered by default.

fusust-text-laboratory/TagTitles.ipynb

Lines changed: 761 additions & 0 deletions
Large diffs are not rendered by default.

fusust-text-laboratory/TaggingQuranCitations.ipynb

Lines changed: 4731 additions & 0 deletions
Large diffs are not rendered by default.

fusust-text-laboratory/arabicABC.py

Lines changed: 209 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
1+
# Closing delimiter of a Quran citation, going by the name:
# U+FD3E ORNATE LEFT PARENTHESIS.
# Spelled as an escape, like the punctuation constants in this module,
# because the raw glyph is bidi-mirrored and easy to misread in an editor.
QURANCLOSE = '\uFD3E'
2+
# Opening delimiter of a Quran citation, going by the name:
# U+FD3F ORNATE RIGHT PARENTHESIS (it comes first in right-to-left text).
# Spelled as an escape, like the punctuation constants in this module,
# because the raw glyph is bidi-mirrored and easy to misread in an editor.
QURANOPEN = '\uFD3F'
23
# U+060C ARABIC COMMA.
COMMA = '\u060C'
34
# U+061B ARABIC SEMICOLON.
SEMICOLON = '\u061B'
45
# U+061F ARABIC QUESTION MARK.
QUESTION = '\u061F'
@@ -92,6 +93,7 @@
9293
HAMZA, ALEF_MADDA, ALEF_HAMZA_ABOVE, WAW_HAMZA, ALEF_HAMZA_BELOW,
9394
YEH_HAMZA, ALEF_MAKSURA, ALEF_WASLA, HAMZA_ABOVE, HAMZA_BELOW
9495
)
96+
# The ten digit constants, in ascending order from ZERO to NINE.
NUMBERS = (
    ZERO, ONE, TWO, THREE, FOUR,
    FIVE, SIX, SEVEN, EIGHT, NINE,
)
9597

9698
TASHKEEL = (FATHATAN, DAMMATAN, KASRATAN,
9799
FATHA, DAMMA, KASRA,
@@ -217,4 +219,210 @@
217219
KASRA: 'كسرة',
218220
SHADDA: 'شدة',
219221
SUKUN: 'سكون',
222+
}
223+
224+
# Maps a sura name (Arabic, written without spaces) to its sura number.
#
# Several keys are alternative spellings of the same sura and deliberately
# share a number: 'العراف' / 'الأعراف' (7), 'الإسراء' / 'الإسرى' (17),
# 'الأنبياء' / 'الأنبيا' (21). Two keys carry an 'اقتباس' prefix and map to
# the same numbers as 'يونس' (10) and 'يوسف' (12).
# NOTE(review): these variants presumably mirror spellings found in the
# tagged text -- confirm before normalising or removing any of them.
QURAN_SURAS = {
    'آلعمران': 3,
    'غافر': 40,
    'الروم': 30,
    'يونس': 10,
    'مريم': 19,
    'فصلت': 41,
    'العراف': 7,
    'القيامة': 75,
    'الواقعة': 56,
    'الصافات': 37,
    'الكهف': 18,
    'الأنبياء': 21,
    'البقرة': 2,
    'الأحزاب': 33,
    'الأحقاف': 46,
    'المائدة': 5,
    'التحريم': 66,
    'البينة': 98,
    'الجاثية': 45,
    'الحجر': 15,
    'السجدة': 32,
    'التكوير': 81,
    'الفاتحة': 1,
    'النحل': 16,
    'النور': 24,
    'ق': 50,
    'الأنفال': 8,
    'الزلزال': 99,
    'الإسراء': 17,
    'الزخرف': 43,
    'الرعد': 13,
    'الرحمن': 55,
    'القصص': 28,
    'التوبة': 9,
    'العنكبوت': 29,
    'النمل': 27,
    'فاطر': 35,
    'الحديد': 57,
    'الفتح': 48,
    'الفرقان': 25,
    'لقمان': 31,
    'نوح': 71,
    'يس': 36,
    'ص': 38,
    'الحشر': 59,
    'القلم': 68,
    'يوسف': 12,
    'المزمل': 73,
    'الشورى': 42,
    'الأنعام': 6,
    'الزمر': 39,
    'عبس': 80,
    'الحج': 22,
    'الفجر': 89,
    'محمد': 47,
    'سبأ': 34,
    'النازعات': 79,
    'الشعراء': 26,
    'طه': 20,
    'النساء': 4,
    'الإخلاص': 112,
    'هود': 11,
    'ابراهيم': 14,
    'الإسرى': 17,
    'الأعراف': 7,
    'اقتباسيونس': 10,
    'اقتباسيوسف': 12,
    'الأنبيا': 21,
}

0 commit comments

Comments
 (0)