Skip to content

Commit 4358b37

Browse files
committed
update
1 parent 3caf530 commit 4358b37

File tree

23 files changed

+2202
-24
lines changed

23 files changed

+2202
-24
lines changed

build/Yunir.jl

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""
    Yunir

Top-level package module. It declares `AbstractModel`, includes the source
files for orthography, constants, transliteration, text utilities, alignment,
tokenization and the rhythmic/symmetric analyses, and exports the public names
listed below.
"""
module Yunir

import Base: download, delete!

abstract type AbstractModel end

# NOTE(review): the include order is kept exactly as it was; later files
# presumably rely on definitions from earlier ones -- confirm before reordering.
include("orthography/orthography.jl")
# include("pos/morphfeats_types.jl")
include("constants.jl")
include("transliterator/transliterate.jl")
include("utils/decode.jl")
include("utils/normalize.jl")
include("utils/dediac.jl")
include("utils/clean.jl")
include("alignment/align.jl")
include("utils/encode.jl")
include("utils/parse.jl")
include("tokenizers/tokenize.jl")
include("alignment/vis.jl")
include("analysis/rhythmic/utils.jl")
include("analysis/rhythmic/vis.jl")
include("analysis/symmetric/utils.jl")

# data
export CAMeLData, MorphologyDB, locate, load
export AR_DIACS_REGEX, AR_VOWELS, BW_VOWELS, BW_ENCODING, DEFAULT_NORMALIZER, PUNCTUATIONS_REGEX, SP_REGEX_CHARS

export expand_archars, isfeat, vocals, numerals, parse, arabic, clean, dediac, encode, normalize, tokenize, disambig, predict, install_camel
export align, score, count_matches, count_aligned, count_mismatches, count_insertions, count_deletions, collect
export Alignment, AbstractCAMeLDB, AbstractEncoder, SimpleEncoding
export @transliterator, genproperties

# Orthography
# `AbstractCharacter` used to be listed twice here; it is exported once now.
export AbstractCharacter, AbstractConsonant, AbstractSolar, AbstractLunar,
    AbstractVowel, AbstractTanween, AbstractQuranPauseMark

export Tatweel, Orthography, Fatha, Fathatan, Damma, Dammatan, Kasra, Kasratan, Shadda, Sukun, Maddah, HamzaAbove,
    HamzaBelow, HamzatWasl, AlifKhanjareeya, SmallHighSeen, SmallHighRoundedZero, SmallHighUprightRectangularZero,
    SmallHighMeemIsolatedForm, SmallLowSeen, SmallWaw, SmallYa, SmallHighNoon, EmptyCenterLowStop,
    EmptyCenterHighStop, RoundedHighStopWithFilledCenter, SmallLowMeem

# Consonants
export Alif, AlifMaksurah, Ba, Ta, TaMarbuta, Tha, Jeem, HHa, Kha, Dal, Thal, Ra, Zain, Seen, Sheen, Sad,
    DDad, TTa, DTha, Ain, Ghain, Fa, Qaf, Kaf, Lam, Meem, Noon, Waw, Ha, Hamza, Ya,
    AlifMaddah, AlifHamzaAbove, AlifHamzaBelow, AlifHamzatWasl, WawHamzaAbove, YaHamzaAbove

# Rhyme
export Harakaat, Syllabification, Syllable, Segment, Sequence, sequence, vowel_indices, syllabic_consistency

# Symmetric Analysis
export Slicer, AyahEmbeddings, AyahMidpoints, gen_midpoints, gen_slices, fitness, selection, slicer, five_summary, refine!, mutate!, crossover!

end # module

build/alignment/align.jl

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
using BioAlignments
2+
"""
    Alignment

Wrapper pairing a `BioAlignments.PairwiseAlignment` with its score.

# Fields
- `alignment`: the pairwise alignment object.
- `score`: the alignment score, stored as an `Int64`.
"""
mutable struct Alignment
3+
alignment::BioAlignments.PairwiseAlignment
4+
score::Int64
5+
end
6+
7+
"""
8+
align(src::String, tgt::String; costmodel::BioAlignments.CostModel=BioAlignments.CostModel(match=0, mismatch=1, insertion=1, deletion=1))
9+
10+
Align `tgt` string to `src` string using a particular `costmodel` from BioAlignments.jl.
11+
"""
12+
function align(src::String, tgt::String; costmodel::BioAlignments.CostModel=BioAlignments.CostModel(match=0, mismatch=1, insertion=1, deletion=1))
    # Edit-distance alignment; `tgt` is passed as the first sequence and `src`
    # as the second one, scored with `costmodel`.
    pairwise = BioAlignments.pairalign(BioAlignments.EditDistance(), tgt, src, costmodel)
    # Wrap the alignment together with its score in an `Alignment`.
    aligned = BioAlignments.alignment(pairwise)
    return Alignment(aligned, BioAlignments.score(pairwise))
end
16+
17+
"""
18+
align(src::Array{String}, tgt::Array{String};
19+
costmodel::CostModel=CostModel(match=0, mismatch=1, insertion=1, deletion=1),
20+
store_results::Bool=true
21+
)
22+
23+
Align `tgt` array of texts to `src` array of texts using a particular `costmodel` from BioAlignments.jl. If `store_results` is `true`, both the alignment results and the scores are returned;
24+
otherwise, only the scores are returned.
25+
"""
26+
function align(src::Array{String}, tgt::Array{String};
    costmodel::CostModel=CostModel(match=0, mismatch=1, insertion=1, deletion=1),
    store_results::Bool=true)
    # Align every element of `src` (rows) against every element of `tgt`
    # (columns) with the single-pair `align` method.
    #
    # Returns `(results, scores)` when `store_results` is true and only
    # `scores` otherwise. `scores[i, j]` is the score of aligning `src[i]`
    # with `tgt[j]`; `results[i, j]` is the matching `Alignment`.
    nref = length(src)
    ntgt = length(tgt)
    scores = Matrix{Int64}(undef, nref, ntgt)
    # The alignment matrix is only allocated when the caller asked for it.
    results = store_results ? Matrix{Yunir.Alignment}(undef, nref, ntgt) : nothing
    for i in 1:nref
        for j in 1:ntgt
            alignres = align(src[i], tgt[j], costmodel=costmodel)
            if store_results
                results[i, j] = alignres
            end
            scores[i, j] = score(alignres)
        end
        # Progress report after each reference text. Multiply before rounding
        # so the percentage is not polluted by floating-point noise.
        progress = round(100 * i / nref, digits=2)
        @info "$(progress)%, aligning $(i)$(_ordinal_suffix(i)) reference milestone to all target milestone."
    end
    return store_results ? (results, scores) : scores
end

# Return the English ordinal suffix ("st", "nd", "rd" or "th") for `i`.
function _ordinal_suffix(i::Integer)
    # Numbers ending in 11, 12 or 13 (11, 112, 1013, ...) always take "th",
    # even though their last digit is 1, 2 or 3.
    mod(i, 100) in 11:13 && return "th"
    last_digit = mod(i, 10)
    last_digit == 1 && return "st"
    last_digit == 2 && return "nd"
    last_digit == 3 && return "rd"
    return "th"
end
71+
72+
"""
    Base.show(io::IO, t::Alignment)

Write a header naming the two rows (`١` for the reference, `٢` for the target)
followed by the text produced by `print_align` for the wrapped alignment.
"""
function Base.show(io::IO, t::Alignment)
    # Header first, one line at a time; the last entry carries an extra
    # newline so a blank line separates the header from the body.
    for heading in ("PairwiseAlignment", "١-reference", "٢-target\n")
        println(io, heading)
    end
    rendered = print_align(t.alignment)
    print(io, rendered)
end
78+
79+
"""
    print_align(out; width::Integer=60)

Render the aligned pairs of `out` as text and return it as a `String`.

`out` is collected into a list of pairs. The pairs are laid out in chunks of
at most `width` columns; every chunk produces three lines followed by a blank
line:

1. the row labelled `٢`, built from the first element of every pair,
2. a marker row with `ا` under columns whose two elements are equal,
3. the row labelled `١`, built from the second element of every pair.

A `'-'` element (a gap) is drawn as `_`. Rows 1 and 3 are passed through
`arabic` (defined elsewhere in the package) before being written.

Every column is rendered, including the last column of each chunk and the
trailing partial chunk. An empty alignment yields an empty string.

# Throws
- `ArgumentError` if `width` is not positive.
"""
function print_align(out; width::Integer=60)
    width > 0 || throw(ArgumentError("width must be positive, got $width"))

    columns = collect(out)
    buffer = IOBuffer()
    # `Iterators.partition` also yields the final, shorter chunk, so no
    # column is left behind.
    for chunk in Iterators.partition(columns, width)
        text_a = "٢ "
        text_b = "١ "
        text_m = " "
        for col in chunk
            text_a *= col[1] == '-' ? "_" : string(col[1])
            text_b *= col[2] == '-' ? "_" : string(col[2])
            text_m *= col[1] == col[2] ? "ا" : " "
        end
        print(buffer, arabic(text_a), "\n", text_m, "\n", arabic(text_b), "\n\n")
    end
    return String(take!(buffer))
end
137+
138+
"""
    count_matches(res::Yunir.Alignment)

Forward to `BioAlignments.count_matches` on the wrapped `res.alignment`.
"""
BioAlignments.count_matches(res::Yunir.Alignment) = BioAlignments.count_matches(res.alignment)
141+
142+
"""
    count_aligned(res::Yunir.Alignment)

Forward to `BioAlignments.count_aligned` on the wrapped `res.alignment`.
"""
BioAlignments.count_aligned(res::Yunir.Alignment) = BioAlignments.count_aligned(res.alignment)
145+
146+
"""
    count_deletions(res::Yunir.Alignment)

Forward to `BioAlignments.count_deletions` on the wrapped `res.alignment`.
"""
BioAlignments.count_deletions(res::Yunir.Alignment) = BioAlignments.count_deletions(res.alignment)
149+
150+
"""
    count_insertions(res::Yunir.Alignment)

Forward to `BioAlignments.count_insertions` on the wrapped `res.alignment`.
"""
BioAlignments.count_insertions(res::Yunir.Alignment) = BioAlignments.count_insertions(res.alignment)
153+
154+
"""
    count_mismatches(res::Yunir.Alignment)

Forward to `BioAlignments.count_mismatches` on the wrapped `res.alignment`.
"""
BioAlignments.count_mismatches(res::Yunir.Alignment) = BioAlignments.count_mismatches(res.alignment)
157+
158+
"""
    collect(res::Yunir.Alignment)

Forward to `collect` on the wrapped `res.alignment`.
"""
BioAlignments.collect(res::Yunir.Alignment) = BioAlignments.collect(res.alignment)
161+
162+
"""
    score(res::Yunir.Alignment)

Return the score stored in `res.score`.
"""
BioAlignments.score(res::Yunir.Alignment) = res.score

0 commit comments

Comments
 (0)