-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTestUnicode.ArabicShaping.pas
More file actions
148 lines (124 loc) · 4.28 KB
/
TestUnicode.ArabicShaping.pas
File metadata and controls
148 lines (124 loc) · 4.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
unit TestUnicode.ArabicShaping;
interface
uses
Generics.Collections,
Windows, Classes, SysUtils,
PascalType.Unicode,
TestFramework;
type
TTestPascalTypeUnicodeArabicShaping = class(TTestCase)
public
procedure SetUp; override;
published
procedure TestArabicShapingData;
end;
implementation
uses
IOUtils,
TestUnicode;
const
sUnicodeDataFolder = '..\..\Source\Unicode\UCD';
sArabicShapingFileName = 'ArabicShaping.txt';
procedure TTestPascalTypeUnicodeArabicShaping.SetUp;
begin
inherited;
ArabicShapingClasses.Load;
end;
procedure TTestPascalTypeUnicodeArabicShaping.TestArabicShapingData;
function HexToCardinal(const Hex: string): TPascalTypeCodePoint;
begin
Result := StrToInt('$'+Hex);
end;
function HexToCardinals(const Hex: string): TPascalTypeCodePoints;
begin
var Values := Hex.Split([' ']);
SetLength(Result, Length(Values));
for var i := 0 to High(Values) do
Result[i] := HexToCardinal(Values[i]);
end;
begin
inherited;
(*
# Each line contains four fields, separated by a semicolon.
#
# Field 0: the code point, in 4-digit hexadecimal
# form, of a character.
#
# Field 1: gives a short schematic name for that character.
# The schematic name is descriptive of the shape, based as
# consistently as possible on a name for the skeleton and
# then the diacritic marks applied to the skeleton, if any.
# Note that this schematic name is considered a comment,
# and does not constitute a formal property value.
#
# Field 2: defines the joining type (property name: Joining_Type)
# R Right_Joining
# L Left_Joining
# D Dual_Joining
# C Join_Causing
# U Non_Joining
# T Transparent
#
# See Section 9.2, Arabic for more information on these joining types.
# Note that for cursive joining scripts which are typically rendered
# top-to-bottom, rather than right-to-left, Joining_Type=L conventionally
# refers to bottom joining, and Joining_Type=R conventionally refers
# to top joining. See Section 14.4, Phags-pa for more information on the
# interpretation of joining types in vertical layout.
#
# Field 3: defines the joining group (property name: Joining_Group)
#
# The values of the joining group are based schematically on character
# names. Where a schematic character name consists of two or more parts
# separated by spaces, the formal Joining_Group property value, as specified in
# PropertyValueAliases.txt, consists of the same name parts joined by
# underscores. Hence, the entry:
#
# 0629; TEH MARBUTA; R; TEH MARBUTA
#
# corresponds to [Joining_Group = Teh_Marbuta].
*)
var Reader := TStreamReader.Create(TPath.Combine(sUnicodeDataFolder, sArabicShapingFileName), TEncoding.UTF8);
try
var LineNumber := 0;
while (not Reader.EndOfStream) do
begin
// # Unicode; Schematic Name; Joining Type; Joining Group
Inc(LineNumber);
var Line := Reader.ReadLine.Trim;
if (Line = '') or (Line.StartsWith('#')) then
continue;
var Columns := Line.Split([';']);
if (Length(Columns) < 4) then
continue;
var Code: Cardinal := StrToInt('$'+Columns[0].Trim);
var JoiningType: Char := Columns[2].Trim[1];
var ShapingClassValue: ArabicShapingClasses.TShapingClass := scUnassigned;
if (JoiningType = 'R') then
begin
var JoiningGroup := Columns[3].Trim;
if (JoiningGroup = 'ALAPH') then
ShapingClassValue := scALAPH
else
if (JoiningGroup = 'DALATH RISH') then
ShapingClassValue := scDALATH_RISH;
end;
if (ShapingClassValue = scUnassigned) then
case JoiningType of
'U': ShapingClassValue := scNon_Joining; // Non_Joining
'L': ShapingClassValue := scLeft_Joining; // Left_Joining
'R': ShapingClassValue := scRight_Joining; // Right_Joining
'D': ShapingClassValue := scDual_Joining; // Dual_Joining
'C': ShapingClassValue := scDual_Joining; // Join_Causing
'T': ShapingClassValue := scTransparent; // Transparent
end;
if (ShapingClassValue <> scUnassigned) then
CheckEquals(Ord(ShapingClassValue), Ord(ArabicShapingClasses.Trie.Values[Code]), Format('Line #%d', [LineNumber]));
end;
finally
Reader.Free;
end;
end;
initialization
TestSuiteUnicode.AddSuite(TTestPascalTypeUnicodeArabicShaping.Suite);
end.