Skip to content

Commit 78baae7

Browse files
committed
Added support for CJK radicals.
Added CJK radical & stroke count information to the UI.
1 parent 475e9c5 commit 78baae7

18 files changed

+402
-57
lines changed

UnicodeCharacterInspector/App.xaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
<BooleanToVisibilityConverter x:Key="BooleanToVisibilityConverter" />
88
<local:NullToVisibilityConverter x:Key="NullToVisibilityConverter" />
99
<local:ZeroToVisibilityConverter x:Key="ZeroToVisibilityConverter" />
10-
<local:Utf32ToDisplayTextConverter x:Key="Utf32ToStringConverter" />
10+
<local:Utf32ToDisplayTextConverter x:Key="Utf32ToDisplayTextConverter" />
1111
<local:Utf32ToNameConverter x:Key="Utf32ToNameConverter" />
12+
<local:StringToUtf32Converter x:Key="StringToUtf32Converter" />
13+
<local:RadicalStrokeCountToCharConverter x:Key="RadicalStrokeCountToCharConverter" />
1214
</Application.Resources>
1315
</Application>

UnicodeCharacterInspector/CharacterInfoViewModel.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ public string Character
6767
NotifyPropertyChanged(nameof(NumericValue));
6868
NotifyPropertyChanged(nameof(ContributoryProperties));
6969
NotifyPropertyChanged(nameof(CoreProperties));
70+
NotifyPropertyChanged(nameof(RadicalStrokeCounts));
7071
NotifyPropertyChanged(nameof(CrossReferences));
7172
NotifyPropertyChanged(nameof(MandarinReading));
7273
NotifyPropertyChanged(nameof(CantoneseReading));
@@ -173,6 +174,11 @@ public CoreProperties? CoreProperties
173174
get { return character != null ? characterInfo.CoreProperties : null as CoreProperties?; }
174175
}
175176

177+
/// <summary>
/// Gets the radical / stroke count entries of the character currently being inspected.
/// </summary>
/// <remarks>
/// When no character is set, a newly constructed <see cref="UnicodeRadicalStrokeCountCollection"/> is returned
/// instead of reading from <c>characterInfo</c>.
/// </remarks>
public UnicodeRadicalStrokeCountCollection RadicalStrokeCounts
{
    get
    {
        if (character == null)
        {
            return new UnicodeRadicalStrokeCountCollection();
        }

        return characterInfo.UnicodeRadicalStrokeCounts;
    }
}
181+
176182
public UnicodeCrossReferenceCollection CrossReferences
177183
{
178184
get { return character != null ? characterInfo.CrossRerefences : new UnicodeCrossReferenceCollection(); }

UnicodeCharacterInspector/MainWindow.xaml

Lines changed: 56 additions & 43 deletions
Large diffs are not rendered by default.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
using System;
2+
using System.Globalization;
3+
using System.Unicode;
4+
using System.Windows.Data;
5+
6+
namespace UnicodeCharacterInspector
7+
{
8+
/// <summary>
/// One-way binding converter that turns a <see cref="UnicodeRadicalStrokeCount"/> into the code point
/// of the CJK radical it refers to.
/// </summary>
internal sealed class RadicalStrokeCountToCharConverter : IValueConverter
{
    /// <summary>
    /// Looks up the radical referenced by <paramref name="value"/> and returns its code point.
    /// </summary>
    /// <param name="value">A boxed <see cref="UnicodeRadicalStrokeCount"/>, or <c>null</c>.</param>
    /// <param name="targetType">Not used.</param>
    /// <param name="parameter">Not used.</param>
    /// <param name="culture">Not used.</param>
    /// <returns>
    /// <c>null</c> when <paramref name="value"/> is <c>null</c>; otherwise the simplified radical code point
    /// for a simplified entry, or the traditional radical code point for any other entry.
    /// </returns>
    public object Convert(object value, Type targetType, object parameter, CultureInfo culture)
    {
        if (value == null) return null;

        var radicalStrokeCount = (UnicodeRadicalStrokeCount)value;
        var radical = UnicodeInfo.GetCjkRadicalInfo(radicalStrokeCount.Radical);

        // The form of the radical must match the form of the entry: simplified entries show the
        // simplified radical, all others show the traditional one.
        return radicalStrokeCount.IsSimplified ? radical.SimplifiedRadicalCodePoint : radical.TraditionalRadicalCodePoint;
    }

    /// <summary>
    /// Not supported: this converter only works from source to target.
    /// </summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public object ConvertBack(object value, Type targetType, object parameter, CultureInfo culture)
    {
        throw new NotSupportedException();
    }
}
25+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Globalization;
4+
using System.Linq;
5+
using System.Text;
6+
using System.Threading.Tasks;
7+
using System.Windows.Data;
8+
9+
namespace UnicodeCharacterInspector
10+
{
11+
/// <summary>
/// One-way binding converter that turns a string into the UTF-32 code point found at its start.
/// </summary>
internal class StringToUtf32Converter : IValueConverter
{
    /// <summary>
    /// Returns the code point at index 0 of the supplied string.
    /// </summary>
    /// <param name="value">The text to read; anything that is not a non-empty string yields <c>null</c>.</param>
    /// <param name="targetType">Not used.</param>
    /// <param name="parameter">Not used.</param>
    /// <param name="culture">Not used.</param>
    /// <returns>The boxed <see cref="int"/> code point, or <c>null</c> when there is no text.</returns>
    public object Convert(object value, Type targetType, object parameter, CultureInfo culture)
    {
        string text = value as string;

        if (string.IsNullOrEmpty(text)) return null;

        // ConvertToUtf32 combines a surrogate pair at index 0 into a single code point.
        return char.ConvertToUtf32(text, 0);
    }

    /// <summary>
    /// Not supported: this converter only works from source to target.
    /// </summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public object ConvertBack(object value, Type targetType, object parameter, CultureInfo culture)
    {
        // NotSupportedException (rather than NotImplementedException) states that the reverse
        // conversion is intentionally absent, matching the other one-way converters of the project.
        throw new NotSupportedException();
    }
}
26+
}

UnicodeCharacterInspector/UnicodeCharacterInspector.csproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@
7777
<Compile Include="PlaceholderAdorner.cs">
7878
<DependentUpon>Placeholder.cs</DependentUpon>
7979
</Compile>
80+
<Compile Include="RadicalStrokeCountToCharConverter.cs" />
81+
<Compile Include="StringToUtf32Converter.cs" />
8082
<Compile Include="Utf32ToNameConverter.cs" />
8183
<Compile Include="Utf32ToDisplayTextConverter.cs" />
8284
<Compile Include="ZeroToVisibilityConverter.cs" />

UnicodeInformation.Builder/Program.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ internal class Program
2121
"UnicodeData.txt",
2222
"PropList.txt",
2323
"DerivedCoreProperties.txt",
24+
"CJKRadicals.txt",
2425
//"Jamo.txt", // Not used right now, as the hangul syllable algorithm implementation takes care of this.
2526
"NameAliases.txt",
2627
"NamesList.txt",

UnicodeInformation.Builder/UnicodeCharacterDataBuilder.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public string Name
3737
set { name = value; }
3838
}
3939

40-
public ICollection<UnicodeNameAlias> NameAliases { get { return nameAliases; } }
40+
public IList<UnicodeNameAlias> NameAliases { get { return nameAliases; } }
4141

4242
public UnicodeCategory Category
4343
{
@@ -124,7 +124,7 @@ public CoreProperties CoreProperties
124124
set { coreProperties = value; }
125125
}
126126

127-
public ICollection<int> CrossRerefences { get { return crossRerefences; } }
127+
public IList<int> CrossRerefences { get { return crossRerefences; } }
128128

129129
public UnicodeCharacterDataBuilder(int codePoint)
130130
: this(new UnicodeCharacterRange(codePoint))

UnicodeInformation.Builder/UnicodeDataProcessor.cs

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ internal class UnicodeDataProcessor
1313
public const string UnicodeDataFileName = "UnicodeData.txt";
1414
public const string PropListFileName = "PropList.txt";
1515
public const string DerivedCorePropertiesFileName = "DerivedCoreProperties.txt";
16+
public const string CjkRadicalsFileName = "CJKRadicals.txt";
1617
public const string NameAliasesFileName = "NameAliases.txt";
1718
public const string NamesListFileName = "NamesList.txt";
1819
public const string BlocksFileName = "Blocks.txt";
@@ -40,6 +41,7 @@ public static async Task<UnicodeInfoBuilder> BuildDataAsync(IDataSource ucdSourc
4041
await ProcessUnicodeDataFile(ucdSource, builder).ConfigureAwait(false);
4142
await ProcessPropListFile(ucdSource, builder).ConfigureAwait(false);
4243
await ProcessDerivedCorePropertiesFile(ucdSource, builder).ConfigureAwait(false);
44+
await ProcessCjkRadicalsFile(ucdSource, builder).ConfigureAwait(false);
4345
await ProcessNameAliasesFile(ucdSource, builder).ConfigureAwait(false);
4446
await ProcessNamesListFile(ucdSource, builder).ConfigureAwait(false);
4547
await ProcessBlocksFile(ucdSource, builder).ConfigureAwait(false);
@@ -202,6 +204,53 @@ private static async Task ProcessDerivedCorePropertiesFile(IDataSource ucdSource
202204
}
203205
}
204206

207+
/// <summary>
/// Reads the data file named by <see cref="CjkRadicalsFileName"/> and stores every radical it lists in <paramref name="builder"/>.
/// </summary>
/// <remarks>
/// Each line is read as three ';'-separated fields: the radical number (a trailing apostrophe marks a
/// simplified form), followed by two hexadecimal code points (the radical, then the character).
/// Traditional entries must be numbered consecutively starting at 1; a simplified entry must carry the
/// number of the most recently read traditional entry, whose data it extends.
/// </remarks>
/// <param name="ucdSource">The source from which the data file is opened.</param>
/// <param name="builder">The builder receiving the radical data.</param>
/// <exception cref="InvalidDataException">A radical number is out of sequence.</exception>
/// <exception cref="InvalidOperationException">
/// A traditional radical code point has bit 0x8000 set, or the last radical number read is not
/// <see cref="UnicodeInfoBuilder.CjkRadicalCount"/>.
/// </exception>
private static async Task ProcessCjkRadicalsFile(IDataSource ucdSource, UnicodeInfoBuilder builder)
{
    var stream = await ucdSource.OpenDataFileAsync(CjkRadicalsFileName).ConfigureAwait(false);

    using (var reader = new UnicodeDataFileReader(stream, ';'))
    {
        // Number of the last traditional radical seen; 0 means none has been read yet.
        int previousRadical = 0;

        while (reader.MoveToNextLine())
        {
            string indexField = reader.ReadField();
            bool isSimplified = indexField[indexField.Length - 1] == '\'';
            string digits = isSimplified ? indexField.Substring(0, indexField.Length - 1) : indexField;
            int radicalIndex = int.Parse(digits);

            bool inSequence;
            if (isSimplified)
            {
                // A simplified form must directly follow the traditional form it belongs to.
                inSequence = radicalIndex == previousRadical;
            }
            else
            {
                // A traditional form must be exactly one past the previous traditional form.
                inSequence = radicalIndex == previousRadical + 1;
                previousRadical = radicalIndex;
            }

            if (!inSequence)
            {
                throw new InvalidDataException("Did not expect radical number " + indexField + ".");
            }

            int radicalValue = int.Parse(reader.ReadTrimmedField(), NumberStyles.HexNumber);
            char radicalCodePoint = checked((char)radicalValue);
            int characterValue = int.Parse(reader.ReadTrimmedField(), NumberStyles.HexNumber);
            char characterCodePoint = checked((char)characterValue);

            if (isSimplified)
            {
                // Keep the traditional half already stored and attach the simplified half to it.
                CjkRadicalData traditional = builder.GetRadicalInfo(radicalIndex);
                builder.SetRadicalInfo(radicalIndex, UpdateRadicalData(traditional, radicalCodePoint, characterCodePoint));
            }
            else
            {
                if ((radicalCodePoint & 0x8000) != 0)
                {
                    throw new InvalidOperationException("Did not expect the radical code point to be higher than U+8000 for radical " + radicalIndex.ToString() + ".");
                }

                builder.SetRadicalInfo(radicalIndex, new CjkRadicalData(radicalCodePoint, characterCodePoint));
            }
        }

        if (previousRadical != UnicodeInfoBuilder.CjkRadicalCount)
        {
            throw new InvalidOperationException("There was not enough data for the 214 CJK radicals.");
        }
    }
}
242+
243+
/// <summary>
/// Creates radical data that keeps the traditional code points of <paramref name="traditionalData"/>
/// and adds the supplied simplified code points.
/// </summary>
/// <param name="traditionalData">The existing data whose traditional code points are reused.</param>
/// <param name="simplifiedRadicalCodePoint">The code point of the simplified radical.</param>
/// <param name="simplifiedCharacterCodePoint">The code point of the simplified character.</param>
/// <returns>A new <see cref="CjkRadicalData"/> built from the four code points.</returns>
private static CjkRadicalData UpdateRadicalData(CjkRadicalData traditionalData, char simplifiedRadicalCodePoint, char simplifiedCharacterCodePoint)
{
    var traditionalRadical = traditionalData.TraditionalRadicalCodePoint;
    var traditionalCharacter = traditionalData.TraditionalCharacterCodePoint;

    return new CjkRadicalData(traditionalRadical, traditionalCharacter, simplifiedRadicalCodePoint, simplifiedCharacterCodePoint);
}
253+
205254
private static async Task ProcessNameAliasesFile(IDataSource ucdSource, UnicodeInfoBuilder builder)
206255
{
207256
using (var reader = new UnicodeDataFileReader(await ucdSource.OpenDataFileAsync(NameAliasesFileName).ConfigureAwait(false), ';'))
@@ -475,7 +524,7 @@ private static async Task ProcessUnihanIrgSources(IDataSource unihanDataSource,
475524
SeparatorFound: ;
476525
entry.UnicodeRadicalStrokeCounts.Add(new UnicodeRadicalStrokeCount(byte.Parse(value.Substring(0, index), NumberStyles.None), byte.Parse(value.Substring(index + (isSimplified ? 2 : 1)), NumberStyles.None), isSimplified));
477526
}
478-
break;
527+
break;
479528
default:
480529
// Ignore unhandled properties for now.
481530
break;

UnicodeInformation.Builder/UnicodeInfoBuilder.cs

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,15 @@ namespace System.Unicode.Builder
99
{
1010
internal class UnicodeInfoBuilder
1111
{
12+
public const int CjkRadicalCount = 214; // The number of radicals (214) shouldn't change in the near future…
13+
1214
private readonly Version unicodeVersion;
1315
private UnicodeCharacterDataBuilder[] ucdEntries = new UnicodeCharacterDataBuilder[10000];
1416
private int ucdEntryCount;
1517
private UnihanCharacterDataBuilder[] unihanEntries = new UnihanCharacterDataBuilder[10000];
1618
private int unihanEntryCount;
17-
private List<UnicodeBlock> blockEntries = new List<UnicodeBlock>(100);
19+
private readonly List<UnicodeBlock> blockEntries = new List<UnicodeBlock>(100);
20+
private readonly CjkRadicalData[] cjkRadicals = new CjkRadicalData[CjkRadicalCount];
1821

1922
public UnicodeInfoBuilder(Version unicodeVersion)
2023
{
@@ -238,6 +241,20 @@ public void SetProperties(CoreProperties property, UnicodeCharacterRange codePoi
238241
}
239242
}
240243

244+
/// <summary>
/// Stores the data of the radical with the given 1-based number.
/// </summary>
/// <param name="radicalIndex">The radical number, from 1 to <see cref="CjkRadicalCount"/> inclusive.</param>
/// <param name="data">The data to store for that radical.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="radicalIndex"/> is outside the valid range.</exception>
public void SetRadicalInfo(int radicalIndex, CjkRadicalData data)
{
    bool isValidIndex = radicalIndex >= 1 && radicalIndex <= CjkRadicalCount;
    if (!isValidIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(radicalIndex));
    }

    // Radical numbers are 1-based while the backing array is 0-based.
    int slot = radicalIndex - 1;
    cjkRadicals[slot] = data;
}
250+
251+
/// <summary>
/// Gets the data stored for the radical with the given 1-based number.
/// </summary>
/// <param name="radicalIndex">The radical number, from 1 to <see cref="CjkRadicalCount"/> inclusive.</param>
/// <returns>The entry currently held for that radical.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="radicalIndex"/> is outside the valid range.</exception>
public CjkRadicalData GetRadicalInfo(int radicalIndex)
{
    bool isValidIndex = radicalIndex >= 1 && radicalIndex <= CjkRadicalCount;
    if (!isValidIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(radicalIndex));
    }

    // Radical numbers are 1-based while the backing array is 0-based.
    int slot = radicalIndex - 1;
    return cjkRadicals[slot];
}
257+
241258
public void AddBlockEntry(UnicodeBlock block)
242259
{
243260
blockEntries.Add(block);
@@ -283,6 +300,20 @@ public void WriteToStream(Stream stream)
283300
{
284301
WriteUnicodeBlockToFile(writer, blockEntries[i]);
285302
}
303+
writer.Write((byte)CjkRadicalCount);
304+
for (int i = 0; i < cjkRadicals.Length; ++i)
305+
{
306+
var radical = cjkRadicals[i];
307+
308+
writer.Write((ushort)(radical.HasSimplifiedForm ? 0x8000 | radical.TraditionalRadicalCodePoint : radical.TraditionalRadicalCodePoint));
309+
writer.Write((ushort)radical.TraditionalCharacterCodePoint);
310+
311+
if (radical.HasSimplifiedForm)
312+
{
313+
writer.Write((ushort)radical.SimplifiedRadicalCodePoint);
314+
writer.Write((ushort)radical.SimplifiedCharacterCodePoint);
315+
}
316+
}
286317
writer.WriteCodePoint(unihanEntryCount);
287318
for (int i = 0; i < unihanEntryCount; ++i)
288319
{

0 commit comments

Comments
 (0)