Skip to content

Commit 38f293a

Browse files
authored
fix(matcher): perform unicode NFKC before matching (#76)
1 parent 224b389 commit 38f293a

File tree

5 files changed

+146
-2
lines changed

5 files changed

+146
-2
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
using SubRenamer.Core;
2+
using SubRenamer.Helper;
3+
4+
namespace SubRenamer.Tests.MatcherTests;
5+
6+
/// <summary>
/// Tests for filename normalization.
///
/// NFKC stands for Unicode Normalization Form KC
/// (Compatibility Decomposition, followed by Canonical Composition).
/// https://unicode.org/reports/tr15/
/// </summary>
[TestFixture]
public class FilenameNfkcTests
{
    /// <summary>
    /// Round-trips a single item: decomposed Katakana must be composed by
    /// <c>Normalize</c> and restored verbatim by <c>Denormalize</c>.
    /// </summary>
    [Test]
    public void Basic()
    {
        // Base Katakana letters followed by combining (semi-)voiced sound marks.
        List<MatchItem> rawItems = [new MatchItem("", "\u30CF\u309A", "\u30D5\u3099")];

        // The same text as single precomposed code points.
        List<MatchItem> expectedNormalized = [new MatchItem("", "\u30D1", "\u30D6")];

        var sut = new MatcherFilenameNormalizer();

        var actualNormalized = sut.Normalize(rawItems);
        Assert.That(actualNormalized, Is.EqualTo(expectedNormalized), "Normalize");

        var restoredItems = sut.Denormalize(actualNormalized);
        Assert.That(restoredItems, Is.EqualTo(rawItems), "Denormalize");

        sut.Clear();
    }
}

SubRenamer.Tests/MatcherTests/TopLevelTests.cs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System.Text.Json;
22
using SubRenamer.Core;
3+
using SubRenamer.Helper;
34

45
namespace SubRenamer.Tests.MatcherTests;
56

@@ -26,7 +27,8 @@ private static IEnumerable<TestCaseData> TestData
2627
[Test, TestCaseSource(nameof(TestData))]
2728
public void TestCasesFromJson(string name, List<MatchItem> input, List<MatchItem> expected)
2829
{
29-
var actual = Matcher.Execute(input);
30+
var normalizer = new MatcherFilenameNormalizer();
31+
var actual = Matcher.Execute(normalizer.Normalize(input));
3032

3133
var jsonOpts = new JsonSerializerOptions { WriteIndented = true, Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping };
3234
TestContext.Progress.WriteLine("{1}\n\n \ud83c\udf1f Matcher Test Case: {0}\n\n{1}", name, new string('=', 50));
@@ -37,6 +39,7 @@ public void TestCasesFromJson(string name, List<MatchItem> input, List<MatchItem
3739
TestContext.Progress.WriteLine("{2}\n {0}\n{2}\n{1}", "Actual", JsonSerializer.Serialize(actual, jsonOpts),
3840
new string('-', 50));
3941

40-
Assert.That(actual, Is.EqualTo(expected));
42+
Assert.That(normalizer.Denormalize(actual), Is.EqualTo(expected));
43+
normalizer.Clear();
4144
}
4245
}

SubRenamer.Tests/MatcherTests/TopLevelTests.json

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,5 +145,37 @@
145145
{"Key": "1", "Video": "视频 1 xyz.mov", "Subtitle": "字幕 1xyz.srt"},
146146
{"Key": "77", "Video": "视频 77 test xyz.mov", "Subtitle": "字幕 77test xyz.srt"}
147147
]
148+
},
149+
{
150+
"Name": "Nのために (Japanese, Unicode NFKD Test#1)",
151+
"Input": [
152+
{"Key": "", "Video": "Nのために EP01 720p HDTV x264 AAC-DoA.mkv", "Subtitle": ""},
153+
{"Key": "", "Video": "Nのために EP02 720p HDTV x264 AAC-DoA.mkv", "Subtitle": ""},
154+
{"Key": "", "Video": "Nのために EP10 End 720p HDTV x264 AAC-DoA.mkv", "Subtitle": ""},
155+
{"Key": "", "Video": "", "Subtitle": "[ドラマ][Nのために 第01話]「榮倉奈々、窪田正孝、賀来賢人、小出恵介、徳井義実、小西真奈美、三浦友和」[720p x264 AAC].tc.srt"},
156+
{"Key": "", "Video": "", "Subtitle": "[ドラマ][Nのために 第02話]「榮倉奈々、窪田正孝、賀来賢人、小出恵介、徳井義実、小西真奈美、三浦友和」[720p x264 AAC].tc.srt"},
157+
{"Key": "", "Video": "", "Subtitle": "[ドラマ][Nのために 第10話]「榮倉奈々、窪田正孝、賀来賢人、小出恵介、徳井義実、小西真奈美、三浦友和」[720p x264 AAC][最終話].tc.srt"}
158+
],
159+
"Output": [
160+
{"Key": "1", "Video": "Nのために EP01 720p HDTV x264 AAC-DoA.mkv", "Subtitle": "[ドラマ][Nのために 第01話]「榮倉奈々、窪田正孝、賀来賢人、小出恵介、徳井義実、小西真奈美、三浦友和」[720p x264 AAC].tc.srt"},
161+
{"Key": "2", "Video": "Nのために EP02 720p HDTV x264 AAC-DoA.mkv", "Subtitle": "[ドラマ][Nのために 第02話]「榮倉奈々、窪田正孝、賀来賢人、小出恵介、徳井義実、小西真奈美、三浦友和」[720p x264 AAC].tc.srt"},
162+
{"Key": "10", "Video": "Nのために EP10 End 720p HDTV x264 AAC-DoA.mkv", "Subtitle": "[ドラマ][Nのために 第10話]「榮倉奈々、窪田正孝、賀来賢人、小出恵介、徳井義実、小西真奈美、三浦友和」[720p x264 AAC][最終話].tc.srt"}
163+
]
164+
},
165+
{
166+
"Name": "機動警察 (Japanese, Unicode NFKD Test#2)",
167+
"Input": [
168+
{"Key": "", "Video": "[AI-Raws] 机动警察パトレイバー #1 (BD HEVC 1440x1080 FLAC)[9F318F24].mkv", "Subtitle": ""},
169+
{"Key": "", "Video": "[AI-Raws] 机动警察パトレイバー #2 (BD HEVC 1440x1080 FLAC)[7F3281F4].mkv", "Subtitle": ""},
170+
{"Key": "", "Video": "[AI-Raws] 机动警察パトレイバー #10 (BD HEVC 1440x1080 FLAC)[8F01DF54].mkv", "Subtitle": ""},
171+
{"Key": "", "Video": "", "Subtitle": "[炎之川字幕] 机动警察 機動警察パトレイバー 01.ass"},
172+
{"Key": "", "Video": "", "Subtitle": "[炎之川字幕] 机动警察 機動警察パトレイバー 02.ass"},
173+
{"Key": "", "Video": "", "Subtitle": "[炎之川字幕] 机动警察 機動警察パトレイバー 10.ass"}
174+
],
175+
"Output": [
176+
{"Key": "1", "Video": "[AI-Raws] 机动警察パトレイバー #1 (BD HEVC 1440x1080 FLAC)[9F318F24].mkv", "Subtitle": "[炎之川字幕] 机动警察 機動警察パトレイバー 01.ass"},
177+
{"Key": "2", "Video": "[AI-Raws] 机动警察パトレイバー #2 (BD HEVC 1440x1080 FLAC)[7F3281F4].mkv", "Subtitle": "[炎之川字幕] 机动警察 機動警察パトレイバー 02.ass"},
178+
{"Key": "10", "Video": "[AI-Raws] 机动警察パトレイバー #10 (BD HEVC 1440x1080 FLAC)[8F01DF54].mkv", "Subtitle": "[炎之川字幕] 机动警察 機動警察パトレイバー 10.ass"}
179+
]
148180
}
149181
]
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
using System.Collections.Generic;
2+
using System.Text;
3+
4+
namespace SubRenamer.Helper;
5+
6+
/// <summary>
/// Handles normalization and denormalization of filenames in MatchItems to ensure consistent Unicode handling.
/// Uses NormalizationForm.FormKC for compatibility normalization with composition.
/// </summary>
/// <remarks>
/// The instance remembers, separately for videos and subtitles, which raw filename produced each
/// normalized filename so that <see cref="Denormalize"/> can restore the originals.
/// If two different raw filenames of the same kind normalize to the same string, the one seen last
/// replaces the earlier mapping.
/// </remarks>
public class MatcherFilenameNormalizer
{
    private readonly Dictionary<string, string> _normalizedToRawVideos = new();
    private readonly Dictionary<string, string> _normalizedToRawSubtitles = new();

    /// <summary>
    /// Normalizes the filenames in a list of MatchItems using NormalizationForm.FormKC.
    /// </summary>
    /// <param name="matchItems">The list of MatchItems to normalize.</param>
    /// <returns>
    /// A new list of MatchItems with normalized filenames. Null or empty filenames are passed through
    /// unchanged, and a filename that cannot be normalized (invalid Unicode) is kept in its raw form.
    /// </returns>
    public List<Core.MatchItem> Normalize(IReadOnlyList<Core.MatchItem> matchItems)
    {
        if (matchItems.Count == 0) return [];

        var result = new List<Core.MatchItem>(matchItems.Count);
        foreach (var item in matchItems)
        {
            var normalizedVideo = NormalizeAndRemember(item.Video, _normalizedToRawVideos);
            var normalizedSubtitle = NormalizeAndRemember(item.Subtitle, _normalizedToRawSubtitles);

            result.Add(new Core.MatchItem(item.Key, normalizedVideo, normalizedSubtitle));
        }

        return result;
    }

    /// <summary>
    /// Denormalizes the filenames in a list of MatchItems back to their original form.
    /// </summary>
    /// <param name="matchItems">The list of MatchItems to denormalize.</param>
    /// <returns>A new list of MatchItems with original filenames; null or empty filenames become empty strings.</returns>
    /// <exception cref="KeyNotFoundException">Thrown when a normalized filename cannot be mapped back to its original form.</exception>
    public List<Core.MatchItem> Denormalize(IReadOnlyList<Core.MatchItem> matchItems)
    {
        if (matchItems.Count == 0) return [];

        var result = new List<Core.MatchItem>(matchItems.Count);
        foreach (var item in matchItems)
        {
            var originalVideo = Restore(item.Video, _normalizedToRawVideos, "video");
            var originalSubtitle = Restore(item.Subtitle, _normalizedToRawSubtitles, "subtitle");

            result.Add(new Core.MatchItem(item.Key, originalVideo, originalSubtitle));
        }

        return result;
    }

    /// <summary>
    /// Clears the internal mapping dictionaries.
    /// </summary>
    public void Clear()
    {
        _normalizedToRawVideos.Clear();
        _normalizedToRawSubtitles.Clear();
    }

    /// <summary>
    /// Normalizes one filename to FormKC and records the normalized-to-raw mapping in <paramref name="map"/>.
    /// </summary>
    private static string NormalizeAndRemember(string raw, Dictionary<string, string> map)
    {
        // Nothing to normalize or remember; checking first also avoids dereferencing a null name.
        if (string.IsNullOrEmpty(raw)) return raw;

        string normalized;
        try
        {
            normalized = raw.Normalize(NormalizationForm.FormKC);
        }
        catch (System.ArgumentException)
        {
            // string.Normalize rejects invalid Unicode (e.g. an unpaired surrogate).
            // Keep the raw name so a single odd filename does not abort the whole run.
            normalized = raw;
        }

        map[normalized] = raw;
        return normalized;
    }

    /// <summary>
    /// Looks up the raw filename recorded for <paramref name="normalized"/>.
    /// </summary>
    private static string Restore(string normalized, Dictionary<string, string> map, string kind)
    {
        if (string.IsNullOrEmpty(normalized)) return string.Empty;

        if (map.TryGetValue(normalized, out var raw)) return raw;

        throw new KeyNotFoundException(
            $"No original {kind} filename is recorded for normalized name '{normalized}'.");
    }
}

SubRenamer/ViewModels/MainViewModel.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,15 +174,19 @@ partial void OnSubSyncEnabledChanged(bool value)
174174
[RelayCommand]
175175
private void PerformMatch()
176176
{
177+
var filenameNormalizer = new MatcherFilenameNormalizer();
177178
ShowRenameTasks = false;
178179
var inputItems = MatcherDataConverter.ConvertMatchItems(MatchList);
180+
inputItems = filenameNormalizer.Normalize(inputItems);
179181
var m = Config.Get().MatchMode;
180182
var resultRaw = Matcher.Execute(inputItems, new MatcherOptions()
181183
{
182184
// Convert Config to MatcherOptions
183185
VideoRegex = (m != MatchMode.Diff) ? (m == MatchMode.Manual ? Config.Get().ManualVideoRegex : Config.Get().VideoRegex) : null,
184186
SubtitleRegex = (m != MatchMode.Diff) ? (m == MatchMode.Manual ? Config.Get().ManualSubtitle : Config.Get().SubtitleRegex) : null,
185187
});
188+
resultRaw = filenameNormalizer.Denormalize(resultRaw);
189+
filenameNormalizer.Clear();
186190
var result = MatcherDataConverter.ConvertMatchItems(resultRaw);
187191
result.ForEach(UpdateMatchItemStatus);
188192
MatchList = new ObservableCollection<MatchItem>(result);

0 commit comments

Comments
 (0)