From a94672dfd0514906df8cd84e1c099b2167504dc1 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Fri, 16 Aug 2024 09:13:20 +0700 Subject: [PATCH 01/10] attempt to integrate WeCantSpell into liblcm, there's currently issues with modifying the word list at runtime. --- Directory.Build.props | 2 +- src/SIL.LCModel.Core/SIL.LCModel.Core.csproj | 1 + .../SpellChecking/SpellEngine.cs | 25 ++++++--- .../SpellChecking/SpellEngineLinux.cs | 16 ------ .../SpellChecking/SpellEngineWeCantSpell.cs | 54 +++++++++++++++++++ .../SpellChecking/SpellEngineWindows.cs | 16 ------ .../SpellChecking/SpellingHelper.cs | 11 ++++ 7 files changed, 86 insertions(+), 39 deletions(-) create mode 100644 src/SIL.LCModel.Core/SpellChecking/SpellEngineWeCantSpell.cs diff --git a/Directory.Build.props b/Directory.Build.props index 6d8c5c73..f7f9fdca 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -15,7 +15,7 @@ false $(MSBuildThisFileDirectory)/artifacts true - NU1605;CS8002 + NU1605 $(MSBuildThisFileDirectory)/liblcm.snk true snupkg diff --git a/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj b/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj index df9b1c1a..0b7a90a5 100644 --- a/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj +++ b/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj @@ -22,6 +22,7 @@ SIL.LCModel.Core provides a base library with core functionality. + diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs b/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs index 81bffaf9..98cbae46 100644 --- a/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs +++ b/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs @@ -26,10 +26,12 @@ internal static SpellEngine Create(string affixPath, string dictPath, string exc SpellEngine spellEngine = null; try { - if (Platform.IsWindows) - spellEngine = CreateSpellEngineWindows(affixPath, dictPath, exceptionPath); - else - spellEngine = CreateSpellEngineLinux(affixPath, dictPath, exceptionPath); + if (SpellingHelper.UseWeCantSpell) + { + spellEngine = new SpellEngineWeCantSpell(affixPath, dictPath, exceptionPath); + } else { + spellEngine = Platform.IsWindows ? CreateSpellEngineWindows(affixPath, dictPath, exceptionPath) : CreateSpellEngineLinux(affixPath, dictPath, exceptionPath); + } spellEngine.Initialize(); } @@ -87,9 +89,20 @@ private void Initialize() /// public abstract bool Check(string word); + private bool _isVernacular; + private bool _gotIsVernacular; + public bool IsVernacular + { + get + { + if (_gotIsVernacular) + return _isVernacular; - /// - public abstract bool IsVernacular { get; } + _isVernacular = Check(SpellingHelper.PrototypeWord); + _gotIsVernacular = true; + return _isVernacular; + } + } /// public abstract ICollection Suggest(string badWord); diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellEngineLinux.cs b/src/SIL.LCModel.Core/SpellChecking/SpellEngineLinux.cs index 65f09892..811970da 100644 --- a/src/SIL.LCModel.Core/SpellChecking/SpellEngineLinux.cs +++ b/src/SIL.LCModel.Core/SpellChecking/SpellEngineLinux.cs @@ -13,8 +13,6 @@ namespace SIL.LCModel.Core.SpellChecking internal sealed class SpellEngineLinux: SpellEngine { private IntPtr _hunspellHandle; - private bool _isVernacular; - private bool _gotIsVernacular; internal SpellEngineLinux(string affixPath, string dictPath, string exceptionPath) : base(exceptionPath) @@ -218,20 +216,6 @@ public override ICollection Suggest(string badWord) Hunspell_free_list(_hunspellHandle, ref pointerToAddressStringArray, resultCount); return results; } - - /// - public override bool IsVernacular - { - get - { - if (_gotIsVernacular) - return _isVernacular; - - _isVernacular = Check(SpellingHelper.PrototypeWord); - _gotIsVernacular = true; - return _isVernacular; - } - } } /// diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellEngineWeCantSpell.cs b/src/SIL.LCModel.Core/SpellChecking/SpellEngineWeCantSpell.cs new file mode 100644 index 00000000..6c0c4493 --- /dev/null +++ b/src/SIL.LCModel.Core/SpellChecking/SpellEngineWeCantSpell.cs @@ -0,0 +1,54 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using WeCantSpell.Hunspell; + +namespace SIL.LCModel.Core.SpellChecking +{ + internal class SpellEngineWeCantSpell: SpellEngine + { + private readonly WordList _wordList; + private readonly WordList.Builder _customWordsBuilder; + private WordList _customWordList; + private readonly HashSet _badWords = new HashSet(); + + public SpellEngineWeCantSpell(string affixPath, string dictPath, string exceptionPath) : base(exceptionPath) + { + _wordList = WordList.CreateFromFiles(dictPath, affixPath); + _customWordsBuilder = new WordList.Builder(_wordList.Affix); + _customWordList = _customWordsBuilder.ToImmutable(); + } + + public override bool Check(string word) + { + if (_badWords.Contains(word)) return false; + if (_customWordList.Check(word)) return true; + return _wordList.Check(word); + } + + public override ICollection Suggest(string badWord) + { + var suggestions = _wordList.Suggest(badWord).Union(_customWordList.Suggest(badWord)); + return suggestions.Where(suggestion => !_badWords.Contains(suggestion)).ToArray(); + } + + protected override void SetStatusInternal(string word1, bool isCorrect) + { + // WeCantSpell does not support modifying the word list, so we have to use 2 and merge them. + if (isCorrect) + { + var detail = IsVernacular + ? new WordEntryDetail(FlagSet.Empty, + MorphSet.Create(new []{SpellingHelper.PrototypeWord}), + WordEntryOptions.None) + : WordEntryDetail.Default; + _customWordsBuilder.Add(word1, detail); + _customWordList = _customWordsBuilder.ToImmutable(); + } + else + { + _badWords.Add(word1); + } + } + } +} \ No newline at end of file diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellEngineWindows.cs b/src/SIL.LCModel.Core/SpellChecking/SpellEngineWindows.cs index 7c04ba03..67c91d49 100644 --- a/src/SIL.LCModel.Core/SpellChecking/SpellEngineWindows.cs +++ b/src/SIL.LCModel.Core/SpellChecking/SpellEngineWindows.cs @@ -22,8 +22,6 @@ internal class NoLinuxRepack : System.Attribute internal sealed class SpellEngineWindows: SpellEngine { private readonly Hunspell _hunspellHandle; - private bool _isVernacular; - private bool _gotIsVernacular; internal SpellEngineWindows(string affixPath, string dictPath, string exceptionPath) : base(exceptionPath) @@ -87,20 +85,6 @@ public override ICollection Suggest(string badWord) return _hunspellHandle.Suggest(MarshallAsUtf8Bytes(badWord)); } - /// - public override bool IsVernacular - { - get - { - if (_gotIsVernacular) - return _isVernacular; - - _isVernacular = Check(MarshallAsUtf8Bytes(SpellingHelper.PrototypeWord)); - _gotIsVernacular = true; - return _isVernacular; - } - } - /// /// We can't declare these arguments (char * in C++) as [MarshalAs(UnmanagedType.LPStr)] string, because that /// unconditionally coverts the string to bytes using the current system code page, which is never what we want. diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs b/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs index 77cd3209..d82b7214 100644 --- a/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs +++ b/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs @@ -22,6 +22,17 @@ namespace SIL.LCModel.Core.SpellChecking /// public static class SpellingHelper { + /// + /// FieldWorks uses NHunspell for spell checking, but NHunspell is not available on Linux. + /// Use this flag to switch between NHunspell and WeCantSpell as needed. On dotnet framework we use NHunspell by default. + /// On dotnet core we use WeCantSpell by default. + /// + public static bool UseWeCantSpell { get; set; } = + #if NETFRAMEWORK + false; + #else + true; + #endif // A helper object used to ensure that the spelling engines are properly disposed of private sealed class SingletonToDispose : IDisposable { From dd291482d02b45d409af385547b4556d6425962c Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Wed, 5 Mar 2025 15:51:26 +0700 Subject: [PATCH 02/10] upgrade to WeCantSpell 6 which supports Adding words at runtime, fix some bugs around normalization and the prototype words --- src/SIL.LCModel.Core/SIL.LCModel.Core.csproj | 2 +- .../SpellChecking/SpellEngineWeCantSpell.cs | 22 ++++++------------- .../SpellChecking/SpellingHelper.cs | 2 +- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj b/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj index 619c7dda..15536733 100644 --- a/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj +++ b/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj @@ -22,7 +22,7 @@ SIL.LCModel.Core provides a base library with core functionality. - + diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellEngineWeCantSpell.cs b/src/SIL.LCModel.Core/SpellChecking/SpellEngineWeCantSpell.cs index 6c0c4493..12d31007 100644 --- a/src/SIL.LCModel.Core/SpellChecking/SpellEngineWeCantSpell.cs +++ b/src/SIL.LCModel.Core/SpellChecking/SpellEngineWeCantSpell.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Linq; +using Icu; using WeCantSpell.Hunspell; namespace SIL.LCModel.Core.SpellChecking @@ -8,46 +9,37 @@ namespace SIL.LCModel.Core.SpellChecking internal class SpellEngineWeCantSpell: SpellEngine { private readonly WordList _wordList; - private readonly WordList.Builder _customWordsBuilder; - private WordList _customWordList; - private readonly HashSet _badWords = new HashSet(); public SpellEngineWeCantSpell(string affixPath, string dictPath, string exceptionPath) : base(exceptionPath) { _wordList = WordList.CreateFromFiles(dictPath, affixPath); - _customWordsBuilder = new WordList.Builder(_wordList.Affix); - _customWordList = _customWordsBuilder.ToImmutable(); } public override bool Check(string word) { - if (_badWords.Contains(word)) return false; - if (_customWordList.Check(word)) return true; - return _wordList.Check(word); + return _wordList.Check(Normalizer.Normalize(word, Normalizer.UNormalizationMode.UNORM_NFC)); } public override ICollection Suggest(string badWord) { - var suggestions = _wordList.Suggest(badWord).Union(_customWordList.Suggest(badWord)); - return suggestions.Where(suggestion => !_badWords.Contains(suggestion)).ToArray(); + var result = _wordList.Suggest(badWord); + return result as ICollection ?? result.ToArray(); } protected override void SetStatusInternal(string word1, bool isCorrect) { - // WeCantSpell does not support modifying the word list, so we have to use 2 and merge them. if (isCorrect) { var detail = IsVernacular - ? new WordEntryDetail(FlagSet.Empty, + ? new WordEntryDetail(FlagSet.Create(new FlagValue(SpellingHelper.keepCaseFlag)), MorphSet.Create(new []{SpellingHelper.PrototypeWord}), WordEntryOptions.None) : WordEntryDetail.Default; - _customWordsBuilder.Add(word1, detail); - _customWordList = _customWordsBuilder.ToImmutable(); + _wordList.Add(word1, detail); } else { - _badWords.Add(word1); + _wordList.Remove(word1); } } } diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs b/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs index d82b7214..4e7e6fba 100644 --- a/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs +++ b/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs @@ -310,7 +310,7 @@ internal static void ClearAllDictionaries() /// to indicate that other words should be keep-case also. /// internal const string PrototypeWord = "XXPatternWordDoNotDeleteXX"; - private const string keepCaseFlag = "C"; + internal const char keepCaseFlag = 'C'; internal static void EnsureDictionary(string dictId) { From 77f880a0fe7f9dcc4cfdfb5c3b50f2db135e16ed Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Wed, 5 Mar 2025 16:09:15 +0700 Subject: [PATCH 03/10] remove the forced sdk version 6 as our tests need to run on dotnet 8 --- global.json | 1 - 1 file changed, 1 deletion(-) diff --git a/global.json b/global.json index 23036f2e..98076720 100644 --- a/global.json +++ b/global.json @@ -1,6 +1,5 @@ { "sdk": { - "version": "6.0.0", "rollForward": "latestMajor" } } \ No newline at end of file From 668f0493618514dd70606a6722e0e54bd5d1a1bd Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Wed, 5 Mar 2025 16:17:09 +0700 Subject: [PATCH 04/10] don't use WeCantSpell when the runtime is Framework --- src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs b/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs index 4e7e6fba..fc5c6560 100644 --- a/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs +++ b/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs @@ -31,7 +31,7 @@ public static class SpellingHelper #if NETFRAMEWORK false; #else - true; + !Platform.IsDotNetFramework; #endif // A helper object used to ensure that the spelling engines are properly disposed of private sealed class SingletonToDispose : IDisposable From 892116d79b59604a2ebedcb8d05e4cbc33843bc4 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Thu, 6 Mar 2025 09:34:09 +0700 Subject: [PATCH 05/10] greatly simplify `IsVernacular` --- src/SIL.LCModel.Core/SIL.LCModel.Core.csproj | 1 + .../SpellChecking/SpellEngine.cs | 16 ++-------------- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj b/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj index 15536733..52202d81 100644 --- a/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj +++ b/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj @@ -5,6 +5,7 @@ SIL.LCModel.Core The liblcm library is the core FieldWorks model for linguistic analyses of languages. Tools in this library provide the ability to store and interact with language and culture data, including anthropological, text corpus, and linguistics data. SIL.LCModel.Core provides a base library with core functionality. + latest diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs b/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs index 98cbae46..bc9cfbea 100644 --- a/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs +++ b/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs @@ -89,20 +89,8 @@ private void Initialize() /// public abstract bool Check(string word); - private bool _isVernacular; - private bool _gotIsVernacular; - public bool IsVernacular - { - get - { - if (_gotIsVernacular) - return _isVernacular; - - _isVernacular = Check(SpellingHelper.PrototypeWord); - _gotIsVernacular = true; - return _isVernacular; - } - } + private bool? _isVernacular; + public bool IsVernacular => _isVernacular ??= Check(SpellingHelper.PrototypeWord); /// public abstract ICollection Suggest(string badWord); From d05c78c6b986ac212d515058ac322ef8736379d5 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Mon, 10 Mar 2025 13:45:54 +0700 Subject: [PATCH 06/10] disable assembly signing which causes lcm to require strong named dependencies --- Directory.Build.props | 6 ++---- src/CSTools/Tools/Tools.csproj | 1 - src/CSTools/lg/lg.csproj | 1 - src/CSTools/pg/pg.csproj | 1 - src/SIL.LCModel.Core/Properties/AssemblyInfo.cs | 2 +- .../WritingSystems/CoreGlobalWritingSystemRepository.cs | 2 +- src/SIL.LCModel.Utils/Properties/AssemblyInfo.cs | 2 +- src/SIL.LCModel/Properties/AssemblyInfo.cs | 2 +- tests/SIL.LCModel.Core.Tests/App.config | 2 +- tests/SIL.LCModel.Tests/App.config | 2 +- 10 files changed, 8 insertions(+), 13 deletions(-) diff --git a/Directory.Build.props b/Directory.Build.props index d9a2027f..71910208 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -15,10 +15,8 @@ $(MSBuildThisFileDirectory)artifacts/$(Configuration)/$(TargetFramework) false $(MSBuildThisFileDirectory)/artifacts - true - NU1605 - $(MSBuildThisFileDirectory)/liblcm.snk - true + NU1605;CS8002 + true snupkg true true diff --git a/src/CSTools/Tools/Tools.csproj b/src/CSTools/Tools/Tools.csproj index df4f2dda..e8768b5a 100644 --- a/src/CSTools/Tools/Tools.csproj +++ b/src/CSTools/Tools/Tools.csproj @@ -7,7 +7,6 @@ SIL LCModel Lexer/Parser Tools ../../../artifacts ../../../CHANGELOG.md - ../../../liblcm.snk TRACE;GENTIME 1701;1702;0162 diff --git a/src/CSTools/lg/lg.csproj b/src/CSTools/lg/lg.csproj index 02e56f08..f3a07832 100644 --- a/src/CSTools/lg/lg.csproj +++ b/src/CSTools/lg/lg.csproj @@ -5,7 +5,6 @@ SIL.LCModel.Tools Lexer Generator ../../../artifacts - ../../../liblcm.snk false Exe 1701;1702;0162 diff --git a/src/CSTools/pg/pg.csproj b/src/CSTools/pg/pg.csproj index 153d17f8..40b97b35 100644 --- a/src/CSTools/pg/pg.csproj +++ b/src/CSTools/pg/pg.csproj @@ -5,7 +5,6 @@ SIL.LCModel.Tools Parser Generator ../../../artifacts - ../../../liblcm.snk false Exe 1701;1702;0162 diff --git a/src/SIL.LCModel.Core/Properties/AssemblyInfo.cs b/src/SIL.LCModel.Core/Properties/AssemblyInfo.cs index 43600104..08853646 100644 --- a/src/SIL.LCModel.Core/Properties/AssemblyInfo.cs +++ b/src/SIL.LCModel.Core/Properties/AssemblyInfo.cs @@ -5,4 +5,4 @@ using System.Reflection; using System.Runtime.CompilerServices; -[assembly: InternalsVisibleTo("SIL.LCModel.Core.Tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100b511304f05af0a01cbc5408cdbdf742aa1664db0e1157235bb2619e7fb5e705bd3534a7157a088a458ec3136e46ebd2b73519fb07dffd2daa40a7b9aa340675d926ab918d2e0183b8613320529b8a490028c8e1b40b980f3724928455d447d8f93d459be3c55a4e3f2ef5119c3393fd25adba301cbff8a3ffbce2e181d143788")] +[assembly: InternalsVisibleTo("SIL.LCModel.Core.Tests")] diff --git a/src/SIL.LCModel.Core/WritingSystems/CoreGlobalWritingSystemRepository.cs b/src/SIL.LCModel.Core/WritingSystems/CoreGlobalWritingSystemRepository.cs index 27d32121..3bbe6774 100644 --- a/src/SIL.LCModel.Core/WritingSystems/CoreGlobalWritingSystemRepository.cs +++ b/src/SIL.LCModel.Core/WritingSystems/CoreGlobalWritingSystemRepository.cs @@ -1,7 +1,7 @@ using System.Runtime.CompilerServices; using SIL.WritingSystems; -[assembly: InternalsVisibleTo("SIL.LCModel.Tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100b511304f05af0a01cbc5408cdbdf742aa1664db0e1157235bb2619e7fb5e705bd3534a7157a088a458ec3136e46ebd2b73519fb07dffd2daa40a7b9aa340675d926ab918d2e0183b8613320529b8a490028c8e1b40b980f3724928455d447d8f93d459be3c55a4e3f2ef5119c3393fd25adba301cbff8a3ffbce2e181d143788")] +[assembly: InternalsVisibleTo("SIL.LCModel.Tests")] namespace SIL.LCModel.Core.WritingSystems { /// diff --git a/src/SIL.LCModel.Utils/Properties/AssemblyInfo.cs b/src/SIL.LCModel.Utils/Properties/AssemblyInfo.cs index 96a0917e..7233aea1 100644 --- a/src/SIL.LCModel.Utils/Properties/AssemblyInfo.cs +++ b/src/SIL.LCModel.Utils/Properties/AssemblyInfo.cs @@ -8,4 +8,4 @@ [assembly: ComVisible(false)] -[assembly: InternalsVisibleTo("SIL.LCModel.Utils.Tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100b511304f05af0a01cbc5408cdbdf742aa1664db0e1157235bb2619e7fb5e705bd3534a7157a088a458ec3136e46ebd2b73519fb07dffd2daa40a7b9aa340675d926ab918d2e0183b8613320529b8a490028c8e1b40b980f3724928455d447d8f93d459be3c55a4e3f2ef5119c3393fd25adba301cbff8a3ffbce2e181d143788")] +[assembly: InternalsVisibleTo("SIL.LCModel.Utils.Tests")] diff --git a/src/SIL.LCModel/Properties/AssemblyInfo.cs b/src/SIL.LCModel/Properties/AssemblyInfo.cs index febdbc39..ce1d5d82 100644 --- a/src/SIL.LCModel/Properties/AssemblyInfo.cs +++ b/src/SIL.LCModel/Properties/AssemblyInfo.cs @@ -8,4 +8,4 @@ //[assembly: AssemblyTitle("SIL.LCModel")] -[assembly: InternalsVisibleTo("SIL.LCModel.Tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100b511304f05af0a01cbc5408cdbdf742aa1664db0e1157235bb2619e7fb5e705bd3534a7157a088a458ec3136e46ebd2b73519fb07dffd2daa40a7b9aa340675d926ab918d2e0183b8613320529b8a490028c8e1b40b980f3724928455d447d8f93d459be3c55a4e3f2ef5119c3393fd25adba301cbff8a3ffbce2e181d143788")] +[assembly: InternalsVisibleTo("SIL.LCModel.Tests")] diff --git a/tests/SIL.LCModel.Core.Tests/App.config b/tests/SIL.LCModel.Core.Tests/App.config index f48805fc..0c0c619a 100644 --- a/tests/SIL.LCModel.Core.Tests/App.config +++ b/tests/SIL.LCModel.Core.Tests/App.config @@ -4,7 +4,7 @@ - + diff --git a/tests/SIL.LCModel.Tests/App.config b/tests/SIL.LCModel.Tests/App.config index f48805fc..0c0c619a 100644 --- a/tests/SIL.LCModel.Tests/App.config +++ b/tests/SIL.LCModel.Tests/App.config @@ -4,7 +4,7 @@ - + From 3aff872ec9b91683f065437e44eed7a97b50e3b7 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Mon, 10 Mar 2025 13:48:43 +0700 Subject: [PATCH 07/10] remove NHunspell --- src/SIL.LCModel.Core/SIL.LCModel.Core.csproj | 6 - .../SpellChecking/SpellEngine.cs | 22 +- .../SpellChecking/SpellEngineLinux.cs | 658 ------------------ .../SpellChecking/SpellEngineWindows.cs | 103 --- .../SpellChecking/SpellingHelper.cs | 11 - .../SpellChecking/SpellingHelperTests.cs | 13 - 6 files changed, 1 insertion(+), 812 deletions(-) delete mode 100644 src/SIL.LCModel.Core/SpellChecking/SpellEngineLinux.cs delete mode 100644 src/SIL.LCModel.Core/SpellChecking/SpellEngineWindows.cs diff --git a/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj b/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj index 52202d81..759b9751 100644 --- a/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj +++ b/src/SIL.LCModel.Core/SIL.LCModel.Core.csproj @@ -16,7 +16,6 @@ SIL.LCModel.Core provides a base library with core functionality. - @@ -82,11 +81,6 @@ SIL.LCModel.Core provides a base library with core functionality. - - - - - obj/x86/$(Configuration)/ dotnet build diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs b/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs index bc9cfbea..236898d0 100644 --- a/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs +++ b/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs @@ -26,13 +26,7 @@ internal static SpellEngine Create(string affixPath, string dictPath, string exc SpellEngine spellEngine = null; try { - if (SpellingHelper.UseWeCantSpell) - { - spellEngine = new SpellEngineWeCantSpell(affixPath, dictPath, exceptionPath); - } else { - spellEngine = Platform.IsWindows ? CreateSpellEngineWindows(affixPath, dictPath, exceptionPath) : CreateSpellEngineLinux(affixPath, dictPath, exceptionPath); - } - + spellEngine = new SpellEngineWeCantSpell(affixPath, dictPath, exceptionPath); spellEngine.Initialize(); } catch (Exception e) @@ -45,20 +39,6 @@ internal static SpellEngine Create(string affixPath, string dictPath, string exc return spellEngine; } - private static SpellEngine CreateSpellEngineWindows(string affixPath, string dictPath, - string exceptionPath) - { - // Separate method so that we don't try to instantiate the class when running on Linux - return new SpellEngineWindows(affixPath, dictPath, exceptionPath); - } - - private static SpellEngine CreateSpellEngineLinux(string affixPath, string dictPath, - string exceptionPath) - { - // Separate method so that we don't try to instantiate the class when running on Windows - return new SpellEngineLinux(affixPath, dictPath, exceptionPath); - } - internal SpellEngine(string exceptionPath) { ExceptionPath = exceptionPath; diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellEngineLinux.cs b/src/SIL.LCModel.Core/SpellChecking/SpellEngineLinux.cs deleted file mode 100644 index 811970da..00000000 --- a/src/SIL.LCModel.Core/SpellChecking/SpellEngineLinux.cs +++ /dev/null @@ -1,658 +0,0 @@ -// // Copyright (c) 2018 SIL International -// // This software is licensed under the MIT License (http://opensource.org/licenses/MIT) - -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Runtime.InteropServices; -using System.Text; -using Icu; - -namespace SIL.LCModel.Core.SpellChecking -{ - internal sealed class SpellEngineLinux: SpellEngine - { - private IntPtr _hunspellHandle; - - internal SpellEngineLinux(string affixPath, string dictPath, string exceptionPath) - : base(exceptionPath) - { - _hunspellHandle = Hunspell_initialize(MarshallAsUtf8Bytes(affixPath), MarshallAsUtf8Bytes(dictPath)); - } - - #region Disposable - protected override void Dispose(bool disposing) - { - Debug.WriteLineIf(!disposing, "****** Missing Dispose() call for " + GetType().Name + ". ****** "); - if (_hunspellHandle != IntPtr.Zero) - { - Hunspell_uninitialize(_hunspellHandle); - _hunspellHandle = IntPtr.Zero; - } - - base.Dispose(disposing); - } - #endregion - - /// - /// We can't declare these arguments (char * in C++) as [MarshalAs(UnmanagedType.LPStr)] string, because that - /// unconditionally coverts the string to bytes using the current system code page, which is never what we want. - /// So we declare them as byte[] and marshal like this. The C++ code requires null termination so add a null - /// before converting. (This doesn't seem to be necessary, but better safe than sorry.) - /// - /// - /// - private static byte[] MarshallAsUtf8Bytes(string word) - { - return Encoding.UTF8.GetBytes(Normalizer.Normalize(word, Normalizer.UNormalizationMode.UNORM_NFC) + "\0"); - } - - // This method transforms an array of unmanaged character pointers (pointed to by pUnmanagedStringArray) - // into an array of managed strings. - // Adapted with thanks from http://limbioliong.wordpress.com/2011/08/14/returning-an-array-of-strings-from-c-to-c-part-1/ - private static string[] MarshalUnmanagedStrArray2ManagedStrArray(IntPtr pUnmanagedStringArray, int stringCount) - { - var pIntPtrArray = new IntPtr[stringCount]; - var managedStringArray = new string[stringCount]; - - Marshal.Copy(pUnmanagedStringArray, pIntPtrArray, 0, stringCount); - - for (var i = 0; i < stringCount; i++) - { - var data = new List(); - var ptr = pIntPtrArray[i]; - var offset = 0; - while (true) - { - var ch = Marshal.ReadByte(ptr, offset++); - if (ch == 0) - break; - - data.Add(ch); - } - managedStringArray[i] = Encoding.UTF8.GetString(data.ToArray()); - } - return managedStringArray; - } - - #region Methods to access LibHunspell - private const int RTLD_NOW = 2; - - [DllImport("libdl.so", SetLastError = true)] - private static extern IntPtr dlopen([MarshalAs(UnmanagedType.LPTStr)] string file, int mode); - - [DllImport("libdl.so", SetLastError = true)] - private static extern int dlclose(IntPtr handle); - - private ILibHunspell nativeLibrary; - - private ILibHunspell Library - { - get - { - if (nativeLibrary != null) - return nativeLibrary; - - // Try dlopen'ing libhunspell .so files until we find one. - - var hunspellHandle = dlopen(LibHunspell170.LibraryFilename, RTLD_NOW); - if (hunspellHandle != IntPtr.Zero) - { - dlclose(hunspellHandle); - nativeLibrary = new LibHunspell170(); - return nativeLibrary; - } - - hunspellHandle = dlopen(LibHunspell160.LibraryFilename, RTLD_NOW); - if (hunspellHandle != IntPtr.Zero) - { - dlclose(hunspellHandle); - nativeLibrary = new LibHunspell160(); - return nativeLibrary; - } - - hunspellHandle = dlopen(LibHunspell130.LibraryFilename, RTLD_NOW); - if (hunspellHandle != IntPtr.Zero) - { - dlclose(hunspellHandle); - nativeLibrary = new LibHunspell130(); - return nativeLibrary; - } - - hunspellHandle = dlopen(LibHunspell.LibraryFilename, RTLD_NOW); - if (hunspellHandle != IntPtr.Zero) - { - dlclose(hunspellHandle); - nativeLibrary = new LibHunspell(); - return nativeLibrary; - } - - throw new Exception("Unable to find and load libhunspell."); - } - } - - private IntPtr Hunspell_initialize(byte[] affFile, byte[] dictFile) - { - return Library.Hunspell_initialize(affFile, dictFile); - } - - private void Hunspell_uninitialize(IntPtr handle) - { - Library.Hunspell_uninitialize(handle); - } - - private int Hunspell_spell(IntPtr handle, byte[] word) - { - return Library.Hunspell_spell(handle, word); - } - - private int Hunspell_add(IntPtr handle, byte[] word) - { - return Library.Hunspell_add(handle, word); - } - - private int Hunspell_add_with_affix(IntPtr handle, byte[] word, byte[] example) - { - return Library.Hunspell_add_with_affix(handle, word, example); - } - - private int Hunspell_remove(IntPtr handle, byte[] word) - { - return Library.Hunspell_remove(handle, word); - } - - private int Hunspell_suggest_unix(IntPtr handle, out IntPtr suggestions, byte[] word) - { - return Library.Hunspell_suggest_unix(handle, out suggestions, word); - } - - private int Hunspell_suggest(IntPtr handle, byte[] word, out IntPtr suggestions) - { - return Library.Hunspell_suggest_unix(handle, out suggestions, word); - } - - private void Hunspell_free_list(IntPtr handle, ref IntPtr list, int count) - { - Library.Hunspell_free_list(handle, ref list, count); - } - #endregion - - public override bool Check(string word) - { - return Hunspell_spell(_hunspellHandle, MarshallAsUtf8Bytes(word)) != 0; - } - - protected override void SetStatusInternal(string word, bool isCorrect) - { - if (isCorrect) - { - if (IsVernacular) - { - // Custom vernacular-only dictionary. - // want it 'affixed' like the prototype, which has been marked to suppress other-case matches - Hunspell_add_with_affix(_hunspellHandle, MarshallAsUtf8Bytes(word), - MarshallAsUtf8Bytes(SpellingHelper.PrototypeWord)); - } - else - { - // not our custom dictionary, some majority language, we can't (and probably don't want) - // to be restrictive about case. - Hunspell_add(_hunspellHandle, MarshallAsUtf8Bytes(word)); - } - } - else - { - Hunspell_remove(_hunspellHandle, MarshallAsUtf8Bytes(word)); - } - } - - /// - public override ICollection Suggest(string badWord) - { - var resultCount = Hunspell_suggest(_hunspellHandle, MarshallAsUtf8Bytes(badWord), out var pointerToAddressStringArray); - if (pointerToAddressStringArray == IntPtr.Zero) - return new string[0]; - var results = MarshalUnmanagedStrArray2ManagedStrArray(pointerToAddressStringArray, resultCount); - Hunspell_free_list(_hunspellHandle, ref pointerToAddressStringArray, resultCount); - return results; - } - } - - /// - /// Interface to native methods in libhunspell libraries. - /// - interface ILibHunspell - { - IntPtr Hunspell_initialize(byte[] affFile, byte[] dictFile); - - void Hunspell_uninitialize(IntPtr handle); - - int Hunspell_spell(IntPtr handle, byte[] word); - - int Hunspell_add(IntPtr handle, byte[] word); - - int Hunspell_add_with_affix(IntPtr handle, byte[] word, byte[] example); - - int Hunspell_remove(IntPtr handle, byte[] word); - - int Hunspell_suggest_unix(IntPtr handle, out IntPtr suggestions, byte[] word); - - int Hunspell_suggest(IntPtr handle, byte[] word, out IntPtr suggestions); - - void Hunspell_free_list(IntPtr handle, ref IntPtr list, int count); - } - - #region LibHunspell libraries - /// - /// libhunspell in Ubuntu 14.04 and 16.04 - /// - internal class LibHunspell130 : ILibHunspell - { - public static string LibraryFilename - { - get - { - return NativeLibhunspell_1_3_0.LibHunspell; - } - } - - public IntPtr Hunspell_initialize(byte[] affFile, byte[] dictFile) - { - return NativeLibhunspell_1_3_0.Hunspell_initialize(affFile, dictFile); - } - - public void Hunspell_uninitialize(IntPtr handle) - { - NativeLibhunspell_1_3_0.Hunspell_uninitialize(handle); - } - - public int Hunspell_spell(IntPtr handle, byte[] word) - { - return NativeLibhunspell_1_3_0.Hunspell_spell(handle, word); - } - - public int Hunspell_add(IntPtr handle, byte[] word) - { - return NativeLibhunspell_1_3_0.Hunspell_add(handle, word); - } - - public int Hunspell_add_with_affix(IntPtr handle, byte[] word, byte[] example) - { - return NativeLibhunspell_1_3_0.Hunspell_add_with_affix(handle, word, example); - } - - public int Hunspell_remove(IntPtr handle, byte[] word) - { - return NativeLibhunspell_1_3_0.Hunspell_remove(handle, word); - } - - public int Hunspell_suggest_unix(IntPtr handle, out IntPtr suggestions, byte[] word) - { - return NativeLibhunspell_1_3_0.Hunspell_suggest_unix(handle, out suggestions, word); - } - - public int Hunspell_suggest(IntPtr handle, byte[] word, out IntPtr suggestions) - { - return NativeLibhunspell_1_3_0.Hunspell_suggest_unix(handle, out suggestions, word); - } - - public void Hunspell_free_list(IntPtr handle, ref IntPtr list, int count) - { - NativeLibhunspell_1_3_0.Hunspell_free_list(handle, ref list, count); - } - } - - /// Hunspell functions in libhunspell 1.3.0 - internal class NativeLibhunspell_1_3_0 - { - public const string LibHunspell = "libhunspell-1.3.so.0"; - public const string LibHunspellPrefix = "Hunspell_"; - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "create", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern IntPtr Hunspell_initialize(byte[] affFile, byte[] dictFile); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "destroy", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern void Hunspell_uninitialize(IntPtr handle); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "spell", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_spell(IntPtr handle, byte[] word); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "add", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_add(IntPtr handle, byte[] word); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "add_with_affix", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_add_with_affix(IntPtr handle, byte[] word, byte[] example); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "remove", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_remove(IntPtr handle, byte[] word); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "suggest", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_suggest_unix(IntPtr handle, out IntPtr suggestions, byte[] word); - - public static int Hunspell_suggest(IntPtr handle, byte[] word, out IntPtr suggestions) - { - return Hunspell_suggest_unix(handle, out suggestions, word); - } - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "free_list", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern void Hunspell_free_list(IntPtr handle, ref IntPtr list, int count); - } - - /// - /// libhunspell in Ubuntu 18.04 - /// - internal class LibHunspell160 : ILibHunspell - { - public static string LibraryFilename - { - get - { - return NativeLibhunspell_1_6_0.LibHunspell; - } - } - - public IntPtr Hunspell_initialize(byte[] affFile, byte[] dictFile) - { - return NativeLibhunspell_1_6_0.Hunspell_initialize(affFile, dictFile); - } - - public void Hunspell_uninitialize(IntPtr handle) - { - NativeLibhunspell_1_6_0.Hunspell_uninitialize(handle); - } - - public int Hunspell_spell(IntPtr handle, byte[] word) - { - return NativeLibhunspell_1_6_0.Hunspell_spell(handle, word); - } - - public int Hunspell_add(IntPtr handle, byte[] word) - { - return NativeLibhunspell_1_6_0.Hunspell_add(handle, word); - } - - public int Hunspell_add_with_affix(IntPtr handle, byte[] word, byte[] example) - { - return NativeLibhunspell_1_6_0.Hunspell_add_with_affix(handle, word, example); - } - - public int Hunspell_remove(IntPtr handle, byte[] word) - { - return NativeLibhunspell_1_6_0.Hunspell_remove(handle, word); - } - - public int Hunspell_suggest_unix(IntPtr handle, out IntPtr suggestions, byte[] word) - { - return NativeLibhunspell_1_6_0.Hunspell_suggest_unix(handle, out suggestions, word); - } - - public int Hunspell_suggest(IntPtr handle, byte[] word, out IntPtr suggestions) - { - return NativeLibhunspell_1_6_0.Hunspell_suggest_unix(handle, out suggestions, word); - } - - public void Hunspell_free_list(IntPtr handle, ref IntPtr list, int count) - { - NativeLibhunspell_1_6_0.Hunspell_free_list(handle, ref list, count); - } - } - - /// Hunspell functions in libhunspell 1.6.0 - internal class NativeLibhunspell_1_6_0 - { - public const string LibHunspell = "libhunspell-1.6.so.0"; - public const string LibHunspellPrefix = "Hunspell_"; - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "create", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern IntPtr Hunspell_initialize(byte[] affFile, byte[] dictFile); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "destroy", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern void Hunspell_uninitialize(IntPtr handle); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "spell", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_spell(IntPtr handle, byte[] word); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "add", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_add(IntPtr handle, byte[] word); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "add_with_affix", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_add_with_affix(IntPtr handle, byte[] word, byte[] example); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "remove", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_remove(IntPtr handle, byte[] word); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "suggest", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_suggest_unix(IntPtr handle, out IntPtr suggestions, byte[] word); - - public static int Hunspell_suggest(IntPtr handle, byte[] word, out IntPtr suggestions) - { - return Hunspell_suggest_unix(handle, out suggestions, word); - } - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "free_list", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern void Hunspell_free_list(IntPtr handle, ref IntPtr list, int count); - } - - /// - /// libhunspell in Ubuntu 20.04 - /// - internal class LibHunspell170 : ILibHunspell - { - public static string LibraryFilename - { - get - { - return NativeLibhunspell_1_7_0.LibHunspell; - } - } - - public IntPtr Hunspell_initialize(byte[] affFile, byte[] dictFile) - { - return NativeLibhunspell_1_7_0.Hunspell_initialize(affFile, dictFile); - } - - public void Hunspell_uninitialize(IntPtr handle) - { - NativeLibhunspell_1_7_0.Hunspell_uninitialize(handle); - } - - public int Hunspell_spell(IntPtr handle, byte[] word) - { - return NativeLibhunspell_1_7_0.Hunspell_spell(handle, word); - } - - public int Hunspell_add(IntPtr handle, byte[] word) - { - return NativeLibhunspell_1_7_0.Hunspell_add(handle, word); - } - - public int Hunspell_add_with_affix(IntPtr handle, byte[] word, byte[] example) - { - return NativeLibhunspell_1_7_0.Hunspell_add_with_affix(handle, word, example); - } - - public int Hunspell_remove(IntPtr handle, byte[] word) - { - return NativeLibhunspell_1_7_0.Hunspell_remove(handle, word); - } - - public int Hunspell_suggest_unix(IntPtr handle, out IntPtr suggestions, byte[] word) - { - return NativeLibhunspell_1_7_0.Hunspell_suggest_unix(handle, out suggestions, word); - } - - public int Hunspell_suggest(IntPtr handle, byte[] word, out IntPtr suggestions) - { - return NativeLibhunspell_1_7_0.Hunspell_suggest_unix(handle, out suggestions, word); - } - - public void Hunspell_free_list(IntPtr handle, ref IntPtr list, int count) - { - NativeLibhunspell_1_7_0.Hunspell_free_list(handle, ref list, count); - } - } - - /// Hunspell functions in libhunspell 1.7.0 - internal class NativeLibhunspell_1_7_0 - { - public const string LibHunspell = "libhunspell-1.7.so.0"; - public const string LibHunspellPrefix = "Hunspell_"; - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "create", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern IntPtr Hunspell_initialize(byte[] affFile, byte[] dictFile); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "destroy", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern void Hunspell_uninitialize(IntPtr handle); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "spell", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_spell(IntPtr handle, byte[] word); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "add", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_add(IntPtr handle, byte[] word); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "add_with_affix", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_add_with_affix(IntPtr handle, byte[] word, byte[] example); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "remove", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_remove(IntPtr handle, byte[] word); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "suggest", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_suggest_unix(IntPtr handle, out IntPtr suggestions, byte[] word); - - public static int Hunspell_suggest(IntPtr handle, byte[] word, out IntPtr suggestions) - { - return Hunspell_suggest_unix(handle, out suggestions, word); - } - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "free_list", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern void Hunspell_free_list(IntPtr handle, ref IntPtr list, int count); - } - - /// - /// libhunspell.so from libhunspell-dev package. - /// - internal class LibHunspell : ILibHunspell - { - public static string LibraryFilename - { - get - { - return NativeLibhunspellSo.LibHunspell; - } - } - - public IntPtr Hunspell_initialize(byte[] affFile, byte[] dictFile) - { - return NativeLibhunspellSo.Hunspell_initialize(affFile, dictFile); - } - - public void Hunspell_uninitialize(IntPtr handle) - { - NativeLibhunspellSo.Hunspell_uninitialize(handle); - } - - public int Hunspell_spell(IntPtr handle, byte[] word) - { - return NativeLibhunspellSo.Hunspell_spell(handle, word); - } - - public int Hunspell_add(IntPtr handle, byte[] word) - { - return NativeLibhunspellSo.Hunspell_add(handle, word); - } - - public int Hunspell_add_with_affix(IntPtr handle, byte[] word, byte[] example) - { - return NativeLibhunspellSo.Hunspell_add_with_affix(handle, word, example); - } - - public int Hunspell_remove(IntPtr handle, byte[] word) - { - return NativeLibhunspellSo.Hunspell_remove(handle, word); - } - - public int Hunspell_suggest_unix(IntPtr handle, out IntPtr suggestions, byte[] word) - { - return NativeLibhunspellSo.Hunspell_suggest_unix(handle, out suggestions, word); - } - - public int Hunspell_suggest(IntPtr handle, byte[] word, out IntPtr suggestions) - { - return NativeLibhunspellSo.Hunspell_suggest_unix(handle, out suggestions, word); - } - - public void Hunspell_free_list(IntPtr handle, ref IntPtr list, int count) - { - NativeLibhunspellSo.Hunspell_free_list(handle, ref list, count); - } - } - - /// Hunspell functions in libhunspell.so - internal class NativeLibhunspellSo - { - public const string LibHunspell = "libhunspell.so"; - public const string LibHunspellPrefix = "Hunspell_"; - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "create", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern IntPtr Hunspell_initialize(byte[] affFile, byte[] dictFile); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "destroy", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern void Hunspell_uninitialize(IntPtr handle); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "spell", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_spell(IntPtr handle, byte[] word); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "add", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_add(IntPtr handle, byte[] word); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "add_with_affix", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_add_with_affix(IntPtr handle, byte[] word, byte[] example); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "remove", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_remove(IntPtr handle, byte[] word); - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "suggest", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern int Hunspell_suggest_unix(IntPtr handle, out IntPtr suggestions, byte[] word); - - public static int Hunspell_suggest(IntPtr handle, byte[] word, out IntPtr suggestions) - { - return Hunspell_suggest_unix(handle, out suggestions, word); - } - - [DllImport(LibHunspell, EntryPoint = LibHunspellPrefix + "free_list", - CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Unicode)] - public static extern void Hunspell_free_list(IntPtr handle, ref IntPtr list, int count); - } - #endregion -} diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellEngineWindows.cs b/src/SIL.LCModel.Core/SpellChecking/SpellEngineWindows.cs deleted file mode 100644 index 67c91d49..00000000 --- a/src/SIL.LCModel.Core/SpellChecking/SpellEngineWindows.cs +++ /dev/null @@ -1,103 +0,0 @@ -// // Copyright (c) 2018 SIL International -// // This software is licensed under the MIT License (http://opensource.org/licenses/MIT) - -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Text; -using Icu; -using NHunspell; - -namespace SIL.LCModel.Core.SpellChecking -{ - /// ILRepacking SpellEngineWindows into a dll, and then loading that dll in Linux (such as in mono5-sil) - /// crashes, saying Msg: Could not load type of field - /// 'SIL.LCModel.Core.SpellChecking.SpellEngineWindows:_hunspellHandle' (0) due to: Could not load file or assembly - /// NHunspell. Instructing ILRepack to omit SpellEngineWindows (by attribute) prevents that crash. - internal class NoLinuxRepack : System.Attribute - { - } - - [NoLinuxRepack] - internal sealed class SpellEngineWindows: SpellEngine - { - private readonly Hunspell _hunspellHandle; - - internal SpellEngineWindows(string affixPath, string dictPath, string exceptionPath) - : base(exceptionPath) - { - try - { - _hunspellHandle = new Hunspell(affixPath, dictPath); - } - catch (Exception e) - { - Debug.WriteLine("Initializing Hunspell: {0} exception: {1} ", e.GetType(), e.Message); - _hunspellHandle?.Dispose(); - throw; - } - } - - #region Disposable - protected override void Dispose(bool disposing) - { - Debug.WriteLineIf(!disposing, "****** Missing Dispose() call for " + GetType().Name + ". ****** "); - if (disposing) - { - _hunspellHandle?.Dispose(); - } - - base.Dispose(disposing); - } - #endregion - - public override bool Check(string word) - { - return _hunspellHandle.Spell(MarshallAsUtf8Bytes(word)); - } - - protected override void SetStatusInternal(string word, bool isCorrect) - { - if (isCorrect) - { - if (IsVernacular) - { - // Custom vernacular-only dictionary. - // want it 'affixed' like the prototype, which has been marked to suppress other-case matches - _hunspellHandle.AddWithAffix(MarshallAsUtf8Bytes(word), MarshallAsUtf8Bytes(SpellingHelper.PrototypeWord)); - } - else - { - // not our custom dictionary, some majority language, we can't (and probably don't want) - // to be restrictive about case. - _hunspellHandle.Add(MarshallAsUtf8Bytes(word)); - } - } - else - { - _hunspellHandle.Remove(MarshallAsUtf8Bytes(word)); - } - } - - /// - public override ICollection Suggest(string badWord) - { - return _hunspellHandle.Suggest(MarshallAsUtf8Bytes(badWord)); - } - - /// - /// We can't declare these arguments (char * in C++) as [MarshalAs(UnmanagedType.LPStr)] string, because that - /// unconditionally coverts the string to bytes using the current system code page, which is never what we want. - /// So we declare them as byte[] and marshal like this. The C++ code requires null termination so add a null - /// before converting. (This doesn't seem to be necessary, but better safe than sorry.) - /// - /// - /// - private static string MarshallAsUtf8Bytes(string word) - { - var bytes = Encoding.UTF8.GetBytes(Normalizer.Normalize(word, - Normalizer.UNormalizationMode.UNORM_NFC) + "\0"); - return Encoding.UTF8.GetString(bytes); - } - } -} \ No newline at end of file diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs b/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs index fc5c6560..d41d14fb 100644 --- a/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs +++ b/src/SIL.LCModel.Core/SpellChecking/SpellingHelper.cs @@ -22,17 +22,6 @@ namespace SIL.LCModel.Core.SpellChecking /// public static class SpellingHelper { - /// - /// FieldWorks uses NHunspell for spell checking, but NHunspell is not available on Linux. - /// Use this flag to switch between NHunspell and WeCantSpell as needed. On dotnet framework we use NHunspell by default. - /// On dotnet core we use WeCantSpell by default. - /// - public static bool UseWeCantSpell { get; set; } = - #if NETFRAMEWORK - false; - #else - !Platform.IsDotNetFramework; - #endif // A helper object used to ensure that the spelling engines are properly disposed of private sealed class SingletonToDispose : IDisposable { diff --git a/tests/SIL.LCModel.Core.Tests/SpellChecking/SpellingHelperTests.cs b/tests/SIL.LCModel.Core.Tests/SpellChecking/SpellingHelperTests.cs index 296d7d49..c18e72bc 100644 --- a/tests/SIL.LCModel.Core.Tests/SpellChecking/SpellingHelperTests.cs +++ b/tests/SIL.LCModel.Core.Tests/SpellChecking/SpellingHelperTests.cs @@ -21,19 +21,6 @@ namespace SIL.LCModel.Core.SpellChecking [TestFixture] public class SpellingHelperTests { - // TODO-Linux: need slightly modified hunspell package installed! - - [OneTimeSetUp] - public void FixtureSetUp() - { - #if NET8_0 - if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) - { - Assert.Ignore("NHunspell does not work on dotnet 8 on linux"); - } - #endif - } - /// /// Check how spelling status is set and cleared. /// From 045d5b249e65d7214e3f4948a00cc165d92e5d43 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Mon, 10 Mar 2025 14:17:56 +0700 Subject: [PATCH 08/10] removed unused sign key file --- liblcm.snk | Bin 596 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 liblcm.snk diff --git a/liblcm.snk b/liblcm.snk deleted file mode 100644 index e68d1e082924490ca4024053d96c102524c4e846..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 596 zcmV-a0;~N80ssI2Bme+XQ$aES1ONa50098C5im~$uL=Rn#XyYP-*hUWW=*i+6>>GZ zCK>1ZUT|B}Q%Z4HpopYc>@havZoMmWQJ=7V|I*r|3VWKPKxbW&YPlHF;21lG6EX!U zxTKH*jE);Xxq$O>Nhn2KM17Bw)LFhfRixwc?@<}UIX}``+oJ)?|B65R&Mp`o6gP-% z{DHlhSZw%TrnJ1ra;-Fq)Y8WS>OiD7qN}FQ)xbN7|3os>8udhMAAU?u)&1L@Ynf)P z<6S(PmcyhyAU&^X4Tb_Fq%D!qe3y`=<*x8VACQhrHOM0M%myIU1k1}96L`L{e!0HL zOvg+{o_F7(IgIyFKXoo8hgvRr=KaP2g1e2LtT86#V%uWF%WY_$radMFV{Fpo?U0`I zM-UrudZctU7sGfW$c8~asnA?kA5qZ5*k;No&9#^kq1B(US&@*Otybl&@3Wh37QgRp z6mn7-4QN?yX1JUR*r4TcNk=q=b02I@2&uY%A}_IDPVTq=ONRD{fw%9lz;s`E7`xFe zcOEbvk-r2bAgNJf=#RIEz@XY(Wy{cPyi`zh@HO(j>D$(bNDX;^;mG0=Ou(S=`t zyv^8k320TJw;i%`KdV>%ZYd&AP=XM+P+tiuQS$ezgfgOPIUd&kDh&xV%nf+6V5Q%E zOwJj=O)R<=a-quG1lz?Y?a#|+A)(bl;2Kko$)R?u_dqI8-eB5QAi|!_r;G;}YJNy1 iR8+0JO?a338APwd>{Z_410(q%Kw|d~;oi}iHup*5d>(lK From 37b052b7a5686b1ff722cbe0afe37ab6c9cb5bed Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Tue, 18 Mar 2025 10:10:16 +0700 Subject: [PATCH 09/10] fix some code style issues from review --- Directory.Build.props | 2 +- .../SpellChecking/SpellEngine.cs | 4 -- .../SpellChecking/SpellEngineWeCantSpell.cs | 61 +++++++++---------- .../SpellChecking/SpellingHelperTests.cs | 1 - 4 files changed, 30 insertions(+), 38 deletions(-) diff --git a/Directory.Build.props b/Directory.Build.props index 71910208..f8d122ff 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -15,7 +15,7 @@ $(MSBuildThisFileDirectory)artifacts/$(Configuration)/$(TargetFramework) false $(MSBuildThisFileDirectory)/artifacts - NU1605;CS8002 + NU1605;CS8002 true snupkg true diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs b/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs index 236898d0..04757e7d 100644 --- a/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs +++ b/src/SIL.LCModel.Core/SpellChecking/SpellEngine.cs @@ -8,7 +8,6 @@ using System.IO; using System.Text; using Icu; -using SIL.PlatformUtilities; namespace SIL.LCModel.Core.SpellChecking { @@ -66,16 +65,13 @@ private void Initialize() } } - /// public abstract bool Check(string word); private bool? _isVernacular; public bool IsVernacular => _isVernacular ??= Check(SpellingHelper.PrototypeWord); - /// public abstract ICollection Suggest(string badWord); - /// public void SetStatus(string word1, bool isCorrect) { var word = Normalizer.Normalize(word1, Normalizer.UNormalizationMode.UNORM_NFC); diff --git a/src/SIL.LCModel.Core/SpellChecking/SpellEngineWeCantSpell.cs b/src/SIL.LCModel.Core/SpellChecking/SpellEngineWeCantSpell.cs index 12d31007..af8b1fe8 100644 --- a/src/SIL.LCModel.Core/SpellChecking/SpellEngineWeCantSpell.cs +++ b/src/SIL.LCModel.Core/SpellChecking/SpellEngineWeCantSpell.cs @@ -1,46 +1,43 @@ -using System; -using System.Collections.Generic; +using System.Collections.Generic; using System.Linq; using Icu; using WeCantSpell.Hunspell; -namespace SIL.LCModel.Core.SpellChecking +namespace SIL.LCModel.Core.SpellChecking; + +internal class SpellEngineWeCantSpell(string affixPath, string dictPath, string exceptionPath) + : SpellEngine(exceptionPath) { - internal class SpellEngineWeCantSpell: SpellEngine - { - private readonly WordList _wordList; + private readonly WordList _wordList = WordList.CreateFromFiles(dictPath, affixPath); - public SpellEngineWeCantSpell(string affixPath, string dictPath, string exceptionPath) : base(exceptionPath) - { - _wordList = WordList.CreateFromFiles(dictPath, affixPath); - } + public override bool Check(string word) + { + var normalized = Normalizer.Normalize(word, Normalizer.UNormalizationMode.UNORM_NFC); + return _wordList.Check(normalized); + } - public override bool Check(string word) - { - return _wordList.Check(Normalizer.Normalize(word, Normalizer.UNormalizationMode.UNORM_NFC)); - } + public override ICollection Suggest(string badWord) + { + var result = _wordList.Suggest(badWord); + return result as ICollection ?? result.ToArray(); + } - public override ICollection Suggest(string badWord) + protected override void SetStatusInternal(string word1, bool isCorrect) + { + if (isCorrect) { - var result = _wordList.Suggest(badWord); - return result as ICollection ?? result.ToArray(); + var detail = IsVernacular + ? new WordEntryDetail( + FlagSet.Create(new FlagValue(SpellingHelper.keepCaseFlag)), + MorphSet.Create([SpellingHelper.PrototypeWord]), + WordEntryOptions.None + ) + : WordEntryDetail.Default; + _wordList.Add(word1, detail); } - - protected override void SetStatusInternal(string word1, bool isCorrect) + else { - if (isCorrect) - { - var detail = IsVernacular - ? new WordEntryDetail(FlagSet.Create(new FlagValue(SpellingHelper.keepCaseFlag)), - MorphSet.Create(new []{SpellingHelper.PrototypeWord}), - WordEntryOptions.None) - : WordEntryDetail.Default; - _wordList.Add(word1, detail); - } - else - { - _wordList.Remove(word1); - } + _wordList.Remove(word1); } } } \ No newline at end of file diff --git a/tests/SIL.LCModel.Core.Tests/SpellChecking/SpellingHelperTests.cs b/tests/SIL.LCModel.Core.Tests/SpellChecking/SpellingHelperTests.cs index c18e72bc..e9d91bb4 100644 --- a/tests/SIL.LCModel.Core.Tests/SpellChecking/SpellingHelperTests.cs +++ b/tests/SIL.LCModel.Core.Tests/SpellChecking/SpellingHelperTests.cs @@ -5,7 +5,6 @@ using System; using System.IO; using System.Linq; -using System.Runtime.InteropServices; using System.Text; using NUnit.Framework; using SIL.IO; From 10f58840a1070050df911aae35694da1bcd54a81 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Mon, 24 Mar 2025 10:37:39 +0700 Subject: [PATCH 10/10] fix editor config code style to include props files, convert tabs to spaces in Directory.Build.props --- .editorconfig | 2 +- Directory.Build.props | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.editorconfig b/.editorconfig index e547ef03..c82ab97b 100644 --- a/.editorconfig +++ b/.editorconfig @@ -18,7 +18,7 @@ indent_style = tab tab_width = 4 # Settings Visual Studio uses for generated files -[*.{csproj,resx,settings,vcxproj*,vdproj,xml,yml,config}] +[*.{csproj,resx,settings,vcxproj*,vdproj,xml,yml,config,props}] indent_style = space indent_size = 2 diff --git a/Directory.Build.props b/Directory.Build.props index f8d122ff..fd4b7bb9 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -11,12 +11,12 @@ https://github.com/sillsdev/liblcm false Any CPU - Debug + Debug $(MSBuildThisFileDirectory)artifacts/$(Configuration)/$(TargetFramework) false $(MSBuildThisFileDirectory)/artifacts NU1605;CS8002 - true + true snupkg true true