Skip to content
This repository was archived by the owner on Dec 5, 2024. It is now read-only.

Commit d777583

Browse files
committed
Minor update of libse
1 parent 926ebbc commit d777583

File tree

11 files changed

+948
-203
lines changed

11 files changed

+948
-203
lines changed

SubtitleEdit/Windows/Edit/SpellCheckController.cs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using System.Text;
99
using System.Threading.Tasks;
1010
using Nikse.SubtitleEdit.UILogic;
11+
using Nikse.SubtitleEdit.Core.Dictionaries;
1112

1213
namespace Edit
1314
{
@@ -26,6 +27,7 @@ public partial class SpellCheckController : NSWindowController
2627
private List<SpellCheckWord> _words;
2728
private List<string> _skipAllList = new List<string>();
2829
private bool _abort = false;
30+
private List<string> _namesEtcList = new List<string>();
2931

3032
public SpellCheckController(IntPtr handle)
3133
: base(handle)
@@ -49,6 +51,12 @@ public override void AwakeFromNib()
4951
base.AwakeFromNib();
5052
}
5153

54+
/// <summary>
/// Replaces <c>_namesEtcList</c> with the result of <c>NamesList.GetAllNames()</c>
/// for a <c>NamesList</c> built from the configured dictionaries folder, the
/// language code "en", and the online-names settings (enabled flag and URL).
/// </summary>
void LoadNames()
{
    // The language is fixed to "en" here, matching the hard-coded SpellChecker("en")
    // used by InitializeSpellCheck.
    _namesEtcList = new NamesList(
        Configuration.DictionariesFolder,
        "en",
        Configuration.Settings.WordLists.UseOnlineNamesEtc,
        Configuration.Settings.WordLists.NamesEtcUrl).GetAllNames();
}
59+
5260
public void InitializeSpellCheck()
5361
{
5462
_spellChecker = new SpellChecker("en");
@@ -57,6 +65,7 @@ public void InitializeSpellCheck()
5765
_currentParagraphIndex = 0;
5866
_words = SpellChecker.Split(_subtitle.Paragraphs[0].Text);
5967
_wordsIndex = -1;
68+
LoadNames();
6069
PrepareNextWord();
6170
}
6271

@@ -67,13 +76,18 @@ public void InitializeSpellCheck()
6776

6877
/// <summary>
/// Records the current word in <c>_skipAllList</c> in several casings
/// (lower case, upper case, first letter capitalised, and exactly as it
/// appears after trimming) and then calls <c>PrepareNextWord()</c>.
/// </summary>
public void SkipAll()
{
    string asTyped = _currentWord.Trim();
    string lowered = asTyped.ToLower();

    _skipAllList.Add(lowered);
    _skipAllList.Add(lowered.ToUpper());

    if (lowered.Length != 0)
    {
        string head = lowered.Substring(0, 1).ToUpper();
        string tail = lowered.Substring(1);
        _skipAllList.Add(head + tail);
    }

    // The variants above may already cover the typed casing (e.g. "word",
    // "WORD", "Word"); only mixed casings such as "McDonald" need an extra entry.
    bool alreadyListed = _skipAllList.Contains(asTyped);
    if (!alreadyListed)
    {
        _skipAllList.Add(asTyped);
    }

    PrepareNextWord();
}
7993

@@ -138,6 +152,10 @@ private void PrepareNextWord()
138152
spelledOk = true;
139153
}
140154

155+
if (!spelledOk && _namesEtcList.Contains(_currentWord))
156+
{
157+
spelledOk = true;
158+
}
141159

142160
if (!spelledOk)
143161
{

libse/Interfaces/IDoSpell.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
namespace Nikse.SubtitleEdit.Core.Interfaces
{
    /// <summary>
    /// Contract for a component that can evaluate a single word.
    /// NOTE(review): the name suggests a spell-check engine; no implementation is
    /// visible here, so confirm the intended semantics against implementers.
    /// </summary>
    public interface IDoSpell
    {
        /// <summary>
        /// Evaluates one word.
        /// </summary>
        /// <param name="word">The word to evaluate.</param>
        /// <returns>
        /// A boolean verdict. Presumably <c>true</c> means the word is accepted as
        /// correctly spelled - TODO confirm with the implementing classes.
        /// </returns>
        bool DoSpell(string word);
    }
}
libse/Interfaces/IRtfTextConverter.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
namespace Nikse.SubtitleEdit.Core.Interfaces
{
    /// <summary>
    /// Pluggable converter between RTF markup and plain text.
    /// <c>RichTextToPlainText.NativeRtfTextConverter</c> holds an optional instance
    /// of this interface and invokes it when it is set.
    /// </summary>
    public interface IRtfTextConverter
    {
        /// <summary>
        /// Converts RTF markup to text.
        /// </summary>
        /// <param name="rtf">The RTF input.</param>
        /// <returns>The text produced from <paramref name="rtf"/>.</returns>
        string RtfToText(string rtf);

        /// <summary>
        /// Converts text to RTF markup.
        /// </summary>
        /// <param name="text">The text input.</param>
        /// <returns>The RTF produced from <paramref name="text"/>.</returns>
        string TextToRtf(string text);
    }
}

libse/LibSE.csproj

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,13 @@
408408
<Compile Include="VobSub\VobSubMergedPack.cs" />
409409
<Compile Include="VobSub\VobSubPack.cs" />
410410
<Compile Include="VobSub\VobSubParser.cs" />
411+
<Compile Include="RichTextToPlainText.cs" />
412+
<Compile Include="Interfaces\IDoSpell.cs" />
413+
<Compile Include="Interfaces\IRtfTextConverter.cs" />
414+
<Compile Include="SubtitleFormats\F4Rtf.cs" />
415+
<Compile Include="SpellCheck\SpellCheckWord.cs" />
416+
<Compile Include="SpellCheck\SpellCheckWordLists.cs" />
417+
<Compile Include="SpellCheck\UndoObject.cs" />
411418
</ItemGroup>
412419
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
413420
<PropertyGroup>

libse/RichTextToPlainText.cs

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
using Nikse.SubtitleEdit.Core.Interfaces;
2+
using System;
3+
using System.Collections.Generic;
4+
using System.Text;
5+
using System.Text.RegularExpressions;
6+
7+
namespace Nikse.SubtitleEdit.Core
8+
{
9+
/// <summary>
10+
/// Rich Text to plain text
11+
/// </summary>
12+
/// <remarks>
13+
/// Translated from Python located at:
14+
/// http://stackoverflow.com/a/188877/448
15+
/// to C# by Chris Benard - http://chrisbenard.net/2014/08/20/Extract-Text-from-RTF-in-.Net
16+
/// </remarks>
17+
public static class RichTextToPlainText
18+
{
19+
20+
public static IRtfTextConverter NativeRtfTextConverter { get; set; }
21+
22+
private class StackEntry
23+
{
24+
public int NumberOfCharactersToSkip { get; private set; }
25+
public bool Ignorable { get; private set; }
26+
27+
public StackEntry(int numberOfCharactersToSkip, bool ignorable)
28+
{
29+
NumberOfCharactersToSkip = numberOfCharactersToSkip;
30+
Ignorable = ignorable;
31+
}
32+
}
33+
34+
private static readonly Regex RtfRegex = new Regex(@"\\([a-z]{1,32})(-?\d{1,10})?[ ]?|\\'([0-9a-f]{2})|\\([^a-z])|([{}])|[\r\n]+|(.)", RegexOptions.Singleline | RegexOptions.IgnoreCase);
35+
36+
private static readonly List<string> Destinations = new List<string>
37+
{
38+
"aftncn","aftnsep","aftnsepc","annotation","atnauthor","atndate","atnicn","atnid",
39+
"atnparent","atnref","atntime","atrfend","atrfstart","author","background",
40+
"bkmkend","bkmkstart","blipuid","buptim","category","colorschememapping",
41+
"colortbl","comment","company","creatim","datafield","datastore","defchp","defpap",
42+
"do","doccomm","docvar","dptxbxtext","ebcend","ebcstart","factoidname","falt",
43+
"fchars","ffdeftext","ffentrymcr","ffexitmcr","ffformat","ffhelptext","ffl",
44+
"ffname","ffstattext","field","file","filetbl","fldinst","fldrslt","fldtype",
45+
"fname","fontemb","fontfile","fonttbl","footer","footerf","footerl","footerr",
46+
"footnote","formfield","ftncn","ftnsep","ftnsepc","g","generator","gridtbl",
47+
"header","headerf","headerl","headerr","hl","hlfr","hlinkbase","hlloc","hlsrc",
48+
"hsv","htmltag","info","keycode","keywords","latentstyles","lchars","levelnumbers",
49+
"leveltext","lfolevel","linkval","list","listlevel","listname","listoverride",
50+
"listoverridetable","listpicture","liststylename","listtable","listtext",
51+
"lsdlockedexcept","macc","maccPr","mailmerge","maln","malnScr","manager","margPr",
52+
"mbar","mbarPr","mbaseJc","mbegChr","mborderBox","mborderBoxPr","mbox","mboxPr",
53+
"mchr","mcount","mctrlPr","md","mdeg","mdegHide","mden","mdiff","mdPr","me",
54+
"mendChr","meqArr","meqArrPr","mf","mfName","mfPr","mfunc","mfuncPr","mgroupChr",
55+
"mgroupChrPr","mgrow","mhideBot","mhideLeft","mhideRight","mhideTop","mhtmltag",
56+
"mlim","mlimloc","mlimlow","mlimlowPr","mlimupp","mlimuppPr","mm","mmaddfieldname",
57+
"mmath","mmathPict","mmathPr","mmaxdist","mmc","mmcJc","mmconnectstr",
58+
"mmconnectstrdata","mmcPr","mmcs","mmdatasource","mmheadersource","mmmailsubject",
59+
"mmodso","mmodsofilter","mmodsofldmpdata","mmodsomappedname","mmodsoname",
60+
"mmodsorecipdata","mmodsosort","mmodsosrc","mmodsotable","mmodsoudl",
61+
"mmodsoudldata","mmodsouniquetag","mmPr","mmquery","mmr","mnary","mnaryPr",
62+
"mnoBreak","mnum","mobjDist","moMath","moMathPara","moMathParaPr","mopEmu",
63+
"mphant","mphantPr","mplcHide","mpos","mr","mrad","mradPr","mrPr","msepChr",
64+
"mshow","mshp","msPre","msPrePr","msSub","msSubPr","msSubSup","msSubSupPr","msSup",
65+
"msSupPr","mstrikeBLTR","mstrikeH","mstrikeTLBR","mstrikeV","msub","msubHide",
66+
"msup","msupHide","mtransp","mtype","mvertJc","mvfmf","mvfml","mvtof","mvtol",
67+
"mzeroAsc","mzeroDesc","mzeroWid","nesttableprops","nextfile","nonesttables",
68+
"objalias","objclass","objdata","object","objname","objsect","objtime","oldcprops",
69+
"oldpprops","oldsprops","oldtprops","oleclsid","operator","panose","password",
70+
"passwordhash","pgp","pgptbl","picprop","pict","pn","pnseclvl","pntext","pntxta",
71+
"pntxtb","printim","private","propname","protend","protstart","protusertbl","pxe",
72+
"result","revtbl","revtim","rsidtbl","rxe","shp","shpgrp","shpinst",
73+
"shppict","shprslt","shptxt","sn","sp","staticval","stylesheet","subject","sv",
74+
"svb","tc","template","themedata","title","txe","ud","upr","userprops",
75+
"wgrffmtfilter","windowcaption","writereservation","writereservhash","xe","xform",
76+
"xmlattrname","xmlattrvalue","xmlclose","xmlname","xmlnstbl",
77+
"xmlopen"
78+
};
79+
80+
private static readonly Dictionary<string, string> SpecialCharacters = new Dictionary<string, string>
81+
{
82+
{ "par", "\n" },
83+
{ "sect", "\n\n" },
84+
{ "page", "\n\n" },
85+
{ "line", "\n" },
86+
{ "tab", "\t" },
87+
{ "emdash", "\u2014" },
88+
{ "endash", "\u2013" },
89+
{ "emspace", "\u2003" },
90+
{ "enspace", "\u2002" },
91+
{ "qmspace", "\u2005" },
92+
{ "bullet", "\u2022" },
93+
{ "lquote", "\u2018" },
94+
{ "rquote", "\u2019" },
95+
{ "ldblquote", "\u201C" },
96+
{ "rdblquote", "\u201D" },
97+
};
98+
99+
/// <summary>
100+
/// Strip RTF Tags from RTF Text
101+
/// </summary>
102+
/// <param name="inputRtf">RTF formatted text</param>
103+
/// <returns>Plain text from RTF</returns>
104+
internal static string ConvertToText(string inputRtf)
105+
{
106+
if (inputRtf == null)
107+
{
108+
return null;
109+
}
110+
111+
// use interface converter if available
112+
if (NativeRtfTextConverter != null)
113+
{
114+
NativeRtfTextConverter.RtfToText(inputRtf);
115+
}
116+
117+
var stack = new Stack<StackEntry>();
118+
bool ignorable = false; // Whether this group (and all inside it) are "ignorable".
119+
int ucskip = 1; // Number of ASCII characters to skip after a unicode character.
120+
int curskip = 0; // Number of ASCII characters left to skip
121+
var outList = new List<string>(); // Output buffer.
122+
123+
MatchCollection matches = RtfRegex.Matches(inputRtf);
124+
125+
if (matches.Count > 0)
126+
{
127+
foreach (Match match in matches)
128+
{
129+
string word = match.Groups[1].Value;
130+
string arg = match.Groups[2].Value;
131+
string hex = match.Groups[3].Value;
132+
string character = match.Groups[4].Value;
133+
string brace = match.Groups[5].Value;
134+
string tchar = match.Groups[6].Value;
135+
136+
if (!String.IsNullOrEmpty(brace))
137+
{
138+
curskip = 0;
139+
if (brace == "{")
140+
{
141+
// Push state
142+
stack.Push(new StackEntry(ucskip, ignorable));
143+
}
144+
else if (brace == "}")
145+
{
146+
// Pop state
147+
StackEntry entry = stack.Pop();
148+
ucskip = entry.NumberOfCharactersToSkip;
149+
ignorable = entry.Ignorable;
150+
}
151+
}
152+
else if (!String.IsNullOrEmpty(character)) // \x (not a letter)
153+
{
154+
curskip = 0;
155+
if (character == "~")
156+
{
157+
if (!ignorable)
158+
{
159+
outList.Add("\xA0");
160+
}
161+
}
162+
else if ("{}\\".Contains(character))
163+
{
164+
if (!ignorable)
165+
{
166+
outList.Add(character);
167+
}
168+
}
169+
else if (character == "*")
170+
{
171+
ignorable = true;
172+
}
173+
}
174+
else if (!String.IsNullOrEmpty(word)) // \foo
175+
{
176+
curskip = 0;
177+
if (Destinations.Contains(word))
178+
{
179+
ignorable = true;
180+
}
181+
else if (ignorable)
182+
{
183+
}
184+
else if (SpecialCharacters.ContainsKey(word))
185+
{
186+
outList.Add(SpecialCharacters[word]);
187+
}
188+
else if (word == "uc")
189+
{
190+
ucskip = Int32.Parse(arg);
191+
}
192+
else if (word == "u")
193+
{
194+
int c = Int32.Parse(arg);
195+
if (c < 0)
196+
{
197+
c += 0x10000;
198+
}
199+
outList.Add(Char.ConvertFromUtf32(c));
200+
curskip = ucskip;
201+
}
202+
}
203+
else if (!String.IsNullOrEmpty(hex)) // \'xx
204+
{
205+
if (curskip > 0)
206+
{
207+
curskip -= 1;
208+
}
209+
else if (!ignorable)
210+
{
211+
int c = Int32.Parse(hex, System.Globalization.NumberStyles.HexNumber);
212+
outList.Add(Char.ConvertFromUtf32(c));
213+
}
214+
}
215+
else if (!String.IsNullOrEmpty(tchar))
216+
{
217+
if (curskip > 0)
218+
{
219+
curskip -= 1;
220+
}
221+
else if (!ignorable)
222+
{
223+
outList.Add(tchar);
224+
}
225+
}
226+
}
227+
}
228+
return String.Join(String.Empty, outList.ToArray());
229+
}
230+
231+
internal static string ConvertToRtf(string value)
232+
{
233+
if (string.IsNullOrWhiteSpace(value))
234+
{
235+
return string.Empty;
236+
}
237+
238+
// use interface converter if available
239+
if (NativeRtfTextConverter != null)
240+
{
241+
NativeRtfTextConverter.TextToRtf(value);
242+
}
243+
244+
// special RTF chars
245+
var backslashed = new StringBuilder(value);
246+
backslashed.Replace(@"\", @"\\");
247+
backslashed.Replace(@"{", @"\{");
248+
backslashed.Replace(@"}", @"\}");
249+
backslashed.Replace(Environment.NewLine, @"\par" + Environment.NewLine);
250+
251+
// convert string char by char
252+
var sb = new StringBuilder();
253+
foreach (char character in backslashed.ToString())
254+
{
255+
if (character <= 0x7f)
256+
sb.Append(character);
257+
else
258+
sb.Append("\\u" + Convert.ToUInt32(character) + "?");
259+
}
260+
261+
return @"{\rtf1\ansi\ansicpg1252\deff0{\fonttbl\f0\fswiss Helvetica;}\f0\pard " + sb + @"\par" + Environment.NewLine + "}";
262+
}
263+
264+
}
265+
266+
}

libse/SpellCheck/SpellCheckWord.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
namespace Nikse.SubtitleEdit.Core.SpellCheck
{
    /// <summary>
    /// Simple data holder pairing a piece of text with an integer index.
    /// </summary>
    public class SpellCheckWord
    {
        /// <summary>
        /// Integer index associated with the word.
        /// NOTE(review): presumably the character offset of the word within the
        /// text it was split from - confirm against the code that creates instances.
        /// </summary>
        public int Index { get; set; }

        /// <summary>
        /// The word text.
        /// </summary>
        public string Text { get; set; }
    }
}

0 commit comments

Comments
 (0)