Skip to content

Commit 055b8e3

Browse files
committed
Refactor WordCensor and optimize offensive word handling
Renamed the static fields for clarity and consistency. Optimized text processing by loading the offensive words into a HashSet once per subtitle, so each candidate word is checked with a single set lookup. Commented out the previous per-word IndexOf loop and replaced it with a single pass over the text that extracts each word and checks it against the set.
1 parent 0faf32e commit 055b8e3

File tree

2 files changed: 54 additions and 22 deletions.

source/WordCensor/EntryPoint.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ public class WordCensor : IPlugin
1111

1212
public string Text => "Word censor";
1313

14-
public decimal Version => 1m;
14+
public decimal Version => 1.1m;
1515

1616
public string Description => "Censor offensive words";
1717

source/WordCensor/WordsHandler.cs

Lines changed: 53 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Collections.Generic;
23
using System.Text;
34
using System.Text.RegularExpressions;
45
using Nikse.SubtitleEdit.Core.Common;
@@ -7,10 +8,10 @@ namespace Nikse.SubtitleEdit.PluginLogic
78
{
89
class WordsHandler
910
{
10-
private static readonly Random _random = new Random();
11-
private static readonly char[] _grawlixChars = { '@', '#', '!', '?', '$', '%', '&' };
11+
private static readonly Random Random = new();
12+
private static readonly char[] GrawlixChars = { '@', '#', '!', '?', '$', '%', '&' };
1213
private readonly WordsHandlerConfigs _configs;
13-
private static readonly Regex _regexNonWord = new Regex("\\W", RegexOptions.Compiled);
14+
private static readonly Regex RegexNonWord = new("\\W", RegexOptions.Compiled);
1415

1516
public WordsHandler(WordsHandlerConfigs configs)
1617
{
@@ -19,37 +20,68 @@ public WordsHandler(WordsHandlerConfigs configs)
1920

2021
public void CleanUpSubtitle(Subtitle subtitle)
2122
{
23+
var offensiveWordsSet = new HashSet<string>(WordsHelper.GetWords());
2224
foreach (var p in subtitle.Paragraphs)
2325
{
24-
p.Text = ProcessText(p.Text);
26+
p.Text = ProcessText(p.Text, offensiveWordsSet);
2527
}
2628
}
2729

28-
public string ProcessText(string text)
30+
public string ProcessText(string text, HashSet<string> ofensiveWords)
2931
{
30-
foreach (var word in WordsHelper.GetWords())
32+
var l = 0;
33+
for (int r = 0; r < text.Length; r++)
3134
{
32-
int idx = text.IndexOf(word, StringComparison.OrdinalIgnoreCase);
33-
while (idx >= 0)
35+
if (RegexNonWord.IsMatch(text[l].ToString()) && !RegexNonWord.IsMatch(text[r].ToString()))
3436
{
35-
bool startsOkay = idx == 0 || _regexNonWord.IsMatch(text[idx - 1].ToString());
36-
bool allOkay = startsOkay && (idx + word.Length == text.Length || _regexNonWord.IsMatch(text[idx + word.Length].ToString()));
37-
if (allOkay)
37+
l = r;
38+
}
39+
else if (!char.IsLetterOrDigit(text[r]) || RegexNonWord.IsMatch(text[r].ToString()))
40+
{
41+
if (r - l > 1)
3842
{
39-
string procWord = ProcessWord(word);
40-
if (_configs.ColorRed)
43+
string word = text.Substring(l, r - l);
44+
if (ofensiveWords.Contains(word.ToLowerInvariant()))
4145
{
42-
procWord = WordsHelper.ColorWordRed(procWord);
46+
text = text.Remove(l, r - l);
47+
var processWord = ProcessWord(word);
48+
if (_configs.ColorRed)
49+
{
50+
processWord = WordsHelper.ColorWordRed(processWord);
51+
}
52+
53+
text = text.Insert(l, processWord);
54+
r = l + processWord.Length;
4355
}
44-
// take out old word
45-
text = text.Remove(idx, word.Length);
46-
// insert new word/colored word.
47-
text = text.Insert(idx, procWord);
4856
}
49-
// keep looking for more words in same text.
50-
idx = text.IndexOf(word, idx + word.Length, StringComparison.OrdinalIgnoreCase);
57+
58+
l = r + 1;
5159
}
5260
}
61+
62+
// foreach (var word in WordsHelper.GetWords())
63+
// {
64+
// int idx = text.IndexOf(word, StringComparison.OrdinalIgnoreCase);
65+
// while (idx >= 0)
66+
// {
67+
// bool startsOkay = idx == 0 || RegexNonWord.IsMatch(text[idx - 1].ToString());
68+
// bool allOkay = startsOkay && (idx + word.Length == text.Length || RegexNonWord.IsMatch(text[idx + word.Length].ToString()));
69+
// if (allOkay)
70+
// {
71+
// string procWord = ProcessWord(word);
72+
// if (_configs.ColorRed)
73+
// {
74+
// procWord = WordsHelper.ColorWordRed(procWord);
75+
// }
76+
// // take out old word
77+
// text = text.Remove(idx, word.Length);
78+
// // insert new word/colored word.
79+
// text = text.Insert(idx, procWord);
80+
// }
81+
// // keep looking for more words in same text.
82+
// idx = text.IndexOf(word, idx + word.Length, StringComparison.OrdinalIgnoreCase);
83+
// }
84+
// }
5385
return text;
5486
}
5587

@@ -64,7 +96,7 @@ private static string ProcessWord(string curseWord)
6496
var sb = new StringBuilder(len);
6597
foreach (char ch in charsToCensor)
6698
{
67-
char grawlix = _grawlixChars[_random.Next(0, _grawlixChars.Length)];
99+
char grawlix = GrawlixChars[Random.Next(0, GrawlixChars.Length)];
68100
sb.Append(grawlix);
69101
}
70102
// grawlixed word

0 commit comments

Comments
 (0)