Skip to content

Commit e31f94c

Browse files
committed
add sensitive
1 parent 093eac4 commit e31f94c

File tree

17 files changed

+727
-6
lines changed

17 files changed

+727
-6
lines changed

.DS_Store

2 KB
Binary file not shown.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
-- Per-application sensitive-word list.
-- Each row holds one word for one app_key; (app_key, word) is unique.
-- NOTE(review): the COMMENT on word_type reads '12', which looks truncated;
-- presumably it originally enumerated the word types -- confirm and restore.
CREATE TABLE IF NOT EXISTS `sensitivewords` (
  `id` int unsigned NOT NULL AUTO_INCREMENT,
  `app_key` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `word` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `word_type` tinyint(1) NOT NULL DEFAULT '1' COMMENT '12',
  `created_time` datetime(3) DEFAULT CURRENT_TIMESTAMP(3),
  `updated_time` datetime(3) DEFAULT CURRENT_TIMESTAMP(3) ON UPDATE CURRENT_TIMESTAMP(3),
  PRIMARY KEY (`id`),
  UNIQUE KEY `uniq_word` (`app_key`,`word`),
  KEY `idx_appkey` (`app_key`,`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;

commons/errs/errorcode.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ var (
2828
IMErrorCode_APP_LOGIN_ERR_PASS IMErrorCode = 17013
2929
IMErrorCode_APP_PHONE_EXISTED IMErrorCode = 17014
3030
IMErrorCode_APP_EMAIL_EXIST IMErrorCode = 17015
31+
IMErrorCode_APP_Sensitive IMErrorCode = 17016
3132

3233
//friends
3334
IMErrorCode_APP_FRIEND_DEFAULT IMErrorCode = 17100

commons/sensitive/filter.go

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
package sensitive
2+
3+
import (
4+
"bufio"
5+
"fmt"
6+
"io"
7+
"net/http"
8+
"os"
9+
"regexp"
10+
"time"
11+
12+
"github.com/juggleim/jugglechat-server/commons/sensitive/trie"
13+
)
14+
15+
// Filter 敏感词过滤器
16+
type Filter struct {
17+
trie *trie.Trie
18+
noise *regexp.Regexp
19+
}
20+
21+
// New 返回一个敏感词过滤器
22+
func NewFilter() *Filter {
23+
return &Filter{
24+
trie: trie.NewTrie(),
25+
noise: regexp.MustCompile(`[|\s&%$@*!!#^~_—|'";.。,,?<>《》::]+`),
26+
}
27+
}
28+
29+
// AddWord 添加敏感词
30+
func (filter *Filter) AddWord(words ...string) {
31+
filter.trie.Add(words...)
32+
}
33+
34+
// DelWord 删除敏感词
35+
func (filter *Filter) DelWord(words ...string) {
36+
filter.trie.Del(words...)
37+
}
38+
39+
// Filter 过滤敏感词
40+
func (filter *Filter) Filter(text string) string {
41+
return filter.trie.Filter(text)
42+
}
43+
44+
// Replace 和谐敏感词
45+
func (filter *Filter) Replace(text string, repl rune) string {
46+
return filter.trie.Replace(text, repl)
47+
}
48+
49+
// FindIn 检测敏感词
50+
func (filter *Filter) FindIn(text string) (bool, string) {
51+
text = filter.RemoveNoise(text)
52+
return filter.trie.FindIn(text)
53+
}
54+
55+
// FindAll 找到所有匹配词
56+
func (filter *Filter) FindAll(text string) []string {
57+
return filter.trie.FindAll(text)
58+
}
59+
60+
// Validate 检测字符串是否合法
61+
func (filter *Filter) Validate(text string) (bool, string) {
62+
text = filter.RemoveNoise(text)
63+
return filter.trie.Validate(text)
64+
}
65+
66+
// RemoveNoise 去除空格等噪音,噪音可以使用 UpdateNoisePattern 更新
67+
func (filter *Filter) RemoveNoise(text string) string {
68+
return filter.noise.ReplaceAllString(text, "")
69+
}
70+
71+
// UpdateNoisePattern 更新去噪模式
72+
func (filter *Filter) UpdateNoisePattern(pattern string) {
73+
filter.noise = regexp.MustCompile(pattern)
74+
}
75+
76+
// LoadWordDict 加载本地敏感词字典
77+
func (filter *Filter) LoadWordDict(path string) error {
78+
f, err := os.Open(path)
79+
if err != nil {
80+
return err
81+
}
82+
defer func(f *os.File) {
83+
err := f.Close()
84+
if err != nil {
85+
fmt.Println(err.Error())
86+
}
87+
}(f)
88+
89+
return filter.Load(f)
90+
}
91+
92+
// LoadNetWordDict 加载网络敏感词字典
93+
func (filter *Filter) LoadNetWordDict(url string) error {
94+
c := http.Client{
95+
Timeout: 5 * time.Second,
96+
}
97+
rsp, err := c.Get(url)
98+
if err != nil {
99+
return err
100+
}
101+
defer func(Body io.ReadCloser) {
102+
err := Body.Close()
103+
if err != nil {
104+
fmt.Println(err.Error())
105+
}
106+
}(rsp.Body)
107+
108+
return filter.Load(rsp.Body)
109+
}
110+
111+
// Load common method to add words
112+
func (filter *Filter) Load(rd io.Reader) error {
113+
buf := bufio.NewReader(rd)
114+
for {
115+
line, _, err := buf.ReadLine()
116+
if err != nil {
117+
if err != io.EOF {
118+
return err
119+
}
120+
break
121+
}
122+
filter.trie.Add(string(line))
123+
}
124+
125+
return nil
126+
}
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
package sensitive
2+
3+
import (
4+
"fmt"
5+
"sync"
6+
"time"
7+
8+
"github.com/juggleim/jugglechat-server/commons/caches"
9+
"github.com/juggleim/jugglechat-server/storages"
10+
"github.com/juggleim/jugglechat-server/storages/models"
11+
)
12+
13+
var (
14+
filterCache *caches.LruCache
15+
filterLocks *sync.RWMutex
16+
)
17+
18+
func init() {
19+
filterCache = caches.NewLruCacheWithAddReadTimeout("filter_cache", 10000, nil, 8*time.Minute, 10*time.Minute)
20+
filterLocks = &sync.RWMutex{}
21+
22+
filterCache.SetValueCreator(func(key interface{}) interface{} {
23+
s := NewSensitiveService()
24+
start := time.Now()
25+
loadAppWords(s, key.(string))
26+
fmt.Println("load app words cost:", time.Since(start))
27+
return s
28+
})
29+
}
30+
31+
func GetAppSensitiveFilter(appKey string) *SensitiveService {
32+
filterLocks.Lock()
33+
defer filterLocks.Unlock()
34+
35+
v, ok := filterCache.GetByCreator(appKey, nil)
36+
if !ok {
37+
return nil
38+
}
39+
return v.(*SensitiveService)
40+
}
41+
42+
func loadAppWords(service *SensitiveService, appKey string) (err error) {
43+
var (
44+
startId int64 = 0
45+
pageSize int64 = 1000
46+
)
47+
storage := storages.NewSensitiveWordStorage()
48+
for {
49+
list, err := storage.QrySensitiveWords(appKey, pageSize, startId)
50+
if err != nil {
51+
fmt.Println(err.Error())
52+
return err
53+
}
54+
for _, item := range list {
55+
if startId < item.ID {
56+
startId = item.ID
57+
}
58+
}
59+
service.AddWord(list...)
60+
if len(list) < int(pageSize) {
61+
break
62+
}
63+
}
64+
return nil
65+
}
66+
67+
type SensitiveService struct {
68+
replaceFilter *Filter
69+
denyFilter *Filter
70+
loadLock *sync.RWMutex
71+
}
72+
73+
func NewSensitiveService() *SensitiveService {
74+
return &SensitiveService{
75+
replaceFilter: NewFilter(),
76+
denyFilter: NewFilter(),
77+
loadLock: &sync.RWMutex{},
78+
}
79+
}
80+
81+
func (s *SensitiveService) ReplaceSensitiveWords(text string) (isDeny bool, replacedText string) {
82+
s.loadLock.RLock()
83+
defer s.loadLock.RUnlock()
84+
85+
if s.denyFilter != nil {
86+
var ok bool
87+
ok, _ = s.denyFilter.FindIn(text)
88+
if ok {
89+
isDeny = true
90+
return
91+
}
92+
}
93+
if s.replaceFilter != nil {
94+
replacedText = s.replaceFilter.Replace(text, '*')
95+
}
96+
97+
return
98+
}
99+
100+
func (s *SensitiveService) AddWord(words ...*models.SensitiveWord) {
101+
s.loadLock.Lock()
102+
defer s.loadLock.Unlock()
103+
if s.replaceFilter == nil {
104+
s.replaceFilter = NewFilter()
105+
}
106+
if s.denyFilter == nil {
107+
s.denyFilter = NewFilter()
108+
}
109+
for _, word := range words {
110+
if word.WordType == models.SensitiveWordType_deny_word {
111+
s.denyFilter.AddWord(word.Word)
112+
} else {
113+
s.replaceFilter.AddWord(word.Word)
114+
}
115+
}
116+
}
117+
118+
func (s *SensitiveService) DelWord(words ...string) {
119+
s.loadLock.Lock()
120+
defer s.loadLock.Unlock()
121+
if s.denyFilter != nil {
122+
s.denyFilter.DelWord(words...)
123+
}
124+
if s.replaceFilter != nil {
125+
s.replaceFilter.DelWord(words...)
126+
}
127+
}

0 commit comments

Comments
 (0)