@@ -3,14 +3,15 @@ package wordcld
33import (
44 "bufio"
55 "context"
6- "github.com/xh-polaris/psych-core-api/biz/application/dto/core_api"
76 "os"
87 "path/filepath"
98 "regexp"
109 "strings"
1110 "sync"
1211 "unicode/utf8"
1312
13+ "github.com/xh-polaris/psych-core-api/biz/application/dto/core_api"
14+
1415 "github.com/xh-polaris/psych-core-api/biz/cst"
1516 "github.com/xh-polaris/psych-core-api/biz/infra/mapper/message"
1617 "github.com/xh-polaris/psych-core-api/biz/infra/mapper/report"
@@ -107,10 +108,45 @@ func ensureStopWordsLoaded() {
107108 stopWordsOnce .Do (loadStopWords )
108109}
109110
111+ // initJiebaInstance 初始化jieba实例
112+ func initJiebaInstance () * gojieba.Jieba {
113+ dictPath := os .Getenv ("JIEBA_DICT_PATH" )
114+
115+ // 在生产环境(Docker)中,必须使用自定义路径,因为默认的Go模块路径不存在
116+ // 如果没有设置环境变量,设置默认值为Docker中的字典路径
117+ if dictPath == "" {
118+ dictPath = "/app/dict"
119+ }
120+
121+ // 检查自定义字典目录是否存在并包含必要的字典文件
122+ requiredFiles := []string {
123+ "jieba.dict.utf8" ,
124+ "hmm_model.utf8" ,
125+ "user.dict.utf8" ,
126+ "idf.utf8" ,
127+ "stop_words.utf8" ,
128+ }
129+
130+ // 检查所有字典文件是否存在
131+ dictPaths := make ([]string , 0 , len (requiredFiles ))
132+ for _ , filename := range requiredFiles {
133+ fullPath := filepath .Join (dictPath , filename )
134+ if _ , err := os .Stat (fullPath ); os .IsNotExist (err ) {
135+ // 如果字典文件不存在,尝试使用gojieba的默认配置(仅限开发环境)
136+ // 生产环境中这通常会失败,所以应该确保字典文件正确部署
137+ return gojieba .NewJieba ()
138+ }
139+ dictPaths = append (dictPaths , fullPath )
140+ }
141+
142+ // 如果所有字典文件都存在,使用自定义路径
143+ return gojieba .NewJieba (dictPaths ... )
144+ }
145+
110146func NewWordCloudExtractor (rptMapper report.IMongoMapper ) * WordCloudExtractor {
111147 Extractor = WordCloudExtractor {
112148 rptMapper : rptMapper ,
113- jieba : gojieba . NewJieba (),
149+ jieba : initJiebaInstance (),
114150 }
115151 return & Extractor
116152}
0 commit comments