Skip to content

Commit d0825a9

Browse files
author
moonlight
committed
fix:生产环境下jieba dict缺失
1 parent 86026ad commit d0825a9

File tree

2 files changed

+47
-2
lines changed

2 files changed

+47
-2
lines changed

Dockerfile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@ WORKDIR /build
1313
ADD go.mod .
1414
ADD go.sum .
1515
RUN go mod download
16+
17+
# Important: create the dictionary directory and copy the dictionary files into it.
RUN mkdir -p /build/dict
# Copy gojieba's dictionary files from the Go module cache into the build directory.
# The module directory is resolved with `go list` so the path follows whatever
# version go.mod pins, and a failed copy is allowed to fail the build: an empty
# /build/dict would otherwise only surface at runtime as missing dictionaries.
RUN cp -r "$(go list -m -f '{{.Dir}}' github.com/yanyiwu/gojieba)"/deps/cppjieba/dict/* /build/dict/
21+
1622
COPY . .
1723
RUN sh ./build.sh
1824

@@ -25,5 +31,8 @@ ENV TZ Asia/Shanghai
2531

2632
WORKDIR /app
2733
COPY --from=builder /build/output /app
34+
COPY --from=builder /build/dict /app/dict
35+
36+
ENV JIEBA_DICT_PATH=/app/dict
2837

2938
CMD ["sh", "./bootstrap.sh"]

biz/domain/wordcld/wordcloud.go

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@ package wordcld
33
import (
44
"bufio"
55
"context"
6-
"github.com/xh-polaris/psych-core-api/biz/application/dto/core_api"
76
"os"
87
"path/filepath"
98
"regexp"
109
"strings"
1110
"sync"
1211
"unicode/utf8"
1312

13+
"github.com/xh-polaris/psych-core-api/biz/application/dto/core_api"
14+
1415
"github.com/xh-polaris/psych-core-api/biz/cst"
1516
"github.com/xh-polaris/psych-core-api/biz/infra/mapper/message"
1617
"github.com/xh-polaris/psych-core-api/biz/infra/mapper/report"
@@ -107,10 +108,45 @@ func ensureStopWordsLoaded() {
107108
stopWordsOnce.Do(loadStopWords)
108109
}
109110

111+
// initJiebaInstance 初始化jieba实例
112+
func initJiebaInstance() *gojieba.Jieba {
113+
dictPath := os.Getenv("JIEBA_DICT_PATH")
114+
115+
// 在生产环境(Docker)中,必须使用自定义路径,因为默认的Go模块路径不存在
116+
// 如果没有设置环境变量,设置默认值为Docker中的字典路径
117+
if dictPath == "" {
118+
dictPath = "/app/dict"
119+
}
120+
121+
// 检查自定义字典目录是否存在并包含必要的字典文件
122+
requiredFiles := []string{
123+
"jieba.dict.utf8",
124+
"hmm_model.utf8",
125+
"user.dict.utf8",
126+
"idf.utf8",
127+
"stop_words.utf8",
128+
}
129+
130+
// 检查所有字典文件是否存在
131+
dictPaths := make([]string, 0, len(requiredFiles))
132+
for _, filename := range requiredFiles {
133+
fullPath := filepath.Join(dictPath, filename)
134+
if _, err := os.Stat(fullPath); os.IsNotExist(err) {
135+
// 如果字典文件不存在,尝试使用gojieba的默认配置(仅限开发环境)
136+
// 生产环境中这通常会失败,所以应该确保字典文件正确部署
137+
return gojieba.NewJieba()
138+
}
139+
dictPaths = append(dictPaths, fullPath)
140+
}
141+
142+
// 如果所有字典文件都存在,使用自定义路径
143+
return gojieba.NewJieba(dictPaths...)
144+
}
145+
110146
func NewWordCloudExtractor(rptMapper report.IMongoMapper) *WordCloudExtractor {
111147
Extractor = WordCloudExtractor{
112148
rptMapper: rptMapper,
113-
jieba: gojieba.NewJieba(),
149+
jieba: initJiebaInstance(),
114150
}
115151
return &Extractor
116152
}

0 commit comments

Comments
 (0)