Skip to content

Commit 28de8fc

Browse files
authored
Feat/2.3.0 beta3 (#1832)
2 parents 80dcae8 + 6dbf656 commit 28de8fc

File tree

140 files changed

+3247
-1366
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

140 files changed

+3247
-1366
lines changed

.gitmodules

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +0,0 @@
1-
[submodule "src/bisheng-unstructured"]
2-
path = src/bisheng-unstructured
3-
url = https://github.com/dataelement/bisheng-unstructured.git
4-
[submodule "src/bisheng-rt"]
5-
path = src/bisheng-rt
6-
url = https://github.com/dataelement/bisheng-rt.git

docker/docker-compose.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ services:
4040

4141
backend:
4242
container_name: bisheng-backend
43-
image: dataelement/bisheng-backend:v2.3.0-beta2
43+
image: dataelement/bisheng-backend:v2.3.0-beta3
4444
ports:
4545
- "7860:7860"
4646
environment:
@@ -78,7 +78,7 @@ services:
7878

7979
backend_worker:
8080
container_name: bisheng-backend-worker
81-
image: dataelement/bisheng-backend:v2.3.0-beta2
81+
image: dataelement/bisheng-backend:v2.3.0-beta3
8282
environment:
8383
TZ: Asia/Shanghai
8484
BS_MILVUS_CONNECTION_ARGS: '{"host":"milvus","port":"19530","user":"","password":"","secure":false}'
@@ -109,7 +109,7 @@ services:
109109

110110
frontend:
111111
container_name: bisheng-frontend
112-
image: dataelement/bisheng-frontend:v2.3.0-beta2
112+
image: dataelement/bisheng-frontend:v2.3.0-beta3
113113
ports:
114114
- "3001:3001"
115115
environment:

src/backend/bisheng/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77

88
try:
99
# 通过ci去自动修改
10-
__version__ = '2.3.0-beta2'
10+
__version__ = '2.3.0-beta3'
1111
except metadata.PackageNotFoundError:
1212
# Case where package metadata is not available.
1313
__version__ = ''

src/backend/bisheng/api/services/assistant.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
from datetime import datetime
2-
from datetime import datetime
32
from typing import Any, List, Optional, Union
43

54
from fastapi import Request
@@ -297,7 +296,7 @@ async def update_assistant(cls, request: Request, login_user: UserPayload, req:
297296
raise AssistantNameRepeatError()
298297
assistant.name = req.name
299298
assistant.desc = req.desc
300-
assistant.logo = req.logo
299+
assistant.logo = req.logo if req.logo else assistant.logo
301300
assistant.prompt = req.prompt
302301
assistant.guide_word = req.guide_word
303302
assistant.guide_question = req.guide_question

src/backend/bisheng/api/services/audit_log.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import csv
22
from datetime import datetime
33
from tempfile import NamedTemporaryFile
4-
from typing import Any, List, Optional
4+
from typing import Any, List, Optional, Dict
55

66
from loguru import logger
77

@@ -49,18 +49,21 @@ def get_audit_log(cls, login_user: UserPayload, group_ids, operator_ids, start_t
4949
return resp_200(data={'data': data, 'total': total})
5050

5151
@classmethod
52-
def get_all_operators(cls, login_user: UserPayload) -> Any:
52+
def get_all_operators(cls, login_user: UserPayload) -> List[Dict]:
5353
groups = []
5454
if not login_user.is_admin():
5555
groups = [one.group_id for one in UserGroupDao.get_user_admin_group(login_user.user_id)]
56+
# not any group admin
57+
if not groups:
58+
raise UnAuthorizedError()
5659

5760
data = AuditLogDao.get_all_operators(groups)
5861
res = {}
5962
for one in data:
6063
if not one[1]:
6164
continue
6265
res[one[0]] = {'user_id': one[0], 'user_name': one[1]}
63-
return resp_200(data=list(res.values()))
66+
return list(res.values())
6467

6568
@classmethod
6669
def _chat_log(cls, user: UserPayload, ip_address: str, event_type: EventType, object_type: ObjectType,
@@ -599,8 +602,7 @@ def export_session_messages(cls, user: UserPayload, flow_ids: List[str], user_id
599602
minio_client.put_object_sync(object_name=tmp_object_name, file=tmp_file.name,
600603
content_type='application/text',
601604
bucket_name=minio_client.tmp_bucket)
602-
share_url = minio_client.get_share_link(tmp_object_name, minio_client.tmp_bucket)
603-
return minio_client.clear_minio_share_host(share_url)
605+
return minio_client.get_share_link_sync(tmp_object_name, minio_client.tmp_bucket)
604606

605607
@classmethod
606608
def get_chat_messages(cls, chat_list: List[AppChatList]) -> List[AppChatList]:

src/backend/bisheng/api/services/base.py

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ def get_logo_share_link(cls, logo_path: str):
1212
redis_client = get_redis_client_sync()
1313
if not logo_path:
1414
return ''
15-
cache_key = f'logo_cache:{logo_path}'
15+
cache_key = f'logo_cache_new:{logo_path}'
1616
# 先从内存中获取
1717
share_url = cls.LogoMemoryCache.get(cache_key)
1818
if share_url:
@@ -25,9 +25,7 @@ def get_logo_share_link(cls, logo_path: str):
2525
return share_url
2626

2727
minio_client = get_minio_storage_sync()
28-
share_url = minio_client.get_share_link(logo_path)
29-
# 去除前缀通过nginx访问,防止访问不到文件
30-
share_url = minio_client.clear_minio_share_host(share_url)
28+
share_url = minio_client.get_share_link_sync(logo_path)
3129

3230
# 缓存5天, 临时链接有效期为7天
3331
redis_client.set(cache_key, share_url, 3600 * 120)
@@ -40,7 +38,7 @@ async def get_logo_share_link_async(cls, logo_path: str):
4038
redis_client = await get_redis_client()
4139
if not logo_path:
4240
return ''
43-
cache_key = f'logo_cache:{logo_path}'
41+
cache_key = f'logo_cache_new:{logo_path}'
4442
# 先从内存中获取
4543
share_url = cls.LogoMemoryCache.get(cache_key)
4644
if share_url:
@@ -53,11 +51,9 @@ async def get_logo_share_link_async(cls, logo_path: str):
5351
return share_url
5452

5553
minio_client = await get_minio_storage()
56-
share_url = minio_client.get_share_link(logo_path)
57-
# 去除前缀通过nginx访问,防止访问不到文件
58-
share_url = minio_client.clear_minio_share_host(share_url)
54+
share_url = await minio_client.get_share_link(logo_path)
5955

6056
# 缓存5天, 临时链接有效期为7天
6157
await redis_client.aset(cache_key, share_url, 3600 * 120)
6258
cls.LogoMemoryCache.set(cache_key, share_url)
63-
return share_url
59+
return share_url

src/backend/bisheng/api/services/dataset_service.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ class DatasetService(BaseService):
1616
def build_dataset_list(cls,
1717
page: int,
1818
limit: int,
19-
keyword: Optional[str] = None) -> List[Dict]:
19+
keyword: Optional[str] = None) -> (List[Dict], int):
2020
"""补全list 数据"""
2121

2222
dataset_list = DatasetDao.filter_dataset_by_ids(dataset_ids=[],
@@ -31,12 +31,11 @@ def build_dataset_list(cls,
3131
user_ids = [one.user_id for one in dataset_list]
3232
user_list = UserDao.get_user_by_ids(user_ids)
3333
user_dict = {one.user_id: one for one in user_list}
34-
res = [DatasetRead.validate(one) for one in dataset_list]
35-
minio_client = get_minio_storage_sync()
34+
res = [DatasetRead.model_validate(one) for one in dataset_list]
3635
for one in res:
3736
one.user_name = user_dict[one.user_id].user_name
3837
if one.object_name:
39-
one.url = minio_client.get_share_link(one.object_name)
38+
one.url = one.object_name
4039

4140
return res, total_count
4241

@@ -74,3 +73,7 @@ def delete_dataset(cls, dataset_id: int):
7473
minio_client.remove_object_sync(object_name=object_name)
7574
DatasetDao.delete(dataset)
7675
return True
76+
77+
@classmethod
async def get_one_by_object_name(cls, object_name: str) -> Optional[Dataset]:
    """Look up a dataset by the object name of its stored file.

    Args:
        object_name: Object name used to identify the dataset's file.

    Returns:
        The result of ``DatasetDao.aget_dataset_by_object_name``.
        NOTE(review): presumably ``None`` when nothing matches, per the
        ``Optional[Dataset]`` annotation -- confirm against the DAO.
    """
    # The lookup result must be returned explicitly; without a ``return``
    # the coroutine always resolves to ``None`` regardless of what the DAO
    # finds.
    return await DatasetDao.aget_dataset_by_object_name(object_name)

src/backend/bisheng/api/services/knowledge.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@ async def get_knowledge(
7878
login_user: UserPayload,
7979
knowledge_type: KnowledgeTypeEnum,
8080
name: str = None,
81+
sort_by: str = "update_time",
8182
page: int = 1,
8283
limit: int = 10,
8384
) -> (List[KnowledgeRead], int):
@@ -96,6 +97,7 @@ async def get_knowledge(
9697
knowledge_id_extra,
9798
knowledge_type,
9899
name,
100+
sort_by,
99101
page,
100102
limit,
101103
)
@@ -104,7 +106,7 @@ async def get_knowledge(
104106
)
105107
else:
106108
res = await KnowledgeDao.aget_all_knowledge(
107-
name, knowledge_type, page=page, limit=limit
109+
name, knowledge_type, sort_by, page=page, limit=limit
108110
)
109111
total = await KnowledgeDao.acount_all_knowledge(name, knowledge_type)
110112

@@ -461,7 +463,7 @@ async def get_preview_file_chunk(
461463
new_file_name = KnowledgeUtils.get_tmp_preview_file_object_name(filepath)
462464
minio_client = await get_minio_storage()
463465
if await minio_client.object_exists(minio_client.tmp_bucket, new_file_name):
464-
file_share_url = minio_client.get_share_link(
466+
file_share_url = await minio_client.get_share_link(
465467
new_file_name, minio_client.tmp_bucket
466468
)
467469

@@ -623,9 +625,7 @@ async def rebuild_knowledge_file(cls, request: Request,
623625
db_file.updater_name = login_user.user_name
624626
db_file = await KnowledgeFileDao.async_update(db_file)
625627

626-
file_path, _ = cls.get_file_share_url(db_file.id)
627-
628-
preview_cache_key = cls.get_preview_cache_key(req_data.knowledge_id, file_path=file_path)
628+
preview_cache_key = cls.get_preview_cache_key(req_data.knowledge_id, file_path=req_data.file_path)
629629
file_worker.retry_knowledge_file_celery.delay(db_file.id, preview_cache_key, req_data.callback_url)
630630

631631
return db_file.model_dump()
@@ -1172,13 +1172,13 @@ def get_file_share_url(cls, file_id: int) -> (str, str):
11721172
""" 获取文件原始下载地址 和 对应的预览文件下载地址 """
11731173
file = KnowledgeFileDao.get_file_by_ids([file_id])
11741174
if not file:
1175-
raise NotFoundError.http_exception()
1175+
raise NotFoundError()
11761176
file = file[0]
11771177
minio_client = get_minio_storage_sync()
11781178
# 130版本以前的文件解析
11791179
if file.parse_type in [ParseType.LOCAL.value, ParseType.UNS.value]:
1180-
original_url = minio_client.get_share_link(cls.get_knowledge_file_object_name(file.id, file.file_name))
1181-
preview_url = minio_client.get_share_link(str(file.id))
1180+
original_url = minio_client.get_share_link_sync(cls.get_knowledge_file_object_name(file.id, file.file_name))
1181+
preview_url = minio_client.get_share_link_sync(str(file.id))
11821182
else:
11831183
original_url = cls.get_file_share_url_with_empty(file.object_name)
11841184
preview_url = ""
@@ -1197,7 +1197,7 @@ def get_file_share_url_with_empty(cls, object_name: str) -> str:
11971197
"""
11981198
minio_client = get_minio_storage_sync()
11991199
if minio_client.object_exists_sync(minio_client.bucket, object_name):
1200-
return minio_client.get_share_link(object_name, minio_client.bucket)
1200+
return minio_client.get_share_link_sync(object_name)
12011201
return ""
12021202

12031203
@classmethod

src/backend/bisheng/api/services/knowledge_imp.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -560,7 +560,7 @@ def add_file_embedding(
560560
f"start download original file={db_file.id} file_name={db_file.file_name}"
561561
)
562562

563-
file_url = minio_client.get_share_link(db_file.object_name)
563+
file_url = minio_client.get_share_link_sync(db_file.object_name, clear_host=False)
564564
filepath, _ = file_download(file_url)
565565

566566
# Convert split_rule string to dict if needed

src/backend/bisheng/api/services/libreoffice_converter.py

Lines changed: 39 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import os
22
import shutil # For checking if the executable is in PATH
33
import subprocess
4+
import tempfile
45

56
from loguru import logger
67

@@ -85,22 +86,25 @@ def convert_doc_to_docx(input_doc_path, output_dir=None):
8586
file_name_no_ext = os.path.splitext(base_name)[0]
8687
output_docx_path = os.path.join(output_dir, f"{file_name_no_ext}.docx")
8788

88-
command = [
89-
soffice_path,
90-
"--headless", # Run in headless mode (no GUI)
91-
"--convert-to",
92-
"docx", # Specify the output format
93-
"--outdir",
94-
output_dir, # Specify the output directory
95-
input_doc_path, # The input file
96-
]
97-
98-
logger.debug(f"Executing command: {' '.join(command)}")
99-
10089
try:
101-
process = subprocess.run(
102-
command, check=True, capture_output=True, text=True, timeout=120
103-
) # 120 seconds timeout
90+
with tempfile.TemporaryDirectory() as temp_dir:
91+
command = [
92+
soffice_path,
93+
"--headless", # Run in headless mode (no GUI)
94+
"--norestore",
95+
"--invisible",
96+
"-env:SingleAppInstance=false",
97+
f"-env:UserInstallation=file://{temp_dir}",
98+
"--convert-to",
99+
"docx:Office Open XML Text", # Specify the output format
100+
"--outdir",
101+
output_dir, # Specify the output directory
102+
input_doc_path, # The input file
103+
]
104+
logger.debug(f"Executing command: {' '.join(command)}")
105+
process = subprocess.run(
106+
command, check=True, capture_output=True, text=True, timeout=120
107+
) # 120 seconds timeout
104108
logger.debug(f"LibreOffice STDOUT: {process.stdout}")
105109
if (
106110
process.stderr
@@ -192,23 +196,27 @@ def convert_ppt_to_pdf(input_path, output_dir=None):
192196
pdf_name = os.path.splitext(base_name)[0] + ".pdf"
193197
expected_pdf_path = os.path.join(output_dir, pdf_name)
194198

195-
command = [
196-
soffice_path,
197-
"--headless",
198-
"--convert-to",
199-
"pdf",
200-
"--outdir",
201-
output_dir,
202-
input_path,
203-
]
204-
205199
try:
206-
logger.debug(f"Converting {input_path} to PDF using {soffice_path}...")
207-
# LibreOffice can sometimes be slow to start up and convert.
208-
# It may also not provide much stdout/stderr unless there's a significant error.
209-
process = subprocess.run(
210-
command, capture_output=True, text=True, check=True, timeout=180
211-
) # 180 seconds timeout
200+
with tempfile.TemporaryDirectory() as temp_dir:
201+
command = [
202+
soffice_path,
203+
"--headless",
204+
"--norestore",
205+
"--invisible",
206+
"-env:SingleAppInstance=false",
207+
f"-env:UserInstallation=file://{temp_dir}",
208+
"--convert-to",
209+
"pdf",
210+
"--outdir",
211+
output_dir,
212+
input_path,
213+
]
214+
logger.debug(f"Converting {input_path} to PDF using {soffice_path}...")
215+
# LibreOffice can sometimes be slow to start up and convert.
216+
# It may also not provide much stdout/stderr unless there's a significant error.
217+
process = subprocess.run(
218+
command, capture_output=True, text=True, check=True, timeout=180
219+
) # 180 seconds timeout
212220

213221
if process.stdout:
214222
logger.debug(f"soffice stdout: {process.stdout}") # Often empty on success

0 commit comments

Comments
 (0)