Commit e321abd

feat: import Terminologies
1 parent d1440ca commit e321abd

File tree

8 files changed: +470, -37 lines


backend/apps/terminology/api/terminology.py

Lines changed: 116 additions & 2 deletions
@@ -1,15 +1,20 @@
 import asyncio
+import hashlib
 import io
+import os
+import uuid
+from fastapi import HTTPException
 from typing import Optional
 
 import pandas as pd
-from fastapi import APIRouter, Query
+from fastapi import APIRouter, File, UploadFile, Query
 from fastapi.responses import StreamingResponse
 
 from apps.chat.models.chat_model import AxisObj
 from apps.terminology.curd.terminology import page_terminology, create_terminology, update_terminology, \
-    delete_terminology, enable_terminology, get_all_terminology
+    delete_terminology, enable_terminology, get_all_terminology, batch_create_terminology
 from apps.terminology.models.terminology_model import TerminologyInfo
+from common.core.config import settings
 from common.core.deps import SessionDep, CurrentUser, Trans
 from common.utils.data_format import DataFormat
 

@@ -89,3 +94,112 @@ def inner():
 
     result = await asyncio.to_thread(inner)
     return StreamingResponse(result, media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
+
+
+path = settings.EXCEL_PATH
+
+from sqlalchemy.orm import sessionmaker, scoped_session
+from common.core.db import engine
+from sqlmodel import Session
+
+session_maker = scoped_session(sessionmaker(bind=engine, class_=Session))
+
+
+@router.post("/uploadExcel")
+async def upload_excel(trans: Trans, current_user: CurrentUser, file: UploadFile = File(...)):
+    ALLOWED_EXTENSIONS = {"xlsx", "xls"}
+    if not file.filename.lower().endswith(tuple(ALLOWED_EXTENSIONS)):
+        raise HTTPException(400, "Only support .xlsx/.xls")
+
+    os.makedirs(path, exist_ok=True)
+    base_filename = f"{file.filename.split('.')[0]}_{hashlib.sha256(uuid.uuid4().bytes).hexdigest()[:10]}"
+    filename = f"{base_filename}.{file.filename.split('.')[1]}"
+    save_path = os.path.join(path, filename)
+    with open(save_path, "wb") as f:
+        f.write(await file.read())
+
+    oid = current_user.oid
+
+    use_cols = [0, 1, 2, 3, 4]
+
+    def inner():
+
+        session = session_maker()
+
+        sheet_names = pd.ExcelFile(save_path).sheet_names
+
+        import_data = []
+
+        for sheet_name in sheet_names:
+
+            df = pd.read_excel(
+                save_path,
+                sheet_name=sheet_name,
+                engine='calamine',
+                header=0,
+                usecols=use_cols,
+                dtype=str
+            ).fillna("")
+
+            for index, row in df.iterrows():
+                # skip empty rows
+                if row.isnull().all():
+                    continue
+
+                word = row[0].strip() if pd.notna(row[0]) and row[0].strip() else None
+                other_words = [w.strip() for w in row[1].strip().split(',')] if pd.notna(row[1]) and row[1].strip() else []
+                description = row[2].strip() if pd.notna(row[2]) and row[2].strip() else None
+                datasource_names = [d.strip() for d in row[3].strip().split(',')] if pd.notna(row[3]) and row[3].strip() else []
+                all_datasource = True if pd.notna(row[4]) and row[4].lower().strip() in ['y', 'yes', 'true'] else False
+                specific_ds = False if all_datasource else True
+
+                import_data.append(TerminologyInfo(word=word, description=description, other_words=other_words,
+                                                   datasource_names=datasource_names, specific_ds=specific_ds))
+
+        res = batch_create_terminology(session, import_data, oid, trans)
+
+        failed_records = res['failed_records']
+
+        error_excel_filename = None
+
+        if len(failed_records) > 0:
+            data_list = []
+            for obj in failed_records:
+                _data = {
+                    "word": obj['data'].word,
+                    "other_words": ', '.join(obj['data'].other_words) if obj['data'].other_words else '',
+                    "description": obj['data'].description,
+                    "all_data_sources": 'N' if obj['data'].specific_ds else 'Y',
+                    "datasource": ', '.join(obj['data'].datasource_names) if obj['data'].datasource_names and obj['data'].specific_ds else '',
+                    "errors": obj['errors']
+                }
+                data_list.append(_data)
+
+            fields = []
+            fields.append(AxisObj(name=trans('i18n_terminology.term_name'), value='word'))
+            fields.append(AxisObj(name=trans('i18n_terminology.synonyms'), value='other_words'))
+            fields.append(AxisObj(name=trans('i18n_terminology.term_description'), value='description'))
+            fields.append(AxisObj(name=trans('i18n_terminology.effective_data_sources'), value='datasource'))
+            fields.append(AxisObj(name=trans('i18n_terminology.all_data_sources'), value='all_data_sources'))
+            fields.append(AxisObj(name=trans('i18n_data_training.error_info'), value='errors'))
+
+            md_data, _fields_list = DataFormat.convert_object_array_for_pandas(fields, data_list)
+
+            df = pd.DataFrame(md_data, columns=_fields_list)
+            error_excel_filename = f"{base_filename}_error.xlsx"
+            save_error_path = os.path.join(path, error_excel_filename)
+            # save the DataFrame to Excel
+            df.to_excel(save_error_path, index=False)
+
+        return {
+            'success_count': res['success_count'],
+            'failed_count': len(failed_records),
+            'duplicate_count': res['duplicate_count'],
+            'original_count': res['original_count'],
+            'error_excel_filename': error_excel_filename,
+        }
+
+    return await asyncio.to_thread(inner)
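For reference, the new endpoint reads the first five columns of every sheet by position rather than by header name (usecols=[0, 1, 2, 3, 4] with header=0): term, comma-separated synonyms, description, comma-separated data source names, and a Y/N "all data sources" flag. A minimal sketch of producing a compatible test workbook with pandas; the column titles and filename below are illustrative, not dictated by the endpoint:

import pandas as pd

# Column order is what matters; the first row is consumed as the header (header=0).
rows = [
    ["GMV", "gross merchandise volume", "Total order value before refunds", "sales_db", "N"],
    ["DAU", "daily active user", "Distinct users active on a given day", "", "Y"],
]
pd.DataFrame(
    rows,
    columns=["term", "synonyms", "description", "data_sources", "all_data_sources"],
).to_excel("terminology_import.xlsx", index=False)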

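Design note: the endpoint hands the pandas parsing and the batch insert to a worker thread via asyncio.to_thread, which is why it builds its own scoped_session over the engine instead of reusing the request-scoped SessionDep. A hedged sketch of calling it from a client; the base URL, router prefix, and bearer-token auth are assumptions about the deployment, not something this diff confirms:

import requests

# Hypothetical URL and auth header; adjust to the actual router prefix and auth scheme.
url = "http://localhost:8000/terminology/uploadExcel"
with open("terminology_import.xlsx", "rb") as fh:
    resp = requests.post(
        url,
        headers={"Authorization": "Bearer <token>"},
        files={"file": ("terminology_import.xlsx", fh,
                        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")},
    )
resp.raise_for_status()
# Response fields: success_count, failed_count, duplicate_count, original_count, error_excel_filename
print(resp.json())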