 import asyncio
+import hashlib
 import io
+import os
+import uuid
 from typing import Optional

 import pandas as pd
-from fastapi import APIRouter, Query
+from fastapi import APIRouter, File, HTTPException, Query, UploadFile
 from fastapi.responses import StreamingResponse

 from apps.chat.models.chat_model import AxisObj
 from apps.terminology.curd.terminology import page_terminology, create_terminology, update_terminology, \
-    delete_terminology, enable_terminology, get_all_terminology
+    delete_terminology, enable_terminology, get_all_terminology, batch_create_terminology
 from apps.terminology.models.terminology_model import TerminologyInfo
+from common.core.config import settings
 from common.core.deps import SessionDep, CurrentUser, Trans
 from common.utils.data_format import DataFormat

@@ -89,3 +94,112 @@ def inner():

     result = await asyncio.to_thread(inner)
     return StreamingResponse(result, media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
+
+
+path = settings.EXCEL_PATH
+
+from sqlalchemy.orm import sessionmaker, scoped_session
+from common.core.db import engine
+from sqlmodel import Session
+
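+# Note: a separate, thread-local scoped_session is used here because the import work in
+# inner() runs in a worker thread via asyncio.to_thread, outside the request-scoped SessionDep.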
+session_maker = scoped_session(sessionmaker(bind=engine, class_=Session))
+
+
+@router.post("/uploadExcel")
+async def upload_excel(trans: Trans, current_user: CurrentUser, file: UploadFile = File(...)):
+    ALLOWED_EXTENSIONS = {"xlsx", "xls"}
+    if not file.filename.lower().endswith(tuple(f".{ext}" for ext in ALLOWED_EXTENSIONS)):
+        raise HTTPException(400, "Only .xlsx/.xls files are supported")
+
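+    # Persist the upload under settings.EXCEL_PATH; the worker thread reads it back from disk,
+    # and the error report (if any) is written alongside it below.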
+    os.makedirs(path, exist_ok=True)
+    base_filename = f"{file.filename.rsplit('.', 1)[0]}_{hashlib.sha256(uuid.uuid4().bytes).hexdigest()[:10]}"
+    filename = f"{base_filename}.{file.filename.rsplit('.', 1)[-1]}"
+    save_path = os.path.join(path, filename)
+    with open(save_path, "wb") as f:
+        f.write(await file.read())
+
+    oid = current_user.oid
+
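+    # Expected template columns, in order: term, synonyms, description, data sources, all-data-sources flag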
+    use_cols = [0, 1, 2, 3, 4]
+
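+    # inner() does the blocking pandas and database work; it is dispatched to a worker thread
+    # via asyncio.to_thread at the end of this handler so the event loop is not blocked.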
+    def inner():
+
+        session = session_maker()
+
+        sheet_names = pd.ExcelFile(save_path).sheet_names
+
+        import_data = []
+
+        for sheet_name in sheet_names:
+
+            df = pd.read_excel(
+                save_path,
+                sheet_name=sheet_name,
+                engine='calamine',
+                header=0,
+                usecols=use_cols,
+                dtype=str
+            ).fillna("")
+
+            for _, row in df.iterrows():
+                # Skip rows where every cell is empty
+                if not any(str(v).strip() for v in row):
+                    continue
+
+                word = row.iloc[0].strip() or None
+                other_words = [w.strip() for w in row.iloc[1].split(',')] if row.iloc[1].strip() else []
+                description = row.iloc[2].strip() or None
+                datasource_names = [d.strip() for d in row.iloc[3].split(',')] if row.iloc[3].strip() else []
+                all_datasource = row.iloc[4].strip().lower() in ('y', 'yes', 'true')
+                specific_ds = not all_datasource
+
+                import_data.append(TerminologyInfo(word=word, description=description, other_words=other_words,
+                                                   datasource_names=datasource_names, specific_ds=specific_ds))
+
+        res = batch_create_terminology(session, import_data, oid, trans)
+
+        failed_records = res['failed_records']
+
+        error_excel_filename = None
+
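+        # Rows rejected by batch_create_terminology are exported to an error workbook whose
+        # filename is returned to the client below.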
+        if len(failed_records) > 0:
+            data_list = []
+            for obj in failed_records:
+                _data = {
+                    "word": obj['data'].word,
+                    "other_words": ', '.join(obj['data'].other_words) if obj['data'].other_words else '',
+                    "description": obj['data'].description,
+                    "all_data_sources": 'N' if obj['data'].specific_ds else 'Y',
+                    "datasource": ', '.join(obj['data'].datasource_names) if obj['data'].datasource_names and obj['data'].specific_ds else '',
+                    "errors": obj['errors']
+                }
+                data_list.append(_data)
+
+            fields = []
+            fields.append(AxisObj(name=trans('i18n_terminology.term_name'), value='word'))
+            fields.append(AxisObj(name=trans('i18n_terminology.synonyms'), value='other_words'))
+            fields.append(AxisObj(name=trans('i18n_terminology.term_description'), value='description'))
+            fields.append(AxisObj(name=trans('i18n_terminology.effective_data_sources'), value='datasource'))
+            fields.append(AxisObj(name=trans('i18n_terminology.all_data_sources'), value='all_data_sources'))
+            fields.append(AxisObj(name=trans('i18n_data_training.error_info'), value='errors'))
+
+            md_data, _fields_list = DataFormat.convert_object_array_for_pandas(fields, data_list)
+
+            df = pd.DataFrame(md_data, columns=_fields_list)
+            error_excel_filename = f"{base_filename}_error.xlsx"
+            save_error_path = os.path.join(path, error_excel_filename)
+            # Save the failed rows to an error Excel file for the client to download
+            df.to_excel(save_error_path, index=False)
+
+        return {
+            'success_count': res['success_count'],
+            'failed_count': len(failed_records),
+            'duplicate_count': res['duplicate_count'],
+            'original_count': res['original_count'],
+            'error_excel_filename': error_excel_filename,
+        }
+
+    return await asyncio.to_thread(inner)