Skip to content

Commit 393fd8c

Browse files
committed
feat: add terminology management
1 parent 5e591e4 commit 393fd8c

File tree

16 files changed

+383
-12
lines changed

16 files changed

+383
-12
lines changed

backend/alembic/env.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424

2525
# from apps.system.models.user import SQLModel # noqa
2626
# from apps.settings.models.setting_models import SQLModel
27-
from apps.chat.models.chat_model import SQLModel
27+
# from apps.chat.models.chat_model import SQLModel
28+
from apps.terminology.models.terminology_model import SQLModel
2829
# from apps.dashboard.models.dashboard_model import SQLModel
2930
from common.core.config import settings # noqa
3031

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
"""039_create_terminology
2+
3+
Revision ID: 25cbc85766fd
4+
Revises: fc23c4f3e755
5+
Create Date: 2025-08-25 11:38:32.990973
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
import sqlmodel.sql.sqltypes
11+
import pgvector
12+
from sqlalchemy.dialects import postgresql
13+
14+
# Revision identifiers consumed by Alembic to place this migration in the chain.
revision: str = '25cbc85766fd'
down_revision: str = 'fc23c4f3e755'  # revision this migration is applied on top of
# No branch label and no dependency on other revisions are declared.
branch_labels = None
depends_on = None
19+
20+
21+
def upgrade():
    """Create the ``terminology`` table, enabling the ``vector`` extension first.

    The table stores one row per word. ``pid`` is nullable and carries no
    foreign-key constraint; the application code uses it to point a synonym
    row at its parent term.
    """
    # `import pgvector` does not load the SQLAlchemy integration subpackage.
    # Import it explicitly so the VECTOR type below resolves no matter which
    # other modules were imported before this migration runs.
    import pgvector.sqlalchemy

    # The `embedding` column uses the `vector` type, which only exists once
    # the extension is installed in the target database.
    op.execute("CREATE EXTENSION IF NOT EXISTS vector;")

    op.create_table(
        'terminology',
        sa.Column('id', sa.BigInteger(), sa.Identity(always=True), nullable=False),
        sa.Column('pid', sa.BigInteger(), nullable=True),
        sa.Column('create_time', sa.DateTime(), nullable=True),
        sa.Column('word', sqlmodel.sql.sqltypes.AutoString(length=255), nullable=True),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('embedding', pgvector.sqlalchemy.vector.VECTOR(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
36+
37+
38+
def downgrade():
    """Revert this revision by dropping the ``terminology`` table.

    Only the table is removed; the ``vector`` extension enabled by
    ``upgrade`` is left installed.
    """
    op.drop_table('terminology')

backend/apps/ai_model/embedding.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import threading
2+
from typing import Optional
3+
4+
from langchain_core.embeddings import Embeddings
5+
from langchain_huggingface import HuggingFaceEmbeddings
6+
from pydantic import BaseModel
7+
8+
from common.core.config import settings
9+
10+
11+
class EmbeddingModelInfo(BaseModel):
    """Settings needed to construct one embedding model instance."""

    # Directory handed to the model loader as its cache folder.
    folder: str
    # Model name handed to the model loader.
    name: str
    # Device the model is loaded on; CPU unless overridden.
    device: str = 'cpu'
15+
16+
17+
# Default configuration: the model folder and model name from application settings.
local_embedding_model = EmbeddingModelInfo(
    folder=settings.LOCAL_MODEL_PATH,
    name=settings.DEFAULT_EMBEDDING_MODEL,
)

# Constructed models, keyed by the cache key callers pass to `get_model`.
_embedding_model: dict[str, Optional[Embeddings]] = {}

# One lock per cache key, held while that key's model is being built, and
# the lock that guards creation of those per-key locks.
locks = {}
_lock = threading.Lock()
23+
24+
25+
class EmbeddingModelCache:
    """Lazily builds embedding models and keeps them in a module-level cache."""

    @staticmethod
    def _new_instance(config: EmbeddingModelInfo = local_embedding_model):
        """Build a HuggingFace embedding model from ``config``."""
        options = {
            'model_name': config.name,
            'cache_folder': config.folder,
            'model_kwargs': {'device': config.device},
            'encode_kwargs': {'normalize_embeddings': True},
        }
        return HuggingFaceEmbeddings(**options)

    @staticmethod
    def _get_lock(key: str = settings.DEFAULT_EMBEDDING_MODEL):
        """Return the lock registered for ``key``, creating it on first use."""
        existing = locks.get(key)
        if existing is not None:
            return existing

        with _lock:
            # Look again while holding the registry lock: another thread may
            # have registered a lock for this key in the meantime.
            existing = locks.get(key)
            if existing is None:
                existing = threading.Lock()
                locks[key] = existing
        return existing

    @staticmethod
    def get_model(key: str = settings.DEFAULT_EMBEDDING_MODEL,
                  config: EmbeddingModelInfo = local_embedding_model) -> Embeddings:
        """Return the model cached under ``key``, building it from ``config`` if absent.

        The cache is keyed by ``key`` only; ``config`` is consulted solely when
        no model is cached for that key yet.
        """
        cached = _embedding_model.get(key)
        if cached is not None:
            return cached

        with EmbeddingModelCache._get_lock(key):
            # Look again while holding the key's lock so the model is built
            # at most once per key.
            cached = _embedding_model.get(key)
            if cached is None:
                cached = EmbeddingModelCache._new_instance(config)
                _embedding_model[key] = cached
        return cached

backend/apps/api.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from fastapi import APIRouter
22

3+
from apps.terminology.api import terminology
34
from apps.chat.api import chat
45
from apps.dashboard.api import dashboard_api
56
from apps.datasource.api import datasource
6-
from apps.settings.api import terminology
77
from apps.system.api import login, user, aimodel, workspace, assistant
88
from apps.mcp import mcp
99

@@ -19,3 +19,4 @@
1919
api_router.include_router(dashboard_api.router)
2020
api_router.include_router(mcp.router)
2121

22+

backend/apps/terminology/__init__.py

Whitespace-only changes.

backend/apps/terminology/api/__init__.py

Whitespace-only changes.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from typing import Optional
2+
3+
from fastapi import APIRouter, Query
4+
5+
from apps.terminology.curd.terminology import page_terminology, create_terminology, update_terminology, \
6+
delete_terminology
7+
from apps.terminology.models.terminology_model import TerminologyInfo
8+
from common.core.deps import SessionDep
9+
10+
# All terminology endpoints are served under the /system/terminology prefix.
router = APIRouter(prefix="/system/terminology", tags=["Terminology"])
11+
12+
13+
@router.get("/page/{current_page}/{page_size}")
async def pager(session: SessionDep, current_page: int, page_size: int,
                word: Optional[str] = Query(None, description="搜索术语(可选)")):
    """Return one page of terms, optionally filtered by ``word``.

    The response carries the page number and page size reported by
    ``page_terminology``, the total row and page counts, and the page's
    entries under ``data``.
    """
    page = page_terminology(session, current_page, page_size, word)
    # `page_terminology` returns its five values in exactly this order.
    keys = ("current_page", "page_size", "total_count", "total_pages", "data")
    return dict(zip(keys, page))
25+
26+
27+
@router.put("")
async def create_or_update(session: SessionDep, info: TerminologyInfo):
    """Update the term when ``info.id`` is truthy, otherwise create a new one.

    Returns whatever the selected handler returns (the term id).
    """
    handler = update_terminology if info.id else create_terminology
    return handler(session, info)
33+
34+
35+
@router.delete("")
async def delete(session: SessionDep, id_list: list[int]):
    """Delete the terms listed in ``id_list`` via ``delete_terminology``."""
    delete_terminology(session, ids=id_list)

backend/apps/terminology/curd/__init__.py

Whitespace-only changes.
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
import datetime
2+
from typing import List, Optional
3+
4+
from sqlalchemy import and_, or_, select, func, delete, update
5+
from sqlalchemy.dialects.postgresql import JSONB
6+
from sqlalchemy.orm import aliased
7+
8+
from apps.terminology.models.terminology_model import Terminology, TerminologyInfo
9+
from common.core.deps import SessionDep
10+
11+
12+
def page_terminology(session: SessionDep, current_page: int = 1, page_size: int = 10, name: Optional[str] = None):
    """Return one page of top-level terms together with their synonyms.

    Top-level terms are rows whose ``pid`` is NULL; rows with a ``pid`` are
    synonyms of the term they point at. Pages are ordered by ``create_time``,
    newest first.

    Args:
        session: Database session used to run the queries.
        current_page: 1-based page number; values below 1 are raised to 1.
        page_size: Rows per page; values below 10 are raised to 10.
        name: Optional search keyword. When it is non-blank, a top-level term
            is included if its own word, or the word of any of its synonyms,
            contains the keyword (SQL ``LIKE '%keyword%'``).

    Returns:
        A tuple ``(current_page, page_size, total_count, total_pages, items)``
        where ``items`` is a list of ``TerminologyInfo`` whose ``other_words``
        holds the synonyms (an empty list when there are none).
    """
    # Function-scope import: only the ordered aggregate below needs it.
    from sqlalchemy.dialects.postgresql import aggregate_order_by

    child = aliased(Terminology)

    current_page = max(1, current_page)
    page_size = max(10, page_size)

    # Base selection: ids of all top-level terms.
    parent_ids_stmt = select(Terminology.id).where(Terminology.pid.is_(None))

    keyword = name.strip() if name else ""
    if keyword:
        keyword_pattern = f"%{keyword}%"
        # Ids of every row (term or synonym) whose word matches the keyword.
        matched_ids = select(Terminology.id).where(Terminology.word.like(keyword_pattern))
        # Parent ids of the matching synonym rows.
        matched_parent_ids = (
            select(Terminology.pid)
            .where(Terminology.id.in_(matched_ids))
            .where(Terminology.pid.isnot(None))
        )
        # Keep a top-level term if it matched itself or one of its synonyms did.
        parent_ids_stmt = parent_ids_stmt.where(
            or_(
                Terminology.id.in_(matched_ids),
                Terminology.id.in_(matched_parent_ids),
            )
        )

    count_stmt = select(func.count()).select_from(parent_ids_stmt.subquery())
    total_count = session.execute(count_stmt).scalar()
    total_pages = (total_count + page_size - 1) // page_size

    paginated_parent_ids = (
        parent_ids_stmt
        .order_by(Terminology.create_time.desc())
        .offset((current_page - 1) * page_size)
        .limit(page_size)
    )

    # Synonyms aggregated per parent. Aggregating in a separate subquery
    # (rather than over the outer join) means a term without synonyms gets
    # NULL here instead of a one-element array containing null.
    children_subquery = (
        select(
            child.pid,
            func.jsonb_agg(
                aggregate_order_by(child.word, child.create_time.desc())
            ).label('other_words'),
        )
        .where(child.pid.isnot(None))
        .group_by(child.pid)
        .subquery()
    )

    stmt = (
        select(
            Terminology.id,
            Terminology.word,
            Terminology.create_time,
            Terminology.description,
            # jsonb_build_array() with no arguments is an empty JSON array;
            # it replaces the NULL produced for terms without synonyms.
            func.coalesce(
                children_subquery.c.other_words,
                func.jsonb_build_array(),
            ).label('other_words'),
        )
        .outerjoin(children_subquery, Terminology.id == children_subquery.c.pid)
        .where(Terminology.id.in_(paginated_parent_ids))
        .order_by(Terminology.create_time.desc())
    )

    _list: List[TerminologyInfo] = [
        TerminologyInfo(
            id=row.id,
            word=row.word,
            create_time=row.create_time,
            description=row.description,
            other_words=row.other_words,
        )
        for row in session.execute(stmt)
    ]

    return current_page, page_size, total_count, total_pages, _list
137+
138+
139+
def create_terminology(session: SessionDep, info: TerminologyInfo):
    """Insert a term and its synonyms in a single transaction.

    The term is stored as a top-level row; each entry of ``info.other_words``
    is stored as a row whose ``pid`` is the new term's id and which shares
    the term's description and creation time.

    Args:
        session: Database session used for the inserts.
        info: Term data; ``word``, ``description`` and ``other_words`` are read.

    Returns:
        The id of the newly created top-level term.
    """
    create_time = datetime.datetime.now()
    parent = Terminology(word=info.word, create_time=create_time, description=info.description)

    session.add(parent)
    # Flush so the database assigns the identity value the synonym rows refer to.
    session.flush()
    parent_id = parent.id

    if info.other_words:
        session.add_all(
            Terminology(pid=parent_id, word=other_word, create_time=create_time, description=info.description)
            for other_word in info.other_words
        )

    # One commit for the term and its synonyms, so a failure while inserting
    # synonyms cannot leave a half-created term behind.
    session.commit()

    # TODO: compute and store the embedding

    return parent_id
164+
165+
166+
def update_terminology(session: SessionDep, info: TerminologyInfo):
    """Update a term and replace its synonyms in a single transaction.

    The row with id ``info.id`` gets the new word and description. All rows
    whose ``pid`` is ``info.id`` are deleted and recreated from
    ``info.other_words`` with a fresh creation time.

    Args:
        session: Database session used for the statements.
        info: Term data; ``id``, ``word``, ``description`` and ``other_words``
            are read.

    Returns:
        ``info.id``.
    """
    session.execute(
        update(Terminology)
        .where(Terminology.id == info.id)
        .values(word=info.word, description=info.description)
    )

    # Synonyms are replaced wholesale: drop the old rows, then insert the new ones.
    session.execute(delete(Terminology).where(Terminology.pid == info.id))

    if info.other_words:
        create_time = datetime.datetime.now()
        session.add_all(
            Terminology(pid=info.id, word=other_word, create_time=create_time, description=info.description)
            for other_word in info.other_words
        )

    # One commit for all three steps, so a failure while inserting the new
    # synonyms rolls back the deletion of the old ones as well.
    session.commit()

    # TODO: compute and store the embedding

    return info.id
191+
192+
193+
def delete_terminology(session: SessionDep, ids: list[int]):
    """Delete every row whose id or parent id is in ``ids``, then commit.

    Matching on ``pid`` as well removes the synonyms of each deleted term.
    """
    targets = or_(Terminology.id.in_(ids), Terminology.pid.in_(ids))
    session.execute(delete(Terminology).where(targets))
    session.commit()

backend/apps/terminology/models/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)