feat(X-Pack): add custom prompt

ulleo · ulleo · commit 75bd13a54f10 · 2025-09-28T19:19:48.000+08:00
#213
diff --git a/backend/alembic/env.py b/backend/alembic/env.py
@@ -26,6 +26,7 @@
 # from apps.settings.models.setting_models import SQLModel
 # from apps.chat.models.chat_model import SQLModel
 from apps.terminology.models.terminology_model import SQLModel
+# from apps.custom_prompt.models.custom_prompt_model import SQLModel
 # from apps.data_training.models.data_training_model import SQLModel
 # from apps.dashboard.models.dashboard_model import SQLModel
 from common.core.config import settings # noqa
diff --git a/backend/alembic/versions/046_add_custom_prompt.py b/backend/alembic/versions/046_add_custom_prompt.py
@@ -0,0 +1,39 @@
+"""046_add_custom_prompt
+
+Revision ID: 8855aea2dd61
+Revises: 45e7e52bf2b8
+Create Date: 2025-09-28 13:57:01.509249
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import sqlmodel.sql.sqltypes
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = '8855aea2dd61'
+down_revision = '45e7e52bf2b8'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('custom_prompt',
+    sa.Column('id', sa.BigInteger(), sa.Identity(always=True), nullable=False),
+    sa.Column('oid', sa.BigInteger(), nullable=True),
+    sa.Column('type', sa.Enum('GENERATE_SQL', 'ANALYSIS', 'PREDICT_DATA', name='customprompttypeenum', native_enum=False, length=20), nullable=True),
+    sa.Column('create_time', sa.DateTime(), nullable=True),
+    sa.Column('name', sqlmodel.sql.sqltypes.AutoString(length=255), nullable=True),
+    sa.Column('prompt', sa.Text(), nullable=True),
+    sa.Column('specific_ds', sa.Boolean(), nullable=True),
+    sa.Column('datasource_ids', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+    sa.PrimaryKeyConstraint('id')
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table('custom_prompt')
+    # ### end Alembic commands ###
diff --git a/backend/apps/chat/models/chat_model.py b/backend/apps/chat/models/chat_model.py
@@ -40,11 +40,13 @@ class OperationEnum(Enum):
     CHOOSE_DATASOURCE = '6'
     GENERATE_DYNAMIC_SQL = '7'
 
+
 class ChatFinishStep(Enum):
     GENERATE_SQL = 1
     QUERY_DATA = 2
     GENERATE_CHART = 3
 
+
 #     TODO choose table / check connection / generate description
 
 class ChatLog(SQLModel, table=True):
@@ -177,12 +179,13 @@ class AiModelQuestion(BaseModel):
     sub_query: Optional[list[dict]] = None
     terminologies: str = ""
     data_training: str = ""
+    custom_prompt: str = ""
     error_msg: str = ""
 
     def sql_sys_question(self):
         return get_sql_template()['system'].format(engine=self.engine, schema=self.db_schema, question=self.question,
                                                    lang=self.lang, terminologies=self.terminologies,
-                                                   data_training=self.data_training)
+                                                   data_training=self.data_training, custom_prompt=self.custom_prompt)
 
     def sql_user_question(self, current_time: str):
         return get_sql_template()['user'].format(engine=self.engine, schema=self.db_schema, question=self.question,
@@ -196,13 +199,14 @@ def chart_user_question(self, chart_type: Optional[str] = None):
                                                    chart_type=chart_type)
 
     def analysis_sys_question(self):
-        return get_analysis_template()['system'].format(lang=self.lang, terminologies=self.terminologies)
+        return get_analysis_template()['system'].format(lang=self.lang, terminologies=self.terminologies,
+                                                        custom_prompt=self.custom_prompt)
 
     def analysis_user_question(self):
         return get_analysis_template()['user'].format(fields=self.fields, data=self.data)
 
     def predict_sys_question(self):
-        return get_predict_template()['system'].format(lang=self.lang)
+        return get_predict_template()['system'].format(lang=self.lang, custom_prompt=self.custom_prompt)
 
     def predict_user_question(self):
         return get_predict_template()['user'].format(fields=self.fields, data=self.data)
diff --git a/backend/apps/chat/task/llm.py b/backend/apps/chat/task/llm.py
@@ -30,6 +30,9 @@
     get_last_execute_sql_error
 from apps.chat.models.chat_model import ChatQuestion, ChatRecord, Chat, RenameChat, ChatLog, OperationEnum, \
     ChatFinishStep
+from sqlbot_xpack.license.license_manage import SQLBotLicenseUtil
+from sqlbot_xpack.custom_prompt.curd.custom_prompt import find_custom_prompts
+from sqlbot_xpack.custom_prompt.models.custom_prompt_model import CustomPromptTypeEnum
 from apps.data_training.curd.data_training import get_training_template
 from apps.datasource.crud.datasource import get_table_schema
 from apps.datasource.crud.permission import get_row_permission_filters, is_normal_user
@@ -244,6 +247,9 @@ def generate_analysis(self):
         ds_id = self.ds.id if isinstance(self.ds, CoreDatasource) else None
         self.chat_question.terminologies = get_terminology_template(self.session, self.chat_question.question,
                                                                     self.current_user.oid, ds_id)
+        if SQLBotLicenseUtil.valid():
+            self.chat_question.custom_prompt = find_custom_prompts(self.session, CustomPromptTypeEnum.ANALYSIS,
+                                                               self.current_user.oid, ds_id)
 
         analysis_msg.append(SystemMessage(content=self.chat_question.analysis_sys_question()))
         analysis_msg.append(HumanMessage(content=self.chat_question.analysis_user_question()))
@@ -288,6 +294,12 @@ def generate_predict(self):
         self.chat_question.fields = orjson.dumps(fields).decode()
         data = get_chat_chart_data(self.session, self.record.id)
         self.chat_question.data = orjson.dumps(data.get('data')).decode()
+
+        if SQLBotLicenseUtil.valid():
+            ds_id = self.ds.id if isinstance(self.ds, CoreDatasource) else None
+            self.chat_question.custom_prompt = find_custom_prompts(self.session, CustomPromptTypeEnum.PREDICT_DATA,
+                                                               self.current_user.oid, ds_id)
+
         predict_msg: List[Union[BaseMessage, dict[str, Any]]] = []
         predict_msg.append(SystemMessage(content=self.chat_question.predict_sys_question()))
         predict_msg.append(HumanMessage(content=self.chat_question.predict_user_question()))
@@ -509,6 +521,9 @@ def select_datasource(self):
                                                                         ds_id)
             self.chat_question.data_training = get_training_template(self.session, self.chat_question.question, ds_id,
                                                                      oid)
+            if SQLBotLicenseUtil.valid():
+                self.chat_question.custom_prompt = find_custom_prompts(self.session, CustomPromptTypeEnum.GENERATE_SQL,
+                                                                   oid, ds_id)
 
             self.init_messages()
 
@@ -902,6 +917,9 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
                                                                             oid, ds_id)
                 self.chat_question.data_training = get_training_template(self.session, self.chat_question.question,
                                                                          ds_id, oid)
+                if SQLBotLicenseUtil.valid():
+                    self.chat_question.custom_prompt = find_custom_prompts(self.session, CustomPromptTypeEnum.GENERATE_SQL,
+                                                                       oid, ds_id)
 
             self.init_messages()
 
diff --git a/backend/apps/terminology/curd/terminology.py b/backend/apps/terminology/curd/terminology.py
@@ -5,9 +5,7 @@
 from xml.dom.minidom import parseString
 
 import dicttoxml
-from sqlalchemy import BigInteger
-from sqlalchemy import and_, or_, select, func, delete, update, union
-from sqlalchemy import text
+from sqlalchemy import and_, or_, select, func, delete, update, union, text, BigInteger
 from sqlalchemy.orm import aliased
 from sqlalchemy.orm.session import Session
 
diff --git a/backend/locales/en.json b/backend/locales/en.json
@@ -47,6 +47,10 @@
     "datasource_cannot_be_none": "Datasource cannot be none",
     "data_training_not_exists": "Example does not exists",
     "exists_in_db": "Question exists"
+  },
+  "i18n_custom_prompt": {
+    "exists_in_db": "Prompt name exists",
+    "not_exists": "Prompt does not exist"
   },
   "i18n_excel_export": {
     "data_is_empty": "The form data is empty, cannot export data"
diff --git a/backend/locales/zh-CN.json b/backend/locales/zh-CN.json
@@ -48,6 +48,10 @@
     "data_training_not_exists": "该示例不存在",
     "exists_in_db": "该问题已存在"
   },
+  "i18n_custom_prompt": {
+    "exists_in_db": "模版名称已存在",
+    "not_exists": "该模版不存在"
+  },
   "i18n_excel_export": {
     "data_is_empty": "表单数据为空，无法导出数据"
   }
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
@@ -39,7 +39,7 @@ dependencies = [
     "pyyaml (>=6.0.2,<7.0.0)",
     "fastapi-mcp (>=0.3.4,<0.4.0)",
     "tabulate>=0.9.0",
-    "sqlbot-xpack>=0.0.3.31,<1.0.0",
+    "sqlbot-xpack>=0.0.3.36,<1.0.0",
     "fastapi-cache2>=0.2.2",
     "sqlparse>=0.5.3",
     "redis>=6.2.0",
diff --git a/backend/template.yaml b/backend/template.yaml
@@ -14,8 +14,9 @@ template:
           <Info>内有<db-engine><m-schema><terminologies>等信息；
           其中，<db-engine>：提供数据库引擎及版本信息；
           <m-schema>：以 M-Schema 格式提供数据库表结构信息；
-          <terminologies>：提供一组术语，块内每一个<terminology>就是术语，其中同一个<words>内的多个<word>代表术语的多种叫法，也就是术语与它的同义词，<description>即该术语对应的描述，其中也可能是能够用来参考的计算公式，或者是一些其他的查询条件
-          <sql-examples>：提供一组SQL示例，你可以参考这些示例来生成你的回答，其中<question>内是提问，<suggestion-answer>内是对于该<question>提问的解释或者对应应该回答的SQL示例
+          <terminologies>：提供一组术语，块内每一个<terminology>就是术语，其中同一个<words>内的多个<word>代表术语的多种叫法，也就是术语与它的同义词，<description>即该术语对应的描述，其中也可能是能够用来参考的计算公式，或者是一些其他的查询条件；
+          <sql-examples>：提供一组SQL示例，你可以参考这些示例来生成你的回答，其中<question>内是提问，<suggestion-answer>内是对于该<question>提问的解释或者对应应该回答的SQL示例。
+        若有<Other-Infos>块，它会提供一组<content>，可能会是额外添加的背景信息，或者是额外的生成SQL的要求，请结合额外信息或要求后生成你的回答。
         用户的提问在<user-question>内，<error-msg>内则会提供上次执行你提供的SQL时会出现的错误信息，<background-infos>内的<current-time>会告诉你用户当前提问的时间
       </Instruction>
       
@@ -219,7 +220,6 @@ template:
         </chat-examples>
       </example>
       
-      ### 下面是提供的信息
       <Info>
       <db-engine> {engine} </db-engine>
       <m-schema>
@@ -229,6 +229,7 @@ template:
       {terminologies}
       {data_training}
       </Info>
+      {custom_prompt}
       
       ### 响应, 请根据上述要求直接返回JSON结果:
       ```json
@@ -394,7 +395,11 @@ template:
         你当前的任务是根据给定的数据分析数据，并给出你的分析结果。
         我们会在<Info>块内提供给你信息，帮助你进行分析：
           <Info>内有<terminologies>等信息；
-          <terminologies>：提供一组术语，块内每一个<terminology>就是术语，其中同一个<words>内的多个<word>代表术语的多种叫法，也就是术语与它的同义词，<description>即该术语对应的描述，其中也可能是能够用来参考的计算公式，或者是一些其他的查询条件
+          <terminologies>：提供一组术语，块内每一个<terminology>就是术语，其中同一个<words>内的多个<word>代表术语的多种叫法，也就是术语与它的同义词，<description>即该术语对应的描述，其中也可能是能够用来参考的计算公式，或者是一些其他的查询条件。
+        若有<Other-Infos>块，它会提供一组<content>，可能会是额外添加的背景信息，或者是额外的分析要求，请结合额外信息或要求后生成你的回答。
+        用户会在提问中提供给你信息：
+          <data>块内是提供给你的数据，以JSON格式给出；
+          <fields>块内提供给你对应的字段或字段别名。
       </Instruction>
       
       你必须遵守以下规则:
@@ -404,32 +409,60 @@ template:
         </rule>
       </Rules>
       
-      ### 下面是提供的信息
       <Info>
       {terminologies}
       </Info>
+      {custom_prompt}
     user: |
-      ### 字段(字段别名):
+      <fields>
       {fields}
+      </fields>
       
-      ### 数据:
+      <data>
       {data}
+      </data>
   predict:
     system: |
-      ### 请使用语言：{lang} 回答，若有深度思考过程，则思考过程也需要使用 {lang} 输出
+      <Instruction>
+        你是"SQLBOT"，智能问数小助手，可以根据用户提问，专业生成SQL与可视化图表。
+        你当前的任务是根据给定的数据进行数据预测，并给出你的预测结果。
+        若有<Other-Infos>块，它会提供一组<content>，可能会是额外添加的背景信息，或者是额外的预测要求，请结合额外信息或要求后生成你的回答。
+        用户会在提问中提供给你信息：
+          <data>块内是提供给你的数据，以JSON格式给出；
+          <fields>块内提供给你对应的字段或字段别名。
+      </Instruction>
       
-      ### 说明：
-      你是一个数据分析师，你的任务是根据给定的数据进行数据预测，我将以JSON格式给你一组数据，你帮我预测之后的数据（一段可以展示趋势的数据，至少2个周期），用json数组的格式返回，返回的格式需要与传入的数据格式保持一致。
+      你必须遵守以下规则:
+      <Rules>
+        <rule>
+          请使用语言：{lang} 回答，若有深度思考过程，则思考过程也需要使用 {lang} 输出
+        </rule>
+        <rule>
+          预测的数据是一段可以展示趋势的数据，至少2个周期
+        </rule>
+        <rule>
+          返回的预测数据必须与用户提供的数据同样的格式，使用JSON数组的形式返回
+        </rule>
+        <rule>
+          无法预测或者不支持预测的数据请直接返回(不需要返回JSON格式)："抱歉，该数据无法进行预测。"(若有原因，则额外返回无法预测的原因)
+        </rule>
+        <rule>
+          预测的数据不需要返回用户提供的原有数据，请直接返回你预测的部份
+        </rule>
+      </Rules>
+      {custom_prompt}
+      
+      ### 响应, 请根据上述要求直接返回JSON结果:
       ```json
       
-      无法预测或者不支持预测的数据请直接返回(不需要返回JSON格式，需要翻译为 {lang} 输出)："抱歉，该数据无法进行预测。(有原因则返回无法预测的原因)"
-      如果可以预测，则不需要返回原有数据，直接返回预测的部份
     user: |
-      ### 字段(字段别名):
+      <fields>
       {fields}
+      </fields>
       
-      ### 数据:
+      <data>
       {data}
+      </data>
   datasource:
     system: |
       ### 请使用语言：{lang} 回答
diff --git a/frontend/src/views/system/prompt/index.vue b/frontend/src/views/system/prompt/index.vue
@@ -202,7 +202,7 @@ const search = () => {
   oldKeywords.value = keywords.value
   promptApi
     .getList(pageInfo.currentPage, pageInfo.pageSize, currentType.value, {
-      question: keywords.value,
+      name: keywords.value,
     })
     .then((res: any) => {
       toggleRowLoading.value = true