OpenCSGs · z275748353 · Aug 13, 2025 · Aug 13, 2025 · Aug 13, 2025 · Aug 13, 2025
@@ -431,6 +431,7 @@ import {
 // 引入国际化文件
 import zhOps from "../../../../locales/zh_js/operator_zh.json";
 import enOps from "../../../../locales/en_js/operator_en.json";
+import zhHantOps from '../../../../locales/zh_hant_js/operator_zhHant.json'
 
 const { t, locale } = useI18n();
 const origin = window.location.origin + "/";
@@ -439,6 +440,7 @@ const origin = window.location.origin + "/";
 const operatorI18n = {
   zh: zhOps,
   en: enOps,
+  zhHant: zhHantOps
 };
 
 // 基础状态管理

@@ -205,11 +205,13 @@
   import jsYaml from 'js-yaml';
   import zhOps from "../../../locales/zh_js/operator_zh.json";
   import enOps from "../../../locales/en_js/operator_en.json";
+  import zhHantOps from '../../../locales/zh_hant_js/operator_zhHant.json'
   import { useI18n } from "vue-i18n";
   const { t, locale } = useI18n();
   const operatorI18n = {
     zh: zhOps,
     en: enOps,
+    zhHant: zhHantOps
   };
   const userStore = useUserStore();
   const origin = window.location.origin + '/'; 

@@ -8,7 +8,7 @@
       </p>
     </div>
     <p class="text-gray-900 text-2xl font-medium">
-      数据源
+      {{ t("dataPipelines.data_source") }}
     </p>
 
     <div>
@@ -18,7 +18,7 @@
           class="bg-white rounded-lg border p-4 flex flex-col justify-between"
         >
           <div class="flex justify-between items-center">
-            <span class="text-sm text-gray-600">等待中</span>
+            <span class="text-sm text-gray-600">{{ t("dataPipelines.waiting") }}</span>
             <div
               class="w-7 h-7 flex items-center justify-center text-blue-500 rounded-full"
             >
@@ -34,7 +34,7 @@
           class="bg-white rounded-lg border p-4 flex flex-col justify-between"
         >
           <div class="flex justify-between items-center">
-            <span class="text-sm text-gray-600">执行中</span>
+            <span class="text-sm text-gray-600">{{ t("dataPipelines.inProgress") }}</span>
             <div
               class="w-7 h-7 flex items-center justify-center text-yellow-500 rounded-full"
             >
@@ -49,7 +49,7 @@
           class="bg-white rounded-lg border p-4 flex flex-col justify-between"
         >
           <div class="flex justify-between items-center">
-            <span class="text-sm text-gray-600">执行结束（正常）</span>
+            <span class="text-sm text-gray-600">{{ t("dataPipelines.execution_completed_normally") }}</span>
             <div
               class="w-7 h-7 flex items-center justify-center text-green-500 rounded-full"
             >
@@ -64,7 +64,7 @@
           class="bg-white rounded-lg border p-4 flex flex-col justify-between"
         >
           <div class="flex justify-between items-center">
-            <span class="text-sm text-gray-600">执行结束（错误）</span>
+            <span class="text-sm text-gray-600">{{ t("dataPipelines.execution_end_error") }}</span>
             <div
               class="w-7 h-7 flex items-center justify-center bg-purple-100 text-purple-500 rounded-full"
             >
@@ -77,7 +77,7 @@
     </div>
 
     <p class="text-gray-900 text-2xl font-medium mt-[16px]">
-      格式转换
+      {{ t("dataPipelines.formatConversion") }}
     </p>
 
     <div>
@@ -87,7 +87,7 @@
           class="bg-white rounded-lg border p-4 flex flex-col justify-between"
         >
           <div class="flex justify-between items-center">
-            <span class="text-sm text-gray-600">等待中</span>
+            <span class="text-sm text-gray-600">{{ t("dataPipelines.waiting") }}</span>
             <div
               class="w-7 h-7 flex items-center justify-center text-blue-500 rounded-full"
             >
@@ -102,7 +102,7 @@
           class="bg-white rounded-lg border p-4 flex flex-col justify-between"
         >
           <div class="flex justify-between items-center">
-            <span class="text-sm text-gray-600">执行中</span>
+            <span class="text-sm text-gray-600">{{ t("dataPipelines.inProgress") }}</span>
             <div
               class="w-7 h-7 flex items-center justify-center text-yellow-500 rounded-full"
             >
@@ -117,7 +117,7 @@
           class="bg-white rounded-lg border p-4 flex flex-col justify-between"
         >
           <div class="flex justify-between items-center">
-            <span class="text-sm text-gray-600">已停止</span>
+            <span class="text-sm text-gray-600">{{ t("dataPipelines.stopped") }}</span>
             <div
               class="w-7 h-7 flex items-center justify-center text-yellow-500 rounded-full"
             >
@@ -132,7 +132,7 @@
           class="bg-white rounded-lg border p-4 flex flex-col justify-between"
         >
           <div class="flex justify-between items-center">
-            <span class="text-sm text-gray-600">执行结束（正常）</span>
+            <span class="text-sm text-gray-600">{{ t("dataPipelines.execution_completed_normally") }}</span>
             <div
               class="w-7 h-7 flex items-center justify-center text-green-500 rounded-full"
             >
@@ -147,7 +147,7 @@
           class="bg-white rounded-lg border p-4 flex flex-col justify-between"
         >
           <div class="flex justify-between items-center">
-            <span class="text-sm text-gray-600">执行结束（错误）</span>
+            <span class="text-sm text-gray-600">{{ t("dataPipelines.execution_end_error") }}</span>
             <div
               class="w-7 h-7 flex items-center justify-center bg-purple-100 text-purple-500 rounded-full"
             >
@@ -160,7 +160,7 @@
     </div>
 
     <p class="text-gray-900 text-2xl font-medium mt-[16px]">
-      数据处理
+      {{ t("dataPipelines.dataProcessing") }}
     </p>
 
     <div>
@@ -170,7 +170,7 @@
           class="bg-white rounded-lg border p-4 flex flex-col justify-between"
         >
           <div class="flex justify-between items-center">
-            <span class="text-sm text-gray-600">等待中</span>
+            <span class="text-sm text-gray-600">{{ t("dataPipelines.waiting") }}</span>
             <div
               class="w-7 h-7 flex items-center justify-center text-blue-500 rounded-full"
             >
@@ -186,7 +186,7 @@
           class="bg-white rounded-lg border p-4 flex flex-col justify-between"
         >
           <div class="flex justify-between items-center">
-            <span class="text-sm text-gray-600">执行中</span>
+            <span class="text-sm text-gray-600">{{ t("dataPipelines.inProgress") }}</span>
             <div
               class="w-7 h-7 flex items-center justify-center text-yellow-500 rounded-full"
             >
@@ -201,7 +201,7 @@
           class="bg-white rounded-lg border p-4 flex flex-col justify-between"
         >
           <div class="flex justify-between items-center">
-            <span class="text-sm text-gray-600">执行结束（正常）</span>
+            <span class="text-sm text-gray-600">{{ t("dataPipelines.execution_completed_normally") }}</span>
             <div
               class="w-7 h-7 flex items-center justify-center text-green-500 rounded-full"
             >
@@ -216,7 +216,7 @@
           class="bg-white rounded-lg border p-4 flex flex-col justify-between"
         >
           <div class="flex justify-between items-center">
-            <span class="text-sm text-gray-600">执行结束（错误）</span>
+            <span class="text-sm text-gray-600">{{ t("dataPipelines.execution_end_error") }}</span>
             <div
               class="w-7 h-7 flex items-center justify-center bg-purple-100 text-purple-500 rounded-full"
             >
@@ -260,7 +260,7 @@
     <div class="borderBox mt-8">
       <div class="py-5 px-6">
         <p class="text-lg text-gray-900 font-medium">
-          Celery 节点服务列表
+          {{ t("dataPipelines.celery_node_service_list") }}
         </p>
       </div>
       <div class="flex flex-col">
@@ -278,18 +278,18 @@
           />
           <el-table-column
             prop="current_ip"
-            label="IP地址"
+            :label="t('dataPipelines.ip_address')"
             min-width="180"
           />
           <el-table-column
             prop="task_count"
-            label="当前任务数"
-            min-width="160"
+            :label="t('dataPipelines.current_number_tasks')"
+            min-width="200"
           />
 
           <el-table-column
             prop="status"
-            label="节点状态"
+            :label="t('dataPipelines.node_status')"
             min-width="160"
           >
             <template #default="scope">
@@ -306,7 +306,7 @@
 
           <el-table-column
             prop="ack_time"
-            label="心跳时间"
+            :label="t('dataPipelines.heartbeat_time')"
             min-width="280"
           >
             <template #default="scope">

@@ -13,6 +13,7 @@ export const dataPipelines = {
   "targetFormat": "Target Format",
   "dataFlowBranch": "Data Flow Branch",
   "startExecution": "Start Execution",
+  "inProgress": "In Progress",
   "searchTaskName": "Search Task Name",
   "confirmTermination": "Confirm Termination",
   "terminate": "Terminate",
@@ -39,6 +40,7 @@ export const dataPipelines = {
     }
   },
   "testingConnection": "Testing connection",
+  "submitting": "Submitting",
   "pleaseSelectAnExecutionTime": "Please select an execution time",
   "deletingTask": "Deleting task",
   "terminatingTask": "Terminating task",
@@ -183,14 +185,24 @@ export const dataPipelines = {
   "deduplicate": "Deduplicate",
   "remove": "Remove",
   "data_refine": "Data Refinement",
+  "Internal": "Internal",
   "data_generation": "Data Generation",
   "data_enhancement": "Data Enhancement",
-  "Internal": "Internal",
+
+  "data_source": "Data Source",
+  "execution_completed_normally": "Execution completed (normal)",
+  "execution_end_error": "Execution ended (error)",
+  "stopped": "Stopped",
+  "celery_node_service_list": "Celery Node Service List",
+  "ip_address": "IP Address",
+  "current_number_tasks": "Current Number of Tasks",
+  "node_status": "Node Status",
+  "heartbeat_time": "Heartbeat Time",
+
   "taskType": "Task Type",
   "dataCleaning": "Data Cleaning",
   "processingStatus": "Processing Status",
   "processingText": "Processing Text",
-  "inProgress": "In Progress",
   "completed": "Completed",
   "dataSource": "Data Source",
   "dataSourceBranch": "Data Source Branch",
@@ -244,7 +256,6 @@ export const dataPipelines = {
   "uploadFailedTips2": "The icon size cannot exceed 10MB.",
   "uploadFailedTips3": "Upload failed, please try again",
   "networkError": "Network error, please check the connection and try again",
-  "submitting": "Submitting",
   "algorithmTemplateDescription": "The algorithm template allows users to build workflows using various model operators, enabling tasks such as data cleaning, automated data augmentation, and analysis.",
   "taskTemplate": "Task Template",
   "searchTemplate": "Search Template",
@@ -362,6 +373,9 @@ export const dataPipelines = {
 
   "opencsg_data_extraction_preprocess_internal": "opencsg data extraction preprocess",
   "opencsg_scrape_url_data_preprocess_internal": "opencsg scrape url data preprocess",
+  "fineweb_edu_chinese_common_internal": "fineweb edu chinese common",
+  "smoltalk_chinese_common_internal": "smoltalk chinese common",
+  "cosmopedia_chinese_preprocess_internal": "cosmopedia chinese preprocess",
 
 
   "analysis_common_internal_dec": "This analyzer class is used to analyze specific datasets. It calculates statistics for all filtering operations in the configuration file, applies various analyses (such as overall analysis, column-by-column analysis, etc.) to these statistics, and generates analysis results (statistical tables, distribution charts, etc.) to help users better understand the input dataset.",
@@ -379,4 +393,7 @@ export const dataPipelines = {
   "quality_classifier_common_internal_dec": "This quality classifier class is used to predict the scores of documents in the dataset. It will calculate scores for all rows and provide two columns for each row: score and should_keep, to help users decide which row should be deleted. By default, if the score is higher than 0.9, the row will be marked as should_keep=1.",
   "opencsg_data_extraction_preprocess_internal_dec": "A high-quality tool for converting PDF to Markdown and JSON",
   "opencsg_scrape_url_data_preprocess_internal_dec": "A large language model-based data scraping tool for websites and local documents (XML, HTML, JSON, etc.)",
+  "fineweb_edu_chinese_common_internal_dec": "Users can define their own scoring criteria, score the data from the data source based on these criteria, and filter the data. The maximum score is 5.",
+  "smoltalk_chinese_common_internal_dec": "Use a fixed system_prompt to generate relevant multi-round dialogues with a large model and score them. Filter the data based on the score specified by the user, and only retain the one with the highest score.",
+  "cosmopedia_chinese_preprocess_internal_dec": "A detailed tutorial on converting raw text to WikiHow style using the MakeCosmopediaMapper operator. This tool invokes large language models to generate structured tutorial content based on the input seed text.",
 }
diff --git a/frontend/src/locales/en_js/operator_en.json b/frontend/src/locales/en_js/operator_en.json
@@ -1670,5 +1670,106 @@
             "after": "The dataset adds embedding, nn_indices, and nn_scores fields containing vector representations of text and nearest neighbor information"
         },
         "params": []
+    },
+    "gather_generated_data_filter": {
+        "name": "gather_generated_data_filter",
+        "description": "Filter for collecting and processing generated data.",
+        "type": "Filter",
+        "group": "",
+        "samples": {
+            "before": "Based on the results of the previous step, remove the | | and < | im_end | > characters and filter to get the empty content data.",
+            "after": ""
+        },
+        "params": []
+    },
+    "annotate_edu_train_bert_scorer_mapper": {
+        "name": "annotate_edu_train_bert_scorer_mapper",
+        "description": "Annotate Edu Train BERT Scorer",
+        "type": "Filter",
+        "group": "",
+        "samples": {
+            "before": "Here is a more concise translation of the provided sentence:'Score a field and add a _score field for the result.'",
+            "after": ""
+        },
+        "params": [
+            {
+                "name": "auth_token",
+                "type": "LIST",
+                "option_values": null,
+                "value": ""
+            },
+            {
+                "name": "model_name",
+                "type": "LIST",
+                "option_values": null,
+                "value": "text-embedding-v4"
+            },
+            {
+                "name": "dimensions",
+                "type": "PositiveFloat",
+                "option_values": null,
+                "value": "1024"
+            },
+            {
+                "name": "model_url",
+                "type": "LIST",
+                "option_values": null,
+                "value": "https://dashscope.aliyuncs.com/compatible-mode/v1"
+            },
+            {
+                "name": "query_text",
+                "type": "LIST",
+                "option_values": null,
+                "value": "What is Deep Learning?"
+            }
+        ]
+    },
+    "dedup_and_save_deduplicator": {
+        "name": "dedup_and_save_deduplicator",
+        "description": "A deduplicator based on graph connectivity. It constructs a similarity graph by connecting samples with similarity scores above the threshold, then keeps only one sample (with minimum index) from each connected component. Suitable for datasets with pre-computed nearest neighbor similarity information.",
+        "type": "Deduplicator",
+        "group": "",
+        "samples": {
+            "before": "",
+            "after": ""
+        },
+        "params": [
+            {
+                "name": "similarity_threshold",
+                "type": "PositiveFloat",
+                "option_values": null,
+                "value": 0.5
+            }
+        ]
+    },
+    "pipeline_magpie_zh_mapper": {
+        "name": "pipeline_magpie_zh_mapper",
+        "description": "Using the deepseek-v2.5 or qwen2.5 model, generate multi-round dialogue data based on the manually designed system_prompt corresponding to multiple tasks",
+        "type": "Mapper",
+        "group": "",
+        "samples": {
+            "before": "",
+            "after": ""
+        },
+        "params": [
+            {
+                "name": "model_name",
+                "type": "LIST",
+                "option_values": null,
+                "value": "qwen-plus"
+            },
+            {
+                "name": "auth_token",
+                "type": "LIST",
+                "option_values": null,
+                "value": ""
+            },
+            {
+                "name": "model_url",
+                "type": "LIST",
+                "option_values": null,
+                "value": "https://dashscope.aliyuncs.com/compatible-mode/v1"
+            }
+        ]
     }
 }