Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ import {
// Import the i18n locale files
import zhOps from "../../../../locales/zh_js/operator_zh.json";
import enOps from "../../../../locales/en_js/operator_en.json";
import zhHantOps from '../../../../locales/zh_hant_js/operator_zhHant.json'

const { t, locale } = useI18n();
const origin = window.location.origin + "/";
Expand All @@ -439,6 +440,7 @@ const origin = window.location.origin + "/";
// Lookup table of operator translation bundles. Each key is a locale code and
// each value is the operator JSON imported for that language (Simplified
// Chinese, English, Traditional Chinese).
// NOTE(review): presumably indexed with the vue-i18n `locale` value — confirm
// that the active locale strings match these keys exactly (e.g. "zhHant").
const operatorI18n = {
zh: zhOps,
en: enOps,
zhHant: zhHantOps
};

// Basic state management
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,13 @@
import jsYaml from 'js-yaml';
import zhOps from "../../../locales/zh_js/operator_zh.json";
import enOps from "../../../locales/en_js/operator_en.json";
import zhHantOps from '../../../locales/zh_hant_js/operator_zhHant.json'
import { useI18n } from "vue-i18n";
const { t, locale } = useI18n();
// Lookup table of operator translation bundles. Each key is a locale code and
// each value is the operator JSON imported for that language (Simplified
// Chinese, English, Traditional Chinese).
// NOTE(review): presumably indexed with the vue-i18n `locale` value — confirm
// that the active locale strings match these keys exactly (e.g. "zhHant").
const operatorI18n = {
zh: zhOps,
en: enOps,
zhHant: zhHantOps
};
const userStore = useUserStore();
const origin = window.location.origin + '/';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
</p>
</div>
<p class="text-gray-900 text-2xl font-medium">
数据源
{{ t("dataPipelines.data_source") }}
</p>

<div>
Expand All @@ -18,7 +18,7 @@
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
>
<div class="flex justify-between items-center">
<span class="text-sm text-gray-600">等待中</span>
<span class="text-sm text-gray-600">{{ t("dataPipelines.waiting") }}</span>
<div
class="w-7 h-7 flex items-center justify-center text-blue-500 rounded-full"
>
Expand All @@ -34,7 +34,7 @@
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
>
<div class="flex justify-between items-center">
<span class="text-sm text-gray-600">执行中</span>
<span class="text-sm text-gray-600">{{ t("dataPipelines.inProgress") }}</span>
<div
class="w-7 h-7 flex items-center justify-center text-yellow-500 rounded-full"
>
Expand All @@ -49,7 +49,7 @@
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
>
<div class="flex justify-between items-center">
<span class="text-sm text-gray-600">执行结束(正常)</span>
<span class="text-sm text-gray-600">{{ t("dataPipelines.execution_completed_normally") }}</span>
<div
class="w-7 h-7 flex items-center justify-center text-green-500 rounded-full"
>
Expand All @@ -64,7 +64,7 @@
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
>
<div class="flex justify-between items-center">
<span class="text-sm text-gray-600">执行结束(错误)</span>
<span class="text-sm text-gray-600">{{ t("dataPipelines.execution_end_error") }}</span>
<div
class="w-7 h-7 flex items-center justify-center bg-purple-100 text-purple-500 rounded-full"
>
Expand All @@ -77,7 +77,7 @@
</div>

<p class="text-gray-900 text-2xl font-medium mt-[16px]">
格式转换
{{ t("dataPipelines.formatConversion") }}
</p>

<div>
Expand All @@ -87,7 +87,7 @@
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
>
<div class="flex justify-between items-center">
<span class="text-sm text-gray-600">等待中</span>
<span class="text-sm text-gray-600">{{ t("dataPipelines.waiting") }}</span>
<div
class="w-7 h-7 flex items-center justify-center text-blue-500 rounded-full"
>
Expand All @@ -102,7 +102,7 @@
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
>
<div class="flex justify-between items-center">
<span class="text-sm text-gray-600">执行中</span>
<span class="text-sm text-gray-600">{{ t("dataPipelines.inProgress") }}</span>
<div
class="w-7 h-7 flex items-center justify-center text-yellow-500 rounded-full"
>
Expand All @@ -117,7 +117,7 @@
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
>
<div class="flex justify-between items-center">
<span class="text-sm text-gray-600">已停止</span>
<span class="text-sm text-gray-600">{{ t("dataPipelines.stopped") }}</span>
<div
class="w-7 h-7 flex items-center justify-center text-yellow-500 rounded-full"
>
Expand All @@ -132,7 +132,7 @@
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
>
<div class="flex justify-between items-center">
<span class="text-sm text-gray-600">执行结束(正常)</span>
<span class="text-sm text-gray-600">{{ t("dataPipelines.execution_completed_normally") }}</span>
<div
class="w-7 h-7 flex items-center justify-center text-green-500 rounded-full"
>
Expand All @@ -147,7 +147,7 @@
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
>
<div class="flex justify-between items-center">
<span class="text-sm text-gray-600">执行结束(错误)</span>
<span class="text-sm text-gray-600">{{ t("dataPipelines.execution_end_error") }}</span>
<div
class="w-7 h-7 flex items-center justify-center bg-purple-100 text-purple-500 rounded-full"
>
Expand All @@ -160,7 +160,7 @@
</div>

<p class="text-gray-900 text-2xl font-medium mt-[16px]">
数据处理
{{ t("dataPipelines.dataProcessing") }}
</p>

<div>
Expand All @@ -170,7 +170,7 @@
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
>
<div class="flex justify-between items-center">
<span class="text-sm text-gray-600">等待中</span>
<span class="text-sm text-gray-600">{{ t("dataPipelines.waiting") }}</span>
<div
class="w-7 h-7 flex items-center justify-center text-blue-500 rounded-full"
>
Expand All @@ -186,7 +186,7 @@
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
>
<div class="flex justify-between items-center">
<span class="text-sm text-gray-600">执行中</span>
<span class="text-sm text-gray-600">{{ t("dataPipelines.inProgress") }}</span>
<div
class="w-7 h-7 flex items-center justify-center text-yellow-500 rounded-full"
>
Expand All @@ -201,7 +201,7 @@
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
>
<div class="flex justify-between items-center">
<span class="text-sm text-gray-600">执行结束(正常)</span>
<span class="text-sm text-gray-600">{{ t("dataPipelines.execution_completed_normally") }}</span>
<div
class="w-7 h-7 flex items-center justify-center text-green-500 rounded-full"
>
Expand All @@ -216,7 +216,7 @@
class="bg-white rounded-lg border p-4 flex flex-col justify-between"
>
<div class="flex justify-between items-center">
<span class="text-sm text-gray-600">执行结束(错误)</span>
<span class="text-sm text-gray-600">{{ t("dataPipelines.execution_end_error") }}</span>
<div
class="w-7 h-7 flex items-center justify-center bg-purple-100 text-purple-500 rounded-full"
>
Expand Down Expand Up @@ -260,7 +260,7 @@
<div class="borderBox mt-8">
<div class="py-5 px-6">
<p class="text-lg text-gray-900 font-medium">
Celery 节点服务列表
{{ t("dataPipelines.celery_node_service_list") }}
</p>
</div>
<div class="flex flex-col">
Expand All @@ -278,18 +278,18 @@
/>
<el-table-column
prop="current_ip"
label="IP地址"
:label="t('dataPipelines.ip_address')"
min-width="180"
/>
<el-table-column
prop="task_count"
label="当前任务数"
min-width="160"
:label="t('dataPipelines.current_number_tasks')"
min-width="200"
/>

<el-table-column
prop="status"
label="节点状态"
:label="t('dataPipelines.node_status')"
min-width="160"
>
<template #default="scope">
Expand All @@ -306,7 +306,7 @@

<el-table-column
prop="ack_time"
label="心跳时间"
:label="t('dataPipelines.heartbeat_time')"
min-width="280"
>
<template #default="scope">
Expand Down
23 changes: 20 additions & 3 deletions frontend/src/locales/en_js/datapipelines.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export const dataPipelines = {
"targetFormat": "Target Format",
"dataFlowBranch": "Data Flow Branch",
"startExecution": "Start Execution",
"inProgress": "In Progress",
"searchTaskName": "Search Task Name",
"confirmTermination": "Confirm Termination",
"terminate": "Terminate",
Expand All @@ -39,6 +40,7 @@ export const dataPipelines = {
}
},
"testingConnection": "Testing connection",
"submitting": "Submitting",
"pleaseSelectAnExecutionTime": "Please select an execution time",
"deletingTask": "Deleting task",
"terminatingTask": "Terminating task",
Expand Down Expand Up @@ -183,14 +185,24 @@ export const dataPipelines = {
"deduplicate": "Deduplicate",
"remove": "Remove",
"data_refine": "Data Refinement",
"Internal": "Internal",
"data_generation": "Data Generation",
"data_enhancement": "Data Enhancement",
"Internal": "Internal",

"data_source": "Data Source",
"execution_completed_normally": "Execution completed (normal)",
"execution_end_error": "Execution ended (error)",
"stopped": "Stopped",
"celery_node_service_list": "Celery Node Service List",
"ip_address": "IP Address",
"current_number_tasks": "Current Number of Tasks",
"node_status": "Node Status",
"heartbeat_time": "Heartbeat Time",

"taskType": "Task Type",
"dataCleaning": "Data Cleaning",
"processingStatus": "Processing Status",
"processingText": "Processing Text",
"inProgress": "In Progress",
"completed": "Completed",
"dataSource": "Data Source",
"dataSourceBranch": "Data Source Branch",
Expand Down Expand Up @@ -244,7 +256,6 @@ export const dataPipelines = {
"uploadFailedTips2": "The icon size cannot exceed 10MB.",
"uploadFailedTips3": "Upload failed, please try again",
"networkError": "Network error, please check the connection and try again",
"submitting": "Submitting",
"algorithmTemplateDescription": "The algorithm template allows users to build workflows using various model operators, enabling tasks such as data cleaning, automated data augmentation, and analysis.",
"taskTemplate": "Task Template",
"searchTemplate": "Search Template",
Expand Down Expand Up @@ -362,6 +373,9 @@ export const dataPipelines = {

"opencsg_data_extraction_preprocess_internal": "opencsg data extraction preprocess",
"opencsg_scrape_url_data_preprocess_internal": "opencsg scrape url data preprocess",
"fineweb_edu_chinese_common_internal": "fineweb edu chinese common",
"smoltalk_chinese_common_internal": "smoltalk chinese common",
"cosmopedia_chinese_preprocess_internal": "cosmopedia chinese preprocess",


"analysis_common_internal_dec": "This analyzer class is used to analyze specific datasets. It calculates statistics for all filtering operations in the configuration file, applies various analyses (such as overall analysis, column-by-column analysis, etc.) to these statistics, and generates analysis results (statistical tables, distribution charts, etc.) to help users better understand the input dataset.",
Expand All @@ -379,4 +393,7 @@ export const dataPipelines = {
"quality_classifier_common_internal_dec": "This quality classifier class is used to predict the scores of documents in the dataset. It will calculate scores for all rows and provide two columns for each row: score and should_keep, to help users decide which row should be deleted. By default, if the score is higher than 0.9, the row will be marked as should_keep=1.",
"opencsg_data_extraction_preprocess_internal_dec": "A high-quality tool for converting PDF to Markdown and JSON",
"opencsg_scrape_url_data_preprocess_internal_dec": "A large language model-based data scraping tool for websites and local documents (XML, HTML, JSON, etc.)",
"fineweb_edu_chinese_common_internal_dec": "Users can define their own scoring criteria, score the data from the data source based on these criteria, and filter the data. The maximum score is 5.",
"smoltalk_chinese_common_internal_dec": "Use a fixed system_prompt to generate relevant multi-round dialogues with a large model and score them. Filter the data based on the score specified by the user, and only retain the one with the highest score.",
"cosmopedia_chinese_preprocess_internal_dec": "A detailed tutorial on converting raw text to WikiHow style using the MakeCosmopediaMapper operator. This tool invokes large language models to generate structured tutorial content based on the input seed text.",
}
101 changes: 101 additions & 0 deletions frontend/src/locales/en_js/operator_en.json
Original file line number Diff line number Diff line change
Expand Up @@ -1670,5 +1670,106 @@
"after": "The dataset adds embedding, nn_indices, and nn_scores fields containing vector representations of text and nearest neighbor information"
},
"params": []
},
"gather_generated_data_filter": {
"name": "gather_generated_data_filter",
"description": "Filter for collecting and processing generated data.",
"type": "Filter",
"group": "",
"samples": {
"before": "Based on the results of the previous step, remove the | | and < | im_end | > characters and filter to get the empty content data.",
"after": ""
},
"params": []
},
"annotate_edu_train_bert_scorer_mapper": {
"name": "annotate_edu_train_bert_scorer_mapper",
"description": "Annotate Edu Train BERT Scorer",
"type": "Filter",
"group": "",
"samples": {
"before": "Score a field and add a _score field for the result.",
"after": ""
},
"params": [
{
"name": "auth_token",
"type": "LIST",
"option_values": null,
"value": ""
},
{
"name": "model_name",
"type": "LIST",
"option_values": null,
"value": "text-embedding-v4"
},
{
"name": "dimensions",
"type": "PositiveFloat",
"option_values": null,
"value": "1024"
},
{
"name": "model_url",
"type": "LIST",
"option_values": null,
"value": "https://dashscope.aliyuncs.com/compatible-mode/v1"
},
{
"name": "query_text",
"type": "LIST",
"option_values": null,
"value": "What is Deep Learning?"
}
]
},
"dedup_and_save_deduplicator": {
"name": "dedup_and_save_deduplicator",
"description": "A deduplicator based on graph connectivity. It constructs a similarity graph by connecting samples with similarity scores above the threshold, then keeps only one sample (with minimum index) from each connected component. Suitable for datasets with pre-computed nearest neighbor similarity information.",
"type": "Deduplicator",
"group": "",
"samples": {
"before": "",
"after": ""
},
"params": [
{
"name": "similarity_threshold",
"type": "PositiveFloat",
"option_values": null,
"value": 0.5
}
]
},
"pipeline_magpie_zh_mapper": {
"name": "pipeline_magpie_zh_mapper",
"description": "Uses the deepseek-v2.5 or qwen2.5 model to generate multi-round dialogue data based on the manually designed system_prompt corresponding to each of multiple tasks.",
"type": "Mapper",
"group": "",
"samples": {
"before": "",
"after": ""
},
"params": [
{
"name": "model_name",
"type": "LIST",
"option_values": null,
"value": "qwen-plus"
},
{
"name": "auth_token",
"type": "LIST",
"option_values": null,
"value": ""
},
{
"name": "model_url",
"type": "LIST",
"option_values": null,
"value": "https://dashscope.aliyuncs.com/compatible-mode/v1"
}
]
}
}
Loading