diff --git a/frontend/src/pages/DataCollection/Create/CreateTask.tsx b/frontend/src/pages/DataCollection/Create/CreateTask.tsx index a2cbbefd4..411fc4efd 100644 --- a/frontend/src/pages/DataCollection/Create/CreateTask.tsx +++ b/frontend/src/pages/DataCollection/Create/CreateTask.tsx @@ -81,19 +81,7 @@ export default function CollectionTaskCreate() { const handleSubmit = async () => { try { await form.validateFields(); - - const values = form.getFieldsValue(true); - const payload = { - name: values.name, - description: values.description, - syncMode: values.syncMode, - scheduleExpression: values.scheduleExpression, - timeoutSeconds: values.timeoutSeconds, - templateId: values.templateId, - config: values.config, - }; - - await createTaskUsingPost(payload); + await createTaskUsingPost(newTask); message.success("任务创建成功"); navigate("/data/collection"); } catch (error) { diff --git a/runtime/datax/nfsreader/src/main/java/com/modelengine/edatamate/plugin/reader/nfsreader/NfsReader.java b/runtime/datax/nfsreader/src/main/java/com/modelengine/edatamate/plugin/reader/nfsreader/NfsReader.java index 4bb286076..29b134421 100644 --- a/runtime/datax/nfsreader/src/main/java/com/modelengine/edatamate/plugin/reader/nfsreader/NfsReader.java +++ b/runtime/datax/nfsreader/src/main/java/com/modelengine/edatamate/plugin/reader/nfsreader/NfsReader.java @@ -69,12 +69,14 @@ public static class Task extends Reader.Task { private Configuration jobConfig; private String mountPoint; private Set fileType; + private List files; @Override public void init() { this.jobConfig = super.getPluginJobConf(); this.mountPoint = this.jobConfig.getString("mountPoint"); this.fileType = new HashSet<>(this.jobConfig.getList("fileType", Collections.emptyList(), String.class)); + this.files = this.jobConfig.getList("files", Collections.emptyList(), String.class); } @Override @@ -83,6 +85,7 @@ public void startRead(RecordSender recordSender) { List files = stream.filter(Files::isRegularFile) .filter(file -> fileType.isEmpty() || fileType.contains(getFileSuffix(file))) .map(path -> path.getFileName().toString()) + .filter(fileName -> this.files.isEmpty() || this.files.contains(fileName)) .collect(Collectors.toList()); files.forEach(filePath -> { Record record = recordSender.createRecord(); diff --git a/scripts/db/data-collection-init.sql b/scripts/db/data-collection-init.sql index 11b6912cd..68ea2a1c7 100644 --- a/scripts/db/data-collection-init.sql +++ b/scripts/db/data-collection-init.sql @@ -73,5 +73,5 @@ CREATE TABLE t_dc_collection_templates ( ) COMMENT='数据归集模板配置表'; INSERT IGNORE INTO t_dc_collection_templates(id, name, description, source_type, source_name, target_type, target_name, template_content, built_in, created_by, updated_by) -VALUES ('1', 'NAS归集模板', '将NAS存储上的文件归集到DataMate平台上。', 'nfsreader', 'nfsreader', 'nfswriter', 'nfswriter', '{"parameter": {}, "reader": {}, "writer": {}}', True, 'system', 'system'), - ('2', 'OBS归集模板', '将OBS存储上的文件归集到DataMate平台上。', 'obsreader', 'obsreader', 'obswriter', 'obswriter', '{"parameter": {"endpoint": {"name": "服务地址","description": "OBS的服务地址。","type": "input"},"bucket": {"name": "存储桶名称","description": "OBS存储桶名称。","type": "input"},"accessKey": {"name": "访问密钥","description": "OBS访问密钥。","type": "input"},"secretKey": {"name": "密钥","description": "OBS密钥。","type": "input"},"prefix": {"name": "匹配前缀","description": "按照匹配前缀去选中OBS中的文件进行归集。","type": "input"}}, "reader": {}, "writer": {}}', True, 'system', 'system'); +VALUES ('1', 'NAS归集模板', '将NAS存储上的文件归集到DataMate平台上。', 'nfsreader', 'nfsreader', 'nfswriter', 'nfswriter', '{"parameter": {"ip": {"name": "NAS地址","description": "NAS服务的地址,可以为IP或者域名。","type": "input", "required": true}, "path": {"name": "共享路径","description": "NAS服务的共享路径。","type": "input", "required": true}, "files": {"name": "文件列表","description": "指定文件列表进行归集。","type": "select", "required": false}}, "reader": {}, "writer": {}}', True, 'system', 'system'), + ('2', 'OBS归集模板', '将OBS存储上的文件归集到DataMate平台上。', 'obsreader', 'obsreader', 'obswriter', 'obswriter', '{"parameter": {"endpoint": {"name": "服务地址","description": "OBS的服务地址。","type": "input", "required": true},"bucket": {"name": "存储桶名称","description": "OBS存储桶名称。","type": "input", "required": true},"accessKey": {"name": "AK","description": "OBS访问密钥。","type": "input", "required": true},"secretKey": {"name": "SK","description": "OBS密钥。","type": "password", "required": true},"prefix": {"name": "匹配前缀","description": "按照匹配前缀去选中OBS中的文件进行归集。","type": "input", "required": true}}, "reader": {}, "writer": {}}', True, 'system', 'system'); diff --git a/scripts/images/backend-python/Dockerfile b/scripts/images/backend-python/Dockerfile index dbc3a1592..21ae87ae5 100644 --- a/scripts/images/backend-python/Dockerfile +++ b/scripts/images/backend-python/Dockerfile @@ -17,19 +17,18 @@ FROM python:3.12-slim # Note: to use the cache mount syntax you must build with BuildKit enabled: # DOCKER_BUILDKIT=1 docker build . -f scripts/images/datamate-python/Dockerfile -t datamate-backend-python -RUN apt-get update \ - && apt-get install -y --no-install-recommends openjdk-21-jre-headless \ - && rm -rf /var/lib/apt/lists/* +RUN apt-get update && \ + apt-get install -y --no-install-recommends vim openjdk-21-jre nfs-common rsync && \ + rm -rf /var/lib/apt/lists/* ENV PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 \ - # Poetry configuration POETRY_VERSION=2.2.1 \ POETRY_NO_INTERACTION=1 \ POETRY_VIRTUALENVS_CREATE=false \ POETRY_CACHE_DIR=/tmp/poetry_cache -ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk-amd64 +ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk ENV PATH="/root/.local/bin:$JAVA_HOME/bin:$PATH" diff --git a/scripts/images/backend/Dockerfile b/scripts/images/backend/Dockerfile index 3f839fe1a..071ec597c 100644 --- a/scripts/images/backend/Dockerfile +++ b/scripts/images/backend/Dockerfile @@ -1,16 +1,3 @@ -FROM maven:3-eclipse-temurin-8 AS datax-builder - -RUN apt-get update && \ - apt-get install -y git && \ - git clone https://github.com/alibaba/DataX.git - -COPY runtime/datax/ DataX/ - -RUN cd DataX && \ - sed -i "s/com.mysql.jdbc.Driver/com.mysql.cj.jdbc.Driver/g" \ - plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java && \ - mvn -U clean package assembly:assembly -Dmaven.test.skip=true - FROM maven:3-eclipse-temurin-21 AS builder COPY backend/ /opt/backend @@ -22,12 +9,11 @@ RUN cd /opt/backend/services && \ FROM eclipse-temurin:21-jdk RUN apt-get update && \ - apt-get install -y vim wget curl nfs-common rsync python3 python3-pip python-is-python3 dos2unix && \ + apt-get install -y vim wget curl rsync python3 python3-pip python-is-python3 dos2unix && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* COPY --from=builder /opt/backend/services/main-application/target/datamate.jar /opt/backend/datamate.jar -COPY --from=datax-builder /DataX/target/datax/datax /opt/datax COPY scripts/images/backend/start.sh /opt/backend/start.sh