Skip to content

Commit 0f6b0bb

Browse files
authored
Merge branch 'ModelEngine-Group:main' into main
2 parents fdc6b65 + 05b26a2 commit 0f6b0bb

File tree

800 files changed

+115708
-2326
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

800 files changed

+115708
-2326
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Builds the Deer Flow backend and frontend Docker images via the repository
# Makefile and, for every event except pull requests, pushes them to the
# GitHub Container Registry (ghcr.io).
name: Deer Flow Docker Image CI

on:
  push:
    branches: [ "main" ]
    paths:
      - 'runtime/deer-flow/**'
      - 'scripts/images/deer-flow-backend/**'
      - 'scripts/images/deer-flow-frontend/**'
      - '.github/workflows/docker-image-deer-flow.yml'
  pull_request:
    branches: [ "main" ]
    paths:
      - 'runtime/deer-flow/**'
      - 'scripts/images/deer-flow-backend/**'
      - 'scripts/images/deer-flow-frontend/**'
      - '.github/workflows/docker-image-deer-flow.yml'
  workflow_dispatch:

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    steps:
      - uses: actions/checkout@v4

      # Pull requests only build the images; no registry credentials are used.
      - name: Login to GitHub Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Tag selection: vX.Y.Z git tag -> X.Y.Z, main branch -> latest,
      # anything else -> temp.
      - name: Set Docker Image Tag
        id: set-tag
        run: |
          if [[ "$GITHUB_REF" == refs/tags/v* ]]; then
            TAG="${GITHUB_REF#refs/tags/v}"
            echo "TAGS=$TAG" >> "$GITHUB_OUTPUT"
          elif [[ "$GITHUB_REF" == refs/heads/main ]]; then
            echo "TAGS=latest" >> "$GITHUB_OUTPUT"
          else
            echo "TAGS=temp" >> "$GITHUB_OUTPUT"
          fi

      - name: Build Docker Image
        run: |
          make build-deer-flow VERSION=latest

      # Expressions are handed to the script through env instead of being
      # pasted into the shell text, so their values are never parsed as code.
      - name: Tag Docker Image
        env:
          REPO_OWNER: ${{ github.repository_owner }}
          IMAGE_TAG: ${{ steps.set-tag.outputs.TAGS }}
        run: |
          # Image references must be lowercase, so the owner name is lowercased.
          LOWERCASE_REPO=$(echo "$REPO_OWNER" | tr '[:upper:]' '[:lower:]')
          docker tag deer-flow-backend:latest "ghcr.io/$LOWERCASE_REPO/deer-flow-backend:$IMAGE_TAG"
          docker tag deer-flow-frontend:latest "ghcr.io/$LOWERCASE_REPO/deer-flow-frontend:$IMAGE_TAG"

      - name: Push Docker Image
        if: github.event_name != 'pull_request'
        env:
          REPO_OWNER: ${{ github.repository_owner }}
          IMAGE_TAG: ${{ steps.set-tag.outputs.TAGS }}
        run: |
          LOWERCASE_REPO=$(echo "$REPO_OWNER" | tr '[:upper:]' '[:lower:]')
          docker push "ghcr.io/$LOWERCASE_REPO/deer-flow-backend:$IMAGE_TAG"
          docker push "ghcr.io/$LOWERCASE_REPO/deer-flow-frontend:$IMAGE_TAG"

Makefile

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@ WITH_MINERU ?= false # 默认不构建mineru
44
VERSION ?= latest
55
NAMESPACE ?= datamate
66

7+
# GNU Make presets MAKE to the executable that is running this Makefile, so
# recursive $(MAKE) calls reuse that same binary by default. It is overridden
# only on Windows (detected through COMSPEC), where the GnuWin32 path is
# quoted -- presumably because it contains spaces; confirm before changing.
ifdef COMSPEC
# Windows environment
MAKE := "C:/Program Files (x86)/GnuWin32/bin/make"
endif
# Linux/Mac environment: keep the built-in MAKE rather than forcing a PATH
# lookup of `make`, which may resolve to a different make than the one in use.
14+
715
.PHONY: build-%
816
build-%:
917
$(MAKE) $*-docker-build
@@ -76,15 +84,10 @@ label-studio-adapter-docker-build:
7684

7785
.PHONY: deer-flow-docker-build
7886
deer-flow-docker-build:
79-
@if [ -d "../deer-flow/.git" ]; then \
80-
cd ../deer-flow && git pull; \
81-
else \
82-
git clone git@github.com:bytedance/deer-flow.git ../deer-flow; \
83-
fi
84-
sed -i "s/dark/light/g" "../deer-flow/web/src/components/deer-flow/theme-provider-wrapper.tsx"
85-
cp -n deployment/docker/deer-flow/.env.example ../deer-flow/.env
86-
cp -n deployment/docker/deer-flow/conf.yaml.example ../deer-flow/conf.yaml
87-
cd ../deer-flow && docker compose build
87+
cp -n runtime/deer-flow/.env.example runtime/deer-flow/.env
88+
cp -n runtime/deer-flow/conf.yaml.example runtime/deer-flow/conf.yaml
89+
docker build -t deer-flow-backend:$(VERSION) . -f scripts/images/deer-flow-backend/Dockerfile
90+
docker build -t deer-flow-frontend:$(VERSION) . -f scripts/images/deer-flow-frontend/Dockerfile
8891

8992
.PHONY: mineru-docker-build
9093
mineru-docker-build:
@@ -131,16 +134,19 @@ mineru-k8s-uninstall:
131134

132135
.PHONY: datamate-docker-install
133136
datamate-docker-install:
134-
cd deployment/docker/datamate && cp -n .env.example .env && docker compose -f docker-compose.yml up -d
137+
cd deployment/docker/datamate && cp .env.example .env && docker compose -f docker-compose.yml up -d
135138

136139
.PHONY: datamate-docker-uninstall
137140
datamate-docker-uninstall:
138141
cd deployment/docker/datamate && docker compose -f docker-compose.yml down -v
139142

140143
.PHONY: deer-flow-docker-install
141144
deer-flow-docker-install:
142-
cd deployment/docker/datamate && cp -n .env.deer-flow.example .env && docker compose -f docker-compose.yml up -d
143-
cd deployment/docker/deer-flow && cp -n .env.example .env && cp -n conf.yaml.example conf.yaml && docker compose -f docker-compose.yml up -d
145+
cd deployment/docker/datamate && cp .env.deer-flow.example .env && docker compose -f docker-compose.yml up -d
146+
cp -n runtime/deer-flow/.env.example runtime/deer-flow/.env
147+
cp -n runtime/deer-flow/conf.yaml.example runtime/deer-flow/conf.yaml
148+
cp runtime/deer-flow/.env deployment/docker/deer-flow/.env && cp runtime/deer-flow/conf.yaml deployment/docker/deer-flow/conf.yaml
149+
cd deployment/docker/deer-flow && docker compose -f docker-compose.yml up -d
144150

145151
.PHONY: deer-flow-docker-uninstall
146152
deer-flow-docker-uninstall:
@@ -158,3 +164,22 @@ datamate-k8s-install: create-namespace
158164
datamate-k8s-uninstall:
159165
helm uninstall datamate -n $(NAMESPACE) --ignore-not-found
160166
kubectl delete configmap datamate-init-sql -n $(NAMESPACE) --ignore-not-found
167+
168+
# Install Deer Flow on Kubernetes:
#   1. upgrade/install the datamate release with global.deerFlow.enable=true;
#   2. seed runtime/deer-flow/.env and conf.yaml from their .example files when
#      they do not exist yet (same seeding as deer-flow-docker-install; -n keeps
#      files that are already present), so a fresh checkout does not fail;
#   3. copy both files into the deer-flow chart and upgrade/install it.
.PHONY: deer-flow-k8s-install
deer-flow-k8s-install:
	helm upgrade datamate deployment/helm/datamate/ -n $(NAMESPACE) --install --set global.deerFlow.enable=true
	cp -n runtime/deer-flow/.env.example runtime/deer-flow/.env
	cp -n runtime/deer-flow/conf.yaml.example runtime/deer-flow/conf.yaml
	cp runtime/deer-flow/.env deployment/helm/deer-flow/charts/public/.env
	cp runtime/deer-flow/conf.yaml deployment/helm/deer-flow/charts/public/conf.yaml
	helm upgrade deer-flow deployment/helm/deer-flow -n $(NAMESPACE) --install

# Remove the deer-flow release; succeeds even if it is not installed.
.PHONY: deer-flow-k8s-uninstall
deer-flow-k8s-uninstall:
	helm uninstall deer-flow -n $(NAMESPACE) --ignore-not-found

# Install or upgrade the milvus release from the bundled chart.
.PHONY: milvus-k8s-install
milvus-k8s-install:
	helm upgrade milvus deployment/helm/milvus -n $(NAMESPACE) --install

# Remove the milvus release; succeeds even if it is not installed.
.PHONY: milvus-k8s-uninstall
milvus-k8s-uninstall:
	helm uninstall milvus -n $(NAMESPACE) --ignore-not-found

backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/validator/CleanTaskValidator.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,26 @@
1717
public class CleanTaskValidator {
1818
private final CleaningTaskRepository cleaningTaskRepo;
1919

20-
public void checkNameDuplication (String name) {
20+
public void checkNameDuplication(String name) {
2121
if (cleaningTaskRepo.isNameExist(name)) {
2222
throw BusinessException.of(CleanErrorCode.DUPLICATE_TASK_NAME);
2323
}
2424
}
2525

26-
public void checkInputAndOutput (List<OperatorInstanceDto> operators) {
26+
public void checkInputAndOutput(List<OperatorInstanceDto> operators) {
2727
if (operators == null || operators.size() <= 1) {
2828
return;
2929
}
3030
for (int i = 1; i < operators.size(); i++) {
3131
OperatorInstanceDto front = operators.get(i - 1);
3232
OperatorInstanceDto back = operators.get(i);
33-
if (!StringUtils.equals(front.getOutputs(), back.getInputs())) {
34-
throw BusinessException.of(CleanErrorCode.IN_AND_OUT_NOT_MATCH,
35-
String.format(Locale.ROOT, "ops(name: [%s, %s]) inputs and outputs does not match",
36-
front.getName(), back.getName()));
33+
if (StringUtils.equals(front.getOutputs(), back.getInputs()) || StringUtils.equalsAny("multimodal",
34+
front.getOutputs(), back.getOutputs())) {
35+
continue;
3736
}
37+
throw BusinessException.of(CleanErrorCode.IN_AND_OUT_NOT_MATCH,
38+
String.format(Locale.ROOT, "ops(name: [%s, %s]) inputs and outputs does not match",
39+
front.getName(), back.getName()));
3840
}
3941
}
4042
}

backend/services/data-collection-service/src/main/java/com/datamate/collection/interfaces/rest/CollectionTaskController.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import com.datamate.datamanagement.application.DatasetApplicationService;
1111
import com.datamate.datamanagement.domain.model.dataset.Dataset;
1212
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
13+
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
1314
import jakarta.validation.Valid;
1415
import lombok.RequiredArgsConstructor;
1516
import lombok.extern.slf4j.Slf4j;
@@ -35,11 +36,13 @@ public class CollectionTaskController{
3536
public ResponseEntity<CollectionTaskResponse> createTask(@Valid @RequestBody CreateCollectionTaskRequest request) {
3637
CollectionTask task = CollectionTaskConverter.INSTANCE.toCollectionTask(request);
3738
String datasetId = null;
39+
DatasetResponse dataset = null;
3840
if (Objects.nonNull(request.getDataset())) {
39-
datasetId = datasetService.createDataset(request.getDataset()).getId();
41+
dataset = DatasetConverter.INSTANCE.convertToResponse(datasetService.createDataset(request.getDataset()));
42+
datasetId = dataset.getId();
4043
}
4144
CollectionTaskResponse response = CollectionTaskConverter.INSTANCE.toResponse(taskService.create(task, datasetId));
42-
response.setDataset(DatasetConverter.INSTANCE.convertToResponse(datasetService.getDataset(datasetId)));
45+
response.setDataset(dataset);
4346
return ResponseEntity.ok().body(response);
4447
}
4548

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetApplicationService.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ public void deleteDataset(String datasetId) {
119119
public Dataset getDataset(String datasetId) {
120120
Dataset dataset = datasetRepository.getById(datasetId);
121121
BusinessAssert.notNull(dataset, DataManagementErrorCode.DATASET_NOT_FOUND);
122+
List<DatasetFile> datasetFiles = datasetFileRepository.findAllByDatasetId(datasetId);
123+
dataset.setFiles(datasetFiles);
122124
return dataset;
123125
}
124126

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ public DatasetFile getDatasetFile(String datasetId, String fileId) {
102102
public void deleteDatasetFile(String datasetId, String fileId) {
103103
DatasetFile file = getDatasetFile(datasetId, fileId);
104104
Dataset dataset = datasetRepository.getById(datasetId);
105+
dataset.setFiles(new ArrayList<>(Collections.singleton(file)));
106+
datasetFileRepository.removeById(fileId);
107+
dataset.removeFile(file);
108+
datasetRepository.updateById(dataset);
105109
// 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录
106110
if (file.getFilePath().startsWith(dataset.getPath())) {
107111
try {
@@ -111,9 +115,6 @@ public void deleteDatasetFile(String datasetId, String fileId) {
111115
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
112116
}
113117
}
114-
datasetFileRepository.removeById(fileId);
115-
dataset.removeFile(file);
116-
datasetRepository.updateById(dataset);
117118
}
118119

119120
/**

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/FileMetadataService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ private DatasetFile extractFileMetadata(String filePath, String datasetId) throw
110110
.fileType(fileType)
111111
.uploadTime(LocalDateTime.now())
112112
.lastAccessTime(LocalDateTime.now())
113-
.status("UPLOADED")
113+
.status("ACTIVE")
114114
.build();
115115
}
116116

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFile.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@
22

33
import com.baomidou.mybatisplus.annotation.TableId;
44
import com.baomidou.mybatisplus.annotation.TableName;
5+
import com.fasterxml.jackson.databind.ObjectMapper;
56
import lombok.*;
7+
import org.apache.commons.lang3.StringUtils;
68

79
import java.time.LocalDateTime;
10+
import java.util.Arrays;
11+
import java.util.Collections;
812
import java.util.List;
913

1014
/**
@@ -25,11 +29,25 @@ public class DatasetFile {
2529
private String fileType; // JPG/PNG/DCM/TXT
2630
private Long fileSize; // bytes
2731
private String checkSum;
28-
private List<String> tags;
32+
private String tags;
2933
private String metadata;
3034
private String status; // UPLOADED, PROCESSING, COMPLETED, ERROR
3135
private LocalDateTime uploadTime;
3236
private LocalDateTime lastAccessTime;
3337
private LocalDateTime createdAt;
3438
private LocalDateTime updatedAt;
39+
40+
/**
41+
* 解析标签
42+
*
43+
* @return 标签列表
44+
*/
45+
public List<String> analyzeTag() {
46+
try {
47+
ObjectMapper mapper = new ObjectMapper();
48+
return mapper.readValue(tags, List.class);
49+
} catch (Exception e) {
50+
return Collections.emptyList();
51+
}
52+
}
3553
}

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/StatusConstants.java

Lines changed: 0 additions & 33 deletions
This file was deleted.

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/converter/DatasetConverter.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,24 @@
11
package com.datamate.datamanagement.interfaces.converter;
22

3+
import com.datamate.common.infrastructure.exception.BusinessException;
4+
import com.datamate.common.infrastructure.exception.SystemErrorCode;
35
import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest;
46
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
57
import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
68
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
79
import com.datamate.common.domain.model.ChunkUploadRequest;
810
import com.datamate.datamanagement.domain.model.dataset.Dataset;
911
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
12+
import com.fasterxml.jackson.databind.ObjectMapper;
13+
import org.apache.commons.collections4.CollectionUtils;
1014
import org.mapstruct.Mapper;
1115
import org.mapstruct.Mapping;
16+
import org.mapstruct.Named;
1217
import org.mapstruct.factory.Mappers;
1318

19+
import java.util.HashMap;
1420
import java.util.List;
21+
import java.util.Map;
1522

1623
/**
1724
* 数据集文件转换器
@@ -26,6 +33,7 @@ public interface DatasetConverter {
2633
*/
2734
@Mapping(source = "sizeBytes", target = "totalSize")
2835
@Mapping(source = "path", target = "targetLocation")
36+
@Mapping(source = "files", target = "distribution", qualifiedByName = "getDistribution")
2937
DatasetResponse convertToResponse(Dataset dataset);
3038

3139
/**
@@ -49,4 +57,28 @@ public interface DatasetConverter {
4957
* 将数据集文件转换为响应
5058
*/
5159
DatasetFileResponse convertToResponse(DatasetFile datasetFile);
60+
61+
/**
62+
* 获取数据文件的标签分布
63+
*
64+
* @param datasetFiles 数据集文件
65+
* @return 标签分布
66+
*/
67+
@Named("getDistribution")
68+
default Map<String, Long> getDistribution(List<DatasetFile> datasetFiles) {
69+
Map<String, Long> distribution = new HashMap<>();
70+
if (CollectionUtils.isEmpty(datasetFiles)) {
71+
return distribution;
72+
}
73+
for (DatasetFile datasetFile : datasetFiles) {
74+
List<String> tags = datasetFile.analyzeTag();
75+
if (CollectionUtils.isEmpty(tags)) {
76+
continue;
77+
}
78+
for (String tag : tags) {
79+
distribution.put(tag, distribution.getOrDefault(tag, 0L) + 1);
80+
}
81+
}
82+
return distribution;
83+
}
5284
}

0 commit comments

Comments
 (0)