Skip to content

Commit 665c11f

Browse files
authored
Merge branch 'ModelEngine-Group:main' into main
2 parents 69b9517 + c998de2 commit 665c11f

File tree

83 files changed

+3319
-759
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+3319
-759
lines changed

.github/workflows/docker-image-backend.yml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,26 @@ name: Backend Docker Image CI
22

33
on:
44
push:
5-
branches: [ "develop_930" ]
5+
branches: [ "main" ]
66
paths:
77
- 'backend/**'
88
- 'scripts/images/backend/**'
99
- '.github/workflows/docker-image-backend.yml'
1010
pull_request:
11-
branches: [ "develop_930" ]
11+
branches: [ "main" ]
1212
paths:
1313
- 'backend/**'
1414
- 'scripts/images/backend/**'
1515
- '.github/workflows/docker-image-backend.yml'
1616
workflow_dispatch:
1717

1818
jobs:
19-
20-
build:
21-
22-
runs-on: ubuntu-latest
23-
24-
steps:
25-
- uses: actions/checkout@v4
26-
- name: Build the Backend Docker image
27-
run: make build-backend
19+
call-docker-build:
20+
name: Build and Push Backend Docker Image
21+
uses: ./.github/workflows/docker-images-reusable.yml
22+
permissions:
23+
contents: read
24+
packages: write
25+
with:
26+
service_name: backend
27+
build_dir: .

.github/workflows/docker-image-frontend.yml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,26 @@ name: Frontend Docker Image CI
22

33
on:
44
push:
5-
branches: [ "develop_930" ]
5+
branches: [ "main" ]
66
paths:
77
- 'frontend/**'
88
- 'scripts/images/frontend/**'
99
- '.github/workflows/docker-image-frontend.yml'
1010
pull_request:
11-
branches: [ "develop_930" ]
11+
branches: [ "main" ]
1212
paths:
1313
- 'frontend/**'
1414
- 'scripts/images/frontend/**'
1515
- '.github/workflows/docker-image-frontend.yml'
1616
workflow_dispatch:
1717

1818
jobs:
19-
20-
build:
21-
22-
runs-on: ubuntu-latest
23-
24-
steps:
25-
- uses: actions/checkout@v4
26-
- name: Build the Frontend Docker image
27-
run: make build-frontend
19+
call-docker-build:
20+
name: Build and Push Frontend Docker Image
21+
uses: ./.github/workflows/docker-images-reusable.yml
22+
permissions:
23+
contents: read
24+
packages: write
25+
with:
26+
service_name: frontend
27+
build_dir: .

.github/workflows/docker-image-runtime.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
name: Runtime Docker Image CI
2+
3+
on:
4+
push:
5+
branches: [ "main" ]
6+
paths:
7+
- 'runtime/**'
8+
- 'scripts/images/runtime/**'
9+
- '.github/workflows/docker-image-runtime.yml'
10+
pull_request:
11+
branches: [ "main" ]
12+
paths:
13+
- 'runtime/**'
14+
- 'scripts/images/runtime/**'
15+
- '.github/workflows/docker-image-runtime.yml'
16+
workflow_dispatch:
17+
18+
jobs:
19+
call-docker-build:
20+
name: Build and Push Runtime Docker Image
21+
uses: ./.github/workflows/docker-images-reusable.yml
22+
permissions:
23+
contents: read
24+
packages: write
25+
with:
26+
service_name: runtime
27+
build_dir: .

.github/workflows/docker-images-reusable.yml

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
name: Docker Image Build & Push
2+
3+
on:
4+
workflow_call:
5+
inputs:
6+
service_name:
7+
required: true
8+
type: string
9+
build_dir:
10+
required: true
11+
type: string
12+
13+
jobs:
14+
build-and-push:
15+
runs-on: ubuntu-latest
16+
permissions:
17+
contents: read
18+
packages: write
19+
20+
steps:
21+
- uses: actions/checkout@v4
22+
23+
- name: Login to GitHub Container Registry
24+
if: github.event_name != 'pull_request'
25+
uses: docker/login-action@v3
26+
with:
27+
registry: ghcr.io
28+
username: ${{ github.actor }}
29+
password: ${{ secrets.GITHUB_TOKEN }}
30+
31+
- name: Set Docker Image Tag
32+
id: set-tag
33+
run: |
34+
LOWERCASE_REPO=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
35+
BASE_IMAGE=ghcr.io/$LOWERCASE_REPO/datamate-${{ inputs.service_name }}
36+
if [[ $GITHUB_REF == refs/tags/v* ]]; then
37+
TAG=${GITHUB_REF#refs/tags/v}
38+
echo "TAGS=$BASE_IMAGE:$TAG" >> $GITHUB_OUTPUT
39+
elif [[ $GITHUB_REF == refs/heads/main ]]; then
40+
echo "TAGS=$BASE_IMAGE:latest" >> $GITHUB_OUTPUT
41+
else
42+
echo "TAGS=$BASE_IMAGE:temp" >> $GITHUB_OUTPUT
43+
fi
44+
45+
- name: Build Docker Image
46+
run: |
47+
make build-${{ inputs.service_name }} VERSION=latest
48+
49+
- name: Tag Docker Image
50+
run: |
51+
docker tag datamate-${{ inputs.service_name }}:latest ${{ steps.set-tag.outputs.TAGS }}
52+
53+
- name: Push Docker Image
54+
if: github.event_name != 'pull_request'
55+
run: |
56+
docker push ${{ steps.set-tag.outputs.TAGS }}
57+

Makefile

Lines changed: 39 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ else
3333
endif
3434

3535
.PHONY: install
36-
install: install-data-mate
36+
install: install-datamate
3737

3838
.PHONY: uninstall-%
3939
uninstall-%:
@@ -54,111 +54,105 @@ else
5454
endif
5555

5656
.PHONY: uninstall
57-
uninstall: uninstall-data-mate
57+
uninstall: uninstall-datamate
5858

5959
# build
60-
.PHONY: mineru-docker-build
61-
mineru-docker-build:
62-
docker build -t mineru:$(VERSION) . -f scripts/images/mineru/Dockerfile
63-
64-
.PHONY: datax-docker-build
65-
datax-docker-build:
66-
docker build -t datax:$(VERSION) . -f scripts/images/datax/Dockerfile
67-
68-
.PHONY: unstructured-docker-build
69-
unstructured-docker-build:
70-
docker build -t unstructured:$(VERSION) . -f scripts/images/unstructured/Dockerfile
71-
7260
.PHONY: backend-docker-build
7361
backend-docker-build:
74-
docker build -t backend:$(VERSION) . -f scripts/images/backend/Dockerfile
62+
docker build -t datamate-backend:$(VERSION) . -f scripts/images/backend/Dockerfile
7563

7664
.PHONY: frontend-docker-build
7765
frontend-docker-build:
78-
docker build -t frontend:$(VERSION) . -f scripts/images/frontend/Dockerfile
66+
docker build -t datamate-frontend:$(VERSION) . -f scripts/images/frontend/Dockerfile
7967

8068
.PHONY: runtime-docker-build
8169
runtime-docker-build:
82-
docker build -t runtime:$(VERSION) . -f scripts/images/runtime/Dockerfile
70+
docker build -t datamate-runtime:$(VERSION) . -f scripts/images/runtime/Dockerfile
71+
72+
.PHONY: label-studio-adapter-docker-build
73+
label-studio-adapter-docker-build:
74+
docker build -t label-studio-adapter:$(VERSION) . -f scripts/images/label-studio-adapter/Dockerfile
8375

8476
.PHONY: backend-docker-install
8577
backend-docker-install:
86-
cd deployment/docker/data-mate && docker-compose up -d backend
78+
cd deployment/docker/datamate && docker-compose up -d backend
8779

8880
.PHONY: backend-docker-uninstall
8981
backend-docker-uninstall:
90-
cd deployment/docker/data-mate && docker-compose down backend
82+
cd deployment/docker/datamate && docker-compose down backend
9183

9284
.PHONY: frontend-docker-install
9385
frontend-docker-install:
94-
cd deployment/docker/data-mate && docker-compose up -d frontend
86+
cd deployment/docker/datamate && docker-compose up -d frontend
9587

9688
.PHONY: frontend-docker-uninstall
9789
frontend-docker-uninstall:
98-
cd deployment/docker/data-mate && docker-compose down frontend
90+
cd deployment/docker/datamate && docker-compose down frontend
9991

10092
.PHONY: runtime-docker-install
10193
runtime-docker-install:
102-
cd deployment/docker/data-mate && docker-compose up -d runtime
94+
cd deployment/docker/datamate && docker-compose up -d runtime
10395

10496
.PHONY: runtime-docker-uninstall
10597
runtime-docker-uninstall:
106-
cd deployment/docker/data-mate && docker-compose down runtime
98+
cd deployment/docker/datamate && docker-compose down runtime
10799

108100
.PHONY: runtime-k8s-install
109101
runtime-k8s-install: create-namespace
110-
helm upgrade kuberay-operator deployment/helm/ray/kuberay-operator --install -n $(NAMESPACE)
111-
helm upgrade raycluster deployment/helm/ray/ray-cluster/ --install -n $(NAMESPACE)
102+
helm upgrade datamate-kuberay-operator deployment/helm/ray/kuberay-operator --install -n $(NAMESPACE)
103+
helm upgrade datamate-raycluster deployment/helm/ray/ray-cluster/ --install -n $(NAMESPACE)
112104
kubectl apply -f deployment/helm/ray/service.yaml -n $(NAMESPACE)
113105

114106
.PHONY: runtime-k8s-uninstall
115107
runtime-k8s-uninstall:
116-
helm uninstall raycluster -n $(NAMESPACE)
117-
helm uninstall kuberay-operator -n $(NAMESPACE)
108+
helm uninstall datamate-raycluster -n $(NAMESPACE)
109+
helm uninstall datamate-kuberay-operator -n $(NAMESPACE)
118110
kubectl delete -f deployment/helm/ray/service.yaml -n $(NAMESPACE)
119111

120-
.PHONY: unstructured-k8s-install
121-
unstructured-k8s-install: create-namespace
122-
kubectl apply -f deployment/kubernetes/unstructured/deploy.yaml -n $(NAMESPACE)
123-
124112
.PHONY: mysql-k8s-install
125113
mysql-k8s-install: create-namespace
126-
kubectl create configmap init-sql --from-file=scripts/db/ --dry-run=client -o yaml | kubectl apply -f - -n $(NAMESPACE)
114+
kubectl create configmap datamate-init-sql --from-file=scripts/db/ --dry-run=client -o yaml | kubectl apply -f - -n $(NAMESPACE)
127115
kubectl apply -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE)
128116
kubectl apply -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE)
129117

130118
.PHONY: mysql-k8s-uninstall
131119
mysql-k8s-uninstall:
132-
kubectl delete configmap init-sql -n $(NAMESPACE)
133-
kubectl delete -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE)
134-
kubectl delete -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE)
120+
kubectl delete configmap datamate-init-sql -n $(NAMESPACE) --ignore-not-found
121+
kubectl delete -f deployment/kubernetes/mysql/configmap.yaml -n $(NAMESPACE) --ignore-not-found
122+
kubectl delete -f deployment/kubernetes/mysql/deploy.yaml -n $(NAMESPACE) --ignore-not-found
123+
124+
.PHONY: database-k8s-install
125+
database-k8s-install: mysql-k8s-install
126+
127+
.PHONY: database-k8s-uninstall
128+
database-k8s-uninstall: mysql-k8s-uninstall
135129

136130
.PHONY: backend-k8s-install
137131
backend-k8s-install: create-namespace
138132
kubectl apply -f deployment/kubernetes/backend/deploy.yaml -n $(NAMESPACE)
139133

140134
.PHONY: backend-k8s-uninstall
141135
backend-k8s-uninstall:
142-
kubectl delete -f deployment/kubernetes/backend/deploy.yaml -n $(NAMESPACE)
136+
kubectl delete -f deployment/kubernetes/backend/deploy.yaml -n $(NAMESPACE) --ignore-not-found
143137

144138
.PHONY: frontend-k8s-install
145139
frontend-k8s-install: create-namespace
146140
kubectl apply -f deployment/kubernetes/frontend/deploy.yaml -n $(NAMESPACE)
147141

148142
.PHONY: frontend-k8s-uninstall
149143
frontend-k8s-uninstall:
150-
kubectl delete -f deployment/kubernetes/frontend/deploy.yaml -n $(NAMESPACE)
144+
kubectl delete -f deployment/kubernetes/frontend/deploy.yaml -n $(NAMESPACE) --ignore-not-found
151145

152-
.PHONY: data-mate-docker-install
153-
data-mate-docker-install:
146+
.PHONY: datamate-docker-install
147+
datamate-docker-install:
154148
cd deployment/docker/datamate && docker-compose up -d
155149

156-
.PHONY: data-mate-docker-uninstall
157-
data-mate-docker-uninstall:
150+
.PHONY: datamate-docker-uninstall
151+
datamate-docker-uninstall:
158152
cd deployment/docker/datamate && docker-compose down
159153

160-
.PHONY: data-mate-k8s-install
161-
data-mate-k8s-install: create-namespace mysql-k8s-install backend-k8s-install frontend-k8s-install runtime-k8s-install
154+
.PHONY: datamate-k8s-install
155+
datamate-k8s-install: create-namespace database-k8s-install backend-k8s-install frontend-k8s-install runtime-k8s-install
162156

163-
.PHONY: data-mate-k8s-uninstall
164-
data-mate-k8s-uninstall: mysql-k8s-uninstall backend-k8s-uninstall frontend-k8s-uninstall runtime-k8s-uninstall
157+
.PHONY: datamate-k8s-uninstall
158+
datamate-k8s-uninstall: database-k8s-uninstall backend-k8s-uninstall frontend-k8s-uninstall runtime-k8s-uninstall

README-zh.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@
99
![GitHub Issues](https://img.shields.io/github/issues/ModelEngine-Group/DataMate)
1010
![GitHub License](https://img.shields.io/github/license/ModelEngine-Group/DataMate)
1111

12-
**DataMate是面向模型微调与RAG检索的企业级数据处理平台,支持数据归集、数据管理、算子市场、数据清洗、数据合成、数据标注、数据评估、知识生成等核心功能。
13-
**
12+
**DataMate是面向模型微调与RAG检索的企业级数据处理平台,支持数据归集、数据管理、算子市场、数据清洗、数据合成、数据标注、数据评估、知识生成等核心功能。**
1413

1514
[简体中文](./README-zh.md) | [English](./README.md)
1615

backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/httpclient/RuntimeClient.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
@Slf4j
1616
public class RuntimeClient {
17-
private static final String BASE_URL = "http://runtime:8081/api";
17+
private static final String BASE_URL = "http://datamate-runtime:8081/api";
1818

1919
private static final String CREATE_TASK_URL = BASE_URL + "/task/{0}/submit";
2020

backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/service/CleaningTaskService.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningResultMapper;
1313
import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper;
1414
import com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper;
15+
import com.datamate.cleaning.interfaces.dto.CleaningProcess;
1516
import com.datamate.cleaning.interfaces.dto.CleaningTask;
1617
import com.datamate.cleaning.interfaces.dto.CreateCleaningTaskRequest;
1718
import com.datamate.cleaning.interfaces.dto.OperatorInstance;
@@ -55,7 +56,14 @@ public class CleaningTaskService {
5556

5657
public List<CleaningTask> getTasks(String status, String keywords, Integer page, Integer size) {
5758
Integer offset = page * size;
58-
return cleaningTaskMapper.findTasks(status, keywords, size, offset);
59+
List<CleaningTask> tasks = cleaningTaskMapper.findTasks(status, keywords, size, offset);
60+
tasks.forEach(this::setProcess);
61+
return tasks;
62+
}
63+
64+
private void setProcess(CleaningTask task) {
65+
int count = cleaningResultMapper.countByInstanceId(task.getId());
66+
task.setProgress(CleaningProcess.of(task.getFileCount(), count));
5967
}
6068

6169
public int countTasks(String status, String keywords) {
@@ -80,6 +88,7 @@ public CleaningTask createTask(CreateCleaningTaskRequest request) {
8088
task.setDestDatasetId(destDataset.getId());
8189
task.setDestDatasetName(destDataset.getName());
8290
task.setBeforeSize(srcDataset.getTotalSize());
91+
task.setFileCount(srcDataset.getFileCount());
8392
cleaningTaskMapper.insertTask(task);
8493

8594
List<OperatorInstancePo> instancePos = request.getInstance().stream()
@@ -93,7 +102,9 @@ public CleaningTask createTask(CreateCleaningTaskRequest request) {
93102
}
94103

95104
public CleaningTask getTask(String taskId) {
96-
return cleaningTaskMapper.findTaskById(taskId);
105+
CleaningTask task = cleaningTaskMapper.findTaskById(taskId);
106+
setProcess(task);
107+
return task;
97108
}
98109

99110
@Transactional
@@ -113,7 +124,7 @@ private void prepareTask(CleaningTask task, List<OperatorInstance> instances) {
113124
process.setDatasetId(task.getDestDatasetId());
114125
process.setDatasetPath(FLOW_PATH + "/" + task.getId() + "/dataset.jsonl");
115126
process.setExportPath(DATASET_PATH + "/" + task.getDestDatasetId());
116-
process.setExecutorType(ExecutorType.DATA_PLATFORM.getValue());
127+
process.setExecutorType(ExecutorType.DATAMATE.getValue());
117128
process.setProcess(instances.stream()
118129
.map(instance -> Map.of(instance.getId(), instance.getOverrides()))
119130
.toList());

0 commit comments

Comments
 (0)