Skip to content

Commit c22683d

Browse files
authored
优化部分问题 (#126)
* feature: 支持相对路径引用 * feature: 优化本地部署命令 * feature: 优化算子编排展示 * feature: 优化清洗任务失败后重试
1 parent 04cff3f commit c22683d

File tree

5 files changed

+19
-22
lines changed

5 files changed

+19
-22
lines changed

README-zh.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,15 @@ make install-mineru
7070
```
7171

7272
### 部署DeerFlow服务
73-
1. 修改runtime/deer-flow/.env.example,添加SEARCH_API_KEY和EMBEDDING模型配置
74-
2. 修改runtime/deer-flow/.conf.yaml.example,添加基础模型服务配置
75-
3. 执行`make install-deer-flow`
73+
```bash
74+
make install-deer-flow
75+
```
7676

7777
### 本地开发部署
7878
本地代码修改后,请执行以下命令构建镜像并使用本地镜像部署
7979
```bash
8080
make build
81-
make install REGISTRY=""
81+
make install dev=true
8282
```
8383

8484
## 🤝 贡献指南

backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/CleaningTaskService.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,10 @@ public void deleteTask(String taskId) {
171171
}
172172

173173
public void executeTask(String taskId) {
174-
List<CleaningResultDto> failed = cleaningResultRepo.findByInstanceId(taskId, "FAILED");
175-
Set<String> failedSet = failed.stream().map(CleaningResultDto::getSrcFileId).collect(Collectors.toSet());
174+
List<CleaningResultDto> succeed = cleaningResultRepo.findByInstanceId(taskId, "COMPLETED");
175+
Set<String> succeedSet = succeed.stream().map(CleaningResultDto::getSrcFileId).collect(Collectors.toSet());
176176
CleaningTaskDto task = cleaningTaskRepo.findTaskById(taskId);
177-
scanDataset(taskId, task.getSrcDatasetId(), failedSet);
177+
scanDataset(taskId, task.getSrcDatasetId(), succeedSet);
178178
cleaningResultRepo.deleteByInstanceId(taskId, "FAILED");
179179
taskScheduler.executeTask(taskId);
180180
}
@@ -232,7 +232,7 @@ private void scanDataset(String taskId, String srcDatasetId) {
232232
} while (pageNumber < datasetFiles.getTotalPages());
233233
}
234234

235-
private void scanDataset(String taskId, String srcDatasetId, Set<String> failedFiles) {
235+
private void scanDataset(String taskId, String srcDatasetId, Set<String> succeedFiles) {
236236
int pageNumber = 0;
237237
int pageSize = 500;
238238
PagingQuery pageRequest = new PagingQuery(pageNumber, pageSize);
@@ -243,7 +243,7 @@ private void scanDataset(String taskId, String srcDatasetId, Set<String> failedF
243243
break;
244244
}
245245
List<Map<String, Object>> files = datasetFiles.getContent().stream()
246-
.filter(content -> failedFiles.contains(content.getId()))
246+
.filter(content -> !succeedFiles.contains(content.getId()))
247247
.map(content -> Map.of("fileName", (Object) content.getFileName(),
248248
"fileSize", content.getFileSize(),
249249
"filePath", content.getFilePath(),

frontend/src/pages/DataCleansing/Create/components/OperatorOrchestration.tsx

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -182,14 +182,6 @@ const OperatorFlow: React.FC<OperatorFlowProps> = ({
182182
{operator?.categories?.map((categoryId) => {
183183
return <Tag color="default">{categoryMap[categoryId].name}</Tag>
184184
})}
185-
{/* 参数状态指示 */}
186-
{Object.values(operator.configs).some(
187-
(param: any) =>
188-
(param.type === "input" && !param.value) ||
189-
(param.type === "checkbox" &&
190-
Array.isArray(param.value) &&
191-
param.value.length === 0)
192-
) && <Tag color="red">待配置</Tag>}
193185
{/* 操作按钮 */}
194186
<span
195187
className="cursor-pointer text-red-500"

runtime/python-executor/datamate/core/dataset.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
from datamate.core.constant import Fields
1717
from datamate.core.base_op import OPERATORS, BaseOp
1818

19+
from core.base_op import Filter as RELATIVE_Filter, Mapper as RELATIVE_Mapper, Slicer as RELATIVE_Slicer
20+
1921
rd.DataContext.get_current().enable_progress_bars = False
2022

2123

@@ -136,7 +138,10 @@ def load_ops_module(self, op_name):
136138
parent_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "ops")
137139
if parent_dir not in sys.path:
138140
sys.path.insert(0, parent_dir)
139-
registry_content = OPERATORS.modules[op_name]
141+
registry_content = OPERATORS.modules.get(op_name)
142+
if registry_content is None:
143+
from core.base_op import OPERATORS as RELATIVE_OPERATORS
144+
registry_content = RELATIVE_OPERATORS.modules.get(op_name)
140145
if isinstance(registry_content, str):
141146
# registry_content是module的路径
142147
submodule = importlib.import_module(registry_content)
@@ -171,23 +176,23 @@ def _run_single_op(self, operators_cls, init_kwargs, **kwargs):
171176

172177
kwargs.update({"ext_params": {}, "failed_reason": {}, "target_type": None})
173178
try:
174-
if issubclass(operators_cls, Mapper):
179+
if issubclass(operators_cls, (Mapper, RELATIVE_Mapper)):
175180
self.data = self.data.map(operators_cls,
176181
fn_constructor_kwargs=init_kwargs,
177182
fn_kwargs=kwargs,
178183
resources=resources,
179184
num_cpus=0.05,
180185
concurrency=(1, 1 if operators_cls.use_model else int(max_actor_nums)))
181186

182-
elif issubclass(operators_cls, Slicer):
187+
elif issubclass(operators_cls, (Slicer, RELATIVE_Slicer)):
183188
self.data = self.data.flat_map(operators_cls,
184189
fn_constructor_kwargs=init_kwargs,
185190
fn_kwargs=kwargs,
186191
resources=resources,
187192
num_cpus=0.05,
188193
concurrency=(1, int(max_actor_nums)))
189194

190-
elif issubclass(operators_cls, Filter):
195+
elif issubclass(operators_cls, (Filter, RELATIVE_Filter)):
191196
self.data = self.data.filter(operators_cls,
192197
fn_constructor_kwargs=init_kwargs,
193198
fn_kwargs=kwargs,

scripts/images/deer-flow-backend/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,4 @@ RUN --mount=type=cache,target=/root/.cache/uv \
2626
EXPOSE 8000
2727

2828
# Run the application.
29-
CMD ["uv", "run", "python", "server.py", "--host", "0.0.0.0", "--port", "8000"]
29+
CMD ["uv", "run", "--no-sync", "python", "server.py", "--host", "0.0.0.0", "--port", "8000"]

0 commit comments

Comments
 (0)