feat: add templates & cli_rl (#339)

ZQlQZ · web-flow · commit 643cfdb8d484 · 2025-12-01T10:19:13.000+08:00
* add templates & cli_rl

* add readme

* update ark_rk readme
diff --git a/veadk/cli/cli.py b/veadk/cli/cli.py
@@ -26,6 +26,7 @@
 from veadk.cli.cli_uploadevalset import uploadevalset
 from veadk.cli.cli_update import update
 from veadk.cli.cli_clean import clean
+from veadk.cli.cli_rl import rl_group
 from veadk.version import VERSION
 
 
@@ -53,6 +54,7 @@ def veadk():
 veadk.add_command(uploadevalset)
 veadk.add_command(update)
 veadk.add_command(clean)
+veadk.add_command(rl_group)
 
 if __name__ == "__main__":
     veadk()
diff --git a/veadk/cli/cli_rl.py b/veadk/cli/cli_rl.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import click
+import shutil
+import sys
+from pathlib import Path
+
+
+def get_rl_template_root() -> Path:
+    """Return the absolute path of the bundled RL scaffold templates.
+
+    The templates are looked up in the ``templates/rl`` directory that sits
+    next to this module.
+    """
+    return Path(__file__).resolve().parent / "templates" / "rl"
+
+
+@click.group(name="rl", help="RL related commands")
+def rl_group():
+    """Root of the ``rl`` command group; it performs no work of its own."""
+
+
+@rl_group.command(
+    name="init", help="Initialize RL scaffold project (specify platform/workspace)"
+)
+@click.option(
+    "--platform",
+    "-p",
+    required=True,
+    type=click.Choice(["ark"], case_sensitive=False),
+    help="Scaffold platform type (only support for now: ark)",
+)
+@click.option(
+    "--workspace", "-w", required=True, type=str, help="Target workspace directory name"
+)
+@click.option(
+    "--overwrite",
+    "-f",
+    is_flag=True,
+    help="Force overwrite existing workspace (default: false)",
+)
+def rl_init(platform: str, workspace: str, overwrite: bool):
+    """Copy the RL scaffold template for ``platform`` into ``workspace``.
+
+    Example: veadk rl init --platform ark --workspace veadk_rl_ark_project
+
+    Args:
+        platform: Scaffold platform name; its lower-cased form selects the
+            template sub-directory.
+        workspace: Target directory, resolved against the current working
+            directory.
+        overwrite: When set, an existing target is replaced without asking
+            for confirmation.
+
+    The process exits with status 1 on any error and with status 0 when the
+    user declines to overwrite an existing target.
+    """
+    # Locate template directory
+    rl_template_root = get_rl_template_root()
+    platform_template_dir = rl_template_root / platform.lower()
+
+    # Validate template directory
+    if not platform_template_dir.is_dir():
+        supported = [d.name for d in rl_template_root.glob("*") if d.is_dir()]
+        click.secho(f"Error: Scaffold template for {platform} not found!", fg="red")
+        click.secho(f"  Expected path: {platform_template_dir}", fg="yellow")
+        click.secho(f"  Supported platforms: {supported}", fg="blue")
+        sys.exit(1)
+
+    # Target workspace path
+    cwd = Path.cwd().resolve()
+    target_workspace = Path.cwd() / workspace
+    resolved_target = target_workspace.resolve()
+
+    # A workspace such as "", "." or ".." resolves to the current directory or
+    # one of its ancestors; replacing it would delete the directory the user
+    # is working in, so refuse before touching anything.
+    if resolved_target == cwd or resolved_target in cwd.parents:
+        click.secho(
+            f"Error: Workspace {resolved_target} is the current directory or one "
+            "of its parents; refusing to replace it",
+            fg="red",
+        )
+        sys.exit(1)
+
+    # Handle existing target (is_symlink also catches dangling symlinks, for
+    # which exists() is False but copytree would still fail)
+    if target_workspace.exists() or target_workspace.is_symlink():
+        if not overwrite:
+            click.secho(
+                f"\nWarning: Target directory {target_workspace} already exists!",
+                fg="yellow",
+            )
+            if not click.confirm("Overwrite?"):
+                click.secho("Operation cancelled", fg="red")
+                sys.exit(0)
+        try:
+            if target_workspace.is_dir() and not target_workspace.is_symlink():
+                shutil.rmtree(target_workspace)
+            else:
+                # rmtree rejects regular files and symlinks; remove them directly.
+                target_workspace.unlink()
+        except OSError as e:
+            click.secho(
+                f"Error: Failed to remove existing {target_workspace} - {e}",
+                fg="red",
+            )
+            sys.exit(1)
+        click.secho(f"Cleared existing directory: {target_workspace}", fg="green")
+
+    # Copy scaffold files
+    try:
+        shutil.copytree(src=platform_template_dir, dst=target_workspace)
+    except PermissionError:
+        click.secho(
+            f"Error: Permission denied to write to {target_workspace}", fg="red"
+        )
+        sys.exit(1)
+    except OSError as e:
+        # shutil.Error is an OSError subclass, so partial-copy failures land here.
+        click.secho(f"Error: Failed to copy scaffold - {str(e)}", fg="red")
+        sys.exit(1)
+    else:
+        click.secho("\nRL scaffold initialized successfully!", fg="green")
+        click.secho(f"  - Project path: {target_workspace.absolute()}", fg="green")
+
+
+# Invoke the `rl` command group when this module is executed as a script.
+if __name__ == "__main__":
+    rl_group()
diff --git a/veadk/cli/templates/rl/ark/.python-version b/veadk/cli/templates/rl/ark/.python-version
@@ -0,0 +1 @@
+3.10
diff --git a/veadk/cli/templates/rl/ark/README.md b/veadk/cli/templates/rl/ark/README.md
@@ -0,0 +1,45 @@
+# 基于方舟平台强化学习
+方舟 RL 将强化学习过程进行了一定程度的封装，降低了复杂度。用户主要关注 rollout 中的 agent 逻辑、奖励函数的构建、训练样本的选择即可。
+VeADK 与方舟平台 Agent RL 集成，用户使用 VeADK 提供的脚手架，可以开发 VeADK Agent，然后提交任务到方舟平台进行强化学习优化。
+## 准备工作
+在你的终端中运行以下命令，初始化一个强化学习项目：
+```shell
+veadk rl init --platform ark --workspace veadk_rl_ark_project
+```
+该命令会在当前目录下创建一个名为 `veadk_rl_ark_project` 的文件夹，其中包含了一个基本的强化学习项目结构。
+然后在终端中运行以下命令，提交任务到方舟平台：
+```shell
+cd veadk_rl_ark_project
+python job.py
+```
+## 原理说明
+生成后的项目结构如下，其中核心文件包括：
+- 数据集: `data/*.jsonl`
+- `/plugins`文件夹下的rollout和reward:
+  - rollout：用以规定agent的工作流，`raw_async_veadk_rollout.py`提供了在方舟RL中使用VeADK agent的示例
+  - reward：给出强化学习所需的奖励值，在`random_reward.py`给出了示例
+- `job.py`或`job.yaml`：用以配置训练参数，并指定需要使用的rollout和reward
+```shell
+veadk_rl_ark_project
+├── data
+│   └── demo_dataset.jsonl # 训练数据
+├── plugins
+│   ├── random_reward.py # reward规则设定
+│   ├── raw_async_veadk_rollout.py # rollout工作流设定
+│   └── test_utils.py
+├── README.md
+├── arkworkspace.toml # 插件打包配置
+├── job.py # 任务提交代码
+├── job.yaml # 任务配置
+├── requirements.txt
+└── test_faas.py # VeFaaS 测试脚本
+```
+## 运行
+```bash
+ark create mcj -f job.yaml
+```
+或
+```bash
+python job.py   
+```
diff --git a/veadk/cli/templates/rl/ark/arkworkspace.toml b/veadk/cli/templates/rl/ark/arkworkspace.toml
@@ -0,0 +1,2 @@
+[plugin.package]
+include=["*.py"]
diff --git a/veadk/cli/templates/rl/ark/data/demo_dataset.jsonl b/veadk/cli/templates/rl/ark/data/demo_dataset.jsonl
@@ -0,0 +1,3 @@
+{"messages":[{"role":"user","content":"讲一个故事然后告诉我北京的天气怎么样"}],"thinking":{"type": "enabled"}}
+{"messages":[{"role":"user","content":"上海的天气怎么样"}],"thinking":{"type": "enabled"}}
+{"messages":[{"role":"user","content":"上海和北京的天气怎么样"}],"thinking":{"type": "enabled"}}
diff --git a/veadk/cli/templates/rl/ark/job.py b/veadk/cli/templates/rl/ark/job.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ark_sdk.resources.model_customization_job import ModelCustomizationJob
+from ark_sdk.resources.pipeline_plugin import GRPOPipeline, PipelinePluginWrapper
+from ark_sdk.types.model_customization_job import (
+    ModelReference,
+    FoundationModelReference,
+    TrainingDataset,
+    Data,
+)
+
+from plugins.random_reward import random_reward_fn
+from plugins.raw_async_veadk_rollout import demo_veadk_rollout
+
+if __name__ == "__main__":
+    # Describe the customization job: base model, hyperparameters, training
+    # data and the custom GRPO pipeline (grader + rollout plugins).
+    mcj = ModelCustomizationJob(
+        name="sdk-job",
+        model_reference=ModelReference(
+            foundation_model=FoundationModelReference(
+                name="doubao-seed-1-6-flash", model_version="250615"
+            )
+        ),
+        # All hyperparameter values are passed as strings.
+        hyperparameters={
+            "batch_size": "32",
+            "clip_ratio_high": "0.2",
+            "clip_ratio_low": "0.2",
+            "kl_coefficient": "0.001",
+            "loss_agg_mode": "seq-mean-token-mean",
+            "lr": "0.000001",
+            "lr_warmup_steps": "5",
+            "max_new_tokens": "1024",
+            "num_generations": "8",
+            "num_iterations_per_batch": "2",
+            "save_every_n_steps": "10",
+            "temperature": "1.0",
+            "test_every_n_steps": "5",
+            "test_num_generations": "1",
+            "test_top_p": "1",
+            "top_p": "1",
+            "num_steps": "10",
+        },
+        data=Data(
+            training_set=TrainingDataset(
+                local_files=[
+                    # Demo dataset shipped with this scaffold; the path is
+                    # relative to the project root the script is run from.
+                    "./data/demo_dataset.jsonl",
+                ]
+            )
+        ),
+        custom_rl_pipeline=GRPOPipeline(
+            graders=[
+                PipelinePluginWrapper(
+                    plugin=random_reward_fn, envs={"foo": "bar"}, weight=0.5
+                ),
+            ],
+            rollout=PipelinePluginWrapper(
+                plugin=demo_veadk_rollout, envs={"foo": "bar"}
+            ),
+        ),
+    )
+
+    mcj.submit()
+    print(f"Job submitted. view job at {mcj.url}")
diff --git a/veadk/cli/templates/rl/ark/job.yaml b/veadk/cli/templates/rl/ark/job.yaml
@@ -0,0 +1,44 @@
+# Declarative counterpart of job.py for submitting the customization job.
+name: sdk-job
+customization_type: GRPO
+model_reference:
+  foundation_model:
+    name: doubao-seed-1-6-flash
+    model_version: '250615'
+hyperparameters:
+  batch_size: '128'
+  clip_ratio_high: '0.2'
+  clip_ratio_low: '0.2'
+  kl_coefficient: '0.001'
+  loss_agg_mode: seq-mean-token-mean
+  lr: '0.000001'
+  lr_warmup_steps: '5'
+  max_new_tokens: '1024'
+  num_generations: '8'
+  num_iterations_per_batch: '2'
+  save_every_n_steps: '10'
+  temperature: '1.0'
+  test_every_n_steps: '5'
+  test_num_generations: '1'
+  test_top_p: '1'
+  top_p: '1'
+  num_steps: '20'
+custom_rl_pipeline:
+  graders:
+  - plugin:
+      name: random_reward
+      python_func: plugins.random_reward:random_reward_fn
+      envs:
+        foo: bar
+    weight: 0.5
+  rollout:
+    plugin:
+      name: demo_veadk_rollout
+      # Same rollout function that job.py imports from the plugins package.
+      python_func: plugins.raw_async_veadk_rollout:demo_veadk_rollout
+      envs:
+        foo: bar
+
+data:
+  training_set:
+    local_files:
+    # Demo dataset shipped with this scaffold.
+    - ./data/demo_dataset.jsonl
+save_model_limit: 1
diff --git a/veadk/cli/templates/rl/ark/plugins/random_reward.py b/veadk/cli/templates/rl/ark/plugins/random_reward.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import random
+from typing import List
+from ark_sdk.resources.pipeline_plugin import group_grader
+from ark_sdk.types.pipeline_plugin.pipeline_plugin import PluginStatus, PluginContext
+from ark_sdk.types.pipeline_plugin.rollout import Trajectory, ChatCompletionSample
+from ark_sdk.types.pipeline_plugin import (
+    Runtime,
+    PluginInstance,
+    GroupGraderResult,
+)
+
+
+@group_grader(
+    name="random_reward",
+    runtime=Runtime(
+        instance=PluginInstance.CPU1MEM2,
+        max_concurrency=100,
+        timeout=300,
+    ),
+)
+def random_reward_fn(
+    context: PluginContext,
+    sample: ChatCompletionSample,
+    trajectories: List[Trajectory],
+) -> GroupGraderResult:
+    """Grade a group of trajectories with preset or random rewards.
+
+    For each trajectory, the value stored under ``extra["reward"]`` is used
+    when it is present; otherwise a random float in [0.0, 1.0) is drawn.
+
+    Args:
+        context: Plugin context (not used by this grader).
+        sample: Sample the trajectories belong to (not used by this grader).
+        trajectories: Trajectories to score.
+
+    Returns:
+        A GroupGraderResult holding one reward per trajectory, in input
+        order, with SUCCESS status, an empty error string and no metrics.
+    """
+    rewards = [
+        t.extra["reward"] if (t.extra and "reward" in t.extra) else random.random()
+        for t in trajectories
+    ]
+    return GroupGraderResult(
+        rewards=rewards, status=PluginStatus.SUCCESS, error="", metrics={}
+    )
diff --git a/veadk/cli/templates/rl/ark/plugins/raw_async_veadk_rollout.py b/veadk/cli/templates/rl/ark/plugins/raw_async_veadk_rollout.py
diff --git a/veadk/cli/templates/rl/ark/plugins/test_utils.py b/veadk/cli/templates/rl/ark/plugins/test_utils.py
diff --git a/veadk/cli/templates/rl/ark/requirements.txt b/veadk/cli/templates/rl/ark/requirements.txt
diff --git a/veadk/cli/templates/rl/ark/test_faas.py b/veadk/cli/templates/rl/ark/test_faas.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+[plugin.package]`
	`2`	`+include=["*.py"]`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+{"messages":[{"role":"user","content":"讲一个故事然后告诉我北京的天气怎么样"}],"thinking":{"type": "enabled"}}`
	`2`	`+{"messages":[{"role":"user","content":"上海的天气怎么样"}],"thinking":{"type": "enabled"}}`
	`3`	`+{"messages":[{"role":"user","content":"上海和北京的天气怎么样"}],"thinking":{"type": "enabled"}}`