Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions engineV2-README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
| `--custom_device_vs_gpu` | bool | 启用自定义设备与GPU的精度对比测试模式(默认 False) |
| `--custom_device_vs_gpu_mode` | str | 自定义设备与GPU对比的模式:`upload` 或 `download`(默认 `upload`) |
| `--bitwise_alignment` | bool | 是否进行逐位对齐对比。开启后,所有 API 的精度对比都按 atol=0.0、rtol=0.0 进行(默认 False)|
| `--generate_failed_tests` | bool | 是否为失败的测试用例生成可复现的测试文件。开启后,当测试失败时,会在`failed_tests`目录下生成独立的Python测试文件,便于后续复现和调试(默认False)|
| `--exit_on_error` | bool | 是否在精度测试出现`paddle_error`或者 `accuracy_error` 错误时立即退出测试进程(exit code 为1)。默认为False,测试进程会继续执行 |

### 示例命令
Expand Down
7 changes: 7 additions & 0 deletions engineV2.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"random_seed",
"bos_conf_path",
"bcecmd_path",
"generate_failed_tests",
"bitwise_alignment",
"exit_on_error",
}
Expand Down Expand Up @@ -684,6 +685,12 @@ def main():
default=False,
help="Whether to using bitwise alignment when run accuracy test",
)
parser.add_argument(
"--generate_failed_tests",
type=parse_bool,
default=False,
help="Whether to generate reproducible test files for failed cases",
)
parser.add_argument(
"--exit_on_error",
type=parse_bool,
Expand Down
88 changes: 88 additions & 0 deletions tester/paddle_device_vs_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def __init__(self, api_config, **kwargs):
super().__init__(api_config)
self.test_amp = kwargs.get("test_amp", False)
self.custom_device_type = self._get_first_custom_device_type()
self.generate_failed_tests = kwargs.get("generate_failed_tests", False)
if self.check_custom_device_available():
self.custom_device_id = 0
if self.check_xpu_available():
Expand Down Expand Up @@ -260,6 +261,28 @@ def test(self):
if cpu_output is None:
print("[cpu execution failed]", self.api_config.config, flush=True)
write_to_log("paddle_error", self.api_config.config)
# CPU 前向/反向执行失败时,如果开启了生成失败用例,则生成可复现单测
if self.generate_failed_tests:
try:
from .test_file_generator import generate_reproducible_test_file

error_info = {
"error_type": "paddle_error",
"stage": "forward",
"need_backward": self.need_check_grad(),
}
test_file_path = generate_reproducible_test_file(
self.api_config,
error_info,
test_amp=self.test_amp,
target_device="cpu",
device_id=0,
test_instance=self,
)
if test_file_path:
print(f"[Generated test file] {test_file_path}", flush=True)
except Exception as e:
print(f"[Error generating test file] {e}", flush=True)
return

# 6. Run API on target device (including forward and backward)
Expand All @@ -271,6 +294,28 @@ def test(self):
flush=True,
)
write_to_log("paddle_error", self.api_config.config)
# 目标设备前向/反向执行失败,同样生成失败用例
if self.generate_failed_tests:
try:
from .test_file_generator import generate_reproducible_test_file

error_info = {
"error_type": "paddle_error",
"stage": "forward",
"need_backward": self.need_check_grad(),
}
test_file_path = generate_reproducible_test_file(
self.api_config,
error_info,
test_amp=self.test_amp,
target_device=target_device,
device_id=device_id,
test_instance=self,
)
if test_file_path:
print(f"[Generated test file] {test_file_path}", flush=True)
except Exception as e:
print(f"[Error generating test file] {e}", flush=True)
return

# 7. Compare forward results
Expand Down Expand Up @@ -310,3 +355,46 @@ def test(self):
else:
print("[Fail]", self.api_config.config, flush=True)
write_to_log("accuracy_error", self.api_config.config)
# 生成可复现的单测文件
if self.generate_failed_tests:
try:
from .test_file_generator import generate_reproducible_test_file

# 确定目标设备
if self.check_xpu_available():
target_device = "xpu"
device_id = self.xpu_device_id
elif self.check_custom_device_available():
target_device = self.custom_device_type
device_id = self.custom_device_id
else:
target_device = "cpu"
device_id = 0

# 确定失败阶段
stage = "unknown"
if not forward_pass:
stage = "forward"
elif not backward_pass:
stage = "backward"

error_info = {
"error_type": "accuracy_error",
"stage": stage,
"need_backward": self.need_check_grad(),
}

# 生成测试文件
test_file_path = generate_reproducible_test_file(
self.api_config,
error_info,
test_amp=self.test_amp,
target_device=target_device,
device_id=device_id,
test_instance=self,
)

if test_file_path:
print(f"[Generated test file] {test_file_path}", flush=True)
except Exception as e:
print(f"[Error generating test file] {e}", flush=True)
Loading