diff --git a/.github/workflows/_stable_test.yml b/.github/workflows/_stable_test.yml index b7925a9d190..e37d0c4fe6b 100644 --- a/.github/workflows/_stable_test.yml +++ b/.github/workflows/_stable_test.yml @@ -213,16 +213,16 @@ jobs: if [ -d log ]; then echo ">>> grep error in ./log/" - grep -Rni --color=auto "error" log || true + grep -Rni --color=auto "error" log --exclude="backup_env.*.json" --exclude="default.*.log" --exclude="envlog.*" || true else echo "log/ directory not found" fi - if [ -f log/workerlog.0 ]; then - echo ">>> tail -n 100 log/workerlog.0" - tail -n 100 log/workerlog.0 + if [ -f log/paddle/workerlog.0 ]; then + echo ">>> tail -n 100 log/paddle/workerlog.0" + tail -n 100 log/paddle/workerlog.0 else - echo "log/workerlog.0 not found" + echo "log/paddle/workerlog.0 not found" fi echo "=======================================================" diff --git a/scripts/coverage_run.sh b/scripts/coverage_run.sh index 3fe0514a202..1d44f72eb97 100644 --- a/scripts/coverage_run.sh +++ b/scripts/coverage_run.sh @@ -107,7 +107,7 @@ run_test_with_logging() { echo ">>>> Processing log directory: ${isolated_log_dir}" # workerlog - worker_logs=("${isolated_log_dir}"/workerlog.0) + worker_logs=("${isolated_log_dir}"/paddle/workerlog.0) if [ -f "${worker_logs[0]}" ]; then for worker_log in "${worker_logs[@]}"; do @@ -119,7 +119,7 @@ run_test_with_logging() { fi echo ">>> grep error in ${isolated_log_dir}" - grep -Rni --color=auto "error" "${isolated_log_dir}" --exclude="pytest_*_error.log" || true + grep -Rni --color=auto "error" "${isolated_log_dir}" --exclude="pytest_*_error.log" --exclude="backup_env.*.json" --exclude="default.*.log" --exclude="envlog.*" || true fi # print all server logs diff --git a/scripts/run_ci_dcu.sh b/scripts/run_ci_dcu.sh index 6c1e5e21df3..03ed1a2946b 100644 --- a/scripts/run_ci_dcu.sh +++ b/scripts/run_ci_dcu.sh @@ -52,7 +52,7 @@ if grep -q "Failed to launch worker processes" server.log; then echo "Failed to launch worker processes..." stop_processes cat server.log - cat log/workerlog.0 + cat log/paddle/workerlog.0 exit 1 fi @@ -60,7 +60,7 @@ if grep -q "Traceback (most recent call last):" server.log; then echo "Some errors occurred..." stop_processes cat server.log - cat log/workerlog.0 + cat log/paddle/workerlog.0 exit 1 fi @@ -79,7 +79,7 @@ while true; do if [ $ELAPSED -ge $TIMEOUT ]; then echo -e "\nServer start timeout: After $((TIMEOUT/60)) minutes, the service still doesn't start!" cat server.log - cat log/workerlog.0 + cat log/paddle/workerlog.0 exit 1 fi @@ -106,7 +106,7 @@ stop_processes echo "Stop server done." if [ ${exit_code} -ne 0 ]; then - echo "Exit with error, please refer to log/workerlog.0" - cat log/workerlog.0 + echo "Exit with error, please refer to log/paddle/workerlog.0" + cat log/paddle/workerlog.0 exit 1 fi diff --git a/scripts/run_ci_gcu.sh b/scripts/run_ci_gcu.sh index 46ceee8d788..8fec701888f 100644 --- a/scripts/run_ci_gcu.sh +++ b/scripts/run_ci_gcu.sh @@ -51,7 +51,7 @@ if grep -q "Failed to launch worker processes" server.log; then echo "Failed to launch worker processes..." stop_processes cat server.log - cat log/workerlog.0 + cat log/paddle/workerlog.0 exit 1 fi @@ -59,7 +59,7 @@ if grep -q "Traceback (most recent call last):" server.log; then echo "Some errors occurred..." stop_processes cat server.log - cat log/workerlog.0 + cat log/paddle/workerlog.0 exit 1 fi @@ -79,7 +79,7 @@ while true; do echo -e "\nServer start timeout: After $((TIMEOUT/60)) minutes, the service still doesn't start!" stop_processes cat server.log - cat log/workerlog.0 + cat log/paddle/workerlog.0 exit 1 fi @@ -106,7 +106,7 @@ stop_processes echo "Stop server done." if [ ${exit_code} -ne 0 ]; then - echo "Exit with error, please refer to log/workerlog.0" - cat log/workerlog.0 + echo "Exit with error, please refer to log/paddle/workerlog.0" + cat log/paddle/workerlog.0 exit 1 fi diff --git a/scripts/run_ci_hpu.sh b/scripts/run_ci_hpu.sh index 2dc0284a7f6..ff63b2d011d 100755 --- a/scripts/run_ci_hpu.sh +++ b/scripts/run_ci_hpu.sh @@ -107,16 +107,16 @@ while true; do echo -e "\nstart serving failed with timeout: $((TIMEOUT/60)) seconds" cat server.log #ERNIE-4.5-21B-A3B-Paddle only has workerlog.0 - cat log/workerlog.0 + cat log/paddle/workerlog.0 #ERNIE-4.5-300B-A47B-Paddle (300B) will have 8 workerlog if [ $ENABLE_TESTING_ERNIE45_300B_A47B_Paddle -eq 1 ]; then - cat log/workerlog.1 - cat log/workerlog.2 - cat log/workerlog.3 - cat log/workerlog.4 - cat log/workerlog.5 - cat log/workerlog.6 - cat log/workerlog.7 + cat log/paddle/workerlog.1 + cat log/paddle/workerlog.2 + cat log/paddle/workerlog.3 + cat log/paddle/workerlog.4 + cat log/paddle/workerlog.5 + cat log/paddle/workerlog.6 + cat log/paddle/workerlog.7 fi exit 1 fi @@ -145,8 +145,8 @@ ps -efww | grep -E $FD_API_PORT | grep -v grep | awk '{print $2}' | xargs kill - lsof -t -i :$FD_API_PORT | xargs kill -9 || true if [ ${exit_code} -ne 0 ]; then - echo "log/workerlog.0" - cat log/workerlog.0 + echo "log/paddle/workerlog.0" + cat log/paddle/workerlog.0 echo "mold testing failed, please help to do check for your PR source codeing" exit 1 fi diff --git a/scripts/run_golang_router.sh b/scripts/run_golang_router.sh index 66578d267d9..6ea910d1aac 100644 --- a/scripts/run_golang_router.sh +++ b/scripts/run_golang_router.sh @@ -40,7 +40,7 @@ for test_file in "${test_files[@]}"; do echo ">>>> Processing log directory: ${log_dir}" # print all workerlog.0 - worker_logs=("${log_dir}"/workerlog.0) + worker_logs=("${log_dir}"/paddle/workerlog.0) if [ "${#worker_logs[@]}" -gt 0 ]; then for worker_log in "${worker_logs[@]}"; do if [ -f "${worker_log}" ]; then @@ -54,7 +54,7 @@ for test_file in "${test_files[@]}"; do fi echo ">>> grep error in ${log_dir}" - grep -Rni --color=auto "error" "${log_dir}" || true + grep -Rni --color=auto "error" "${log_dir}" --exclude="pytest_*_error.log" --exclude="backup_env.*.json" --exclude="default.*.log" --exclude="envlog.*" || true fi done diff --git a/scripts/run_gpu_4cards.sh b/scripts/run_gpu_4cards.sh index 719ec19255c..a58534f48d8 100644 --- a/scripts/run_gpu_4cards.sh +++ b/scripts/run_gpu_4cards.sh @@ -44,14 +44,20 @@ for test_file in "${test_files[@]}"; do if [ -d "${REPO_ROOT}/log" ]; then echo ">>> grep error in ${REPO_ROOT}/log/" - grep -Rni --color=auto "error" "${REPO_ROOT}/log/" || true + grep -Rni --color=auto "error" "${REPO_ROOT}/log/" --exclude="pytest_*_error.log" --exclude="backup_env.*.json" --exclude="default.*.log" --exclude="envlog.*" || true else echo "${REPO_ROOT}/log directory not found" fi - if [ -f "${REPO_ROOT}/log/log_0/workerlog.0" ]; then + if [ -f "${REPO_ROOT}/log/paddle/workerlog.0" ]; then echo "---------------- workerlog.0 (last 100 lines) -------------" - tail -n 100 "${REPO_ROOT}/log/log_0/workerlog.0" + tail -n 100 "${REPO_ROOT}/log/paddle/workerlog.0" + echo "------------------------------------------------------------" + fi + + if [ -f "${REPO_ROOT}/log/log_0/paddle/workerlog.0" ]; then + echo "---------------- workerlog.0 (last 100 lines) -------------" + tail -n 100 "${REPO_ROOT}/log/log_0/paddle/workerlog.0" echo "------------------------------------------------------------" fi diff --git a/scripts/run_pre_ce.sh b/scripts/run_pre_ce.sh index 928aa2e7cef..a024bc7bf3e 100644 --- a/scripts/run_pre_ce.sh +++ b/scripts/run_pre_ce.sh @@ -38,14 +38,14 @@ for subdir in "$run_path"*/; do if [ $exit_code -ne 0 ]; then if [ -d "${subdir%/}/log" ]; then echo ">>> grep error in ${subdir%/}/log/" - grep -Rni --color=auto "error" "${subdir%/}/log/" || true + grep -Rni --color=auto "error" "${subdir%/}/log/" --exclude="pytest_*_error.log" --exclude="backup_env.*.json" --exclude="default.*.log" --exclude="envlog.*" || true else echo "${subdir%/}/log directory not found" fi - if [ -f "${subdir%/}/log/workerlog.0" ]; then - echo "---------------- log/workerlog.0 -------------------" - cat "${subdir%/}/log/workerlog.0" + if [ -f "${subdir%/}/log/paddle/workerlog.0" ]; then + echo "---------------- log/paddle/workerlog.0 -------------------" + cat "${subdir%/}/log/paddle/workerlog.0" echo "----------------------------------------------------" fi diff --git a/tests/ci_validation/deploy/deploy.py b/tests/ci_validation/deploy/deploy.py index 856a7b594ad..1f40c706bfd 100644 --- a/tests/ci_validation/deploy/deploy.py +++ b/tests/ci_validation/deploy/deploy.py @@ -491,8 +491,8 @@ def tail_file(path, lines=50): result = f"服务启动超时,耗时:[{timeout}s]\n\n" result += "==== server.log tail 50 ====\n" result += tail_file("server.log") - result += "\n==== log/workerlog.0 tail 50 ====\n" - result += tail_file("log/workerlog.0") + result += "\n==== log/paddle/workerlog.0 tail 50 ====\n" + result += tail_file("log/paddle/workerlog.0") yield result break diff --git a/tests/e2e/utils/serving_utils.py b/tests/e2e/utils/serving_utils.py index 6dd5e77c9b7..89a6a37add7 100644 --- a/tests/e2e/utils/serving_utils.py +++ b/tests/e2e/utils/serving_utils.py @@ -272,10 +272,10 @@ def tail_file(path, n=50): def dump_server_logs(tail_lines=50): """打印server日志""" log_files = [ - "log/workerlog.0", + "log/paddle/workerlog.0", "log/fastdeploy.log", "log/log_0/fastdeploy.log", - "log/log_0/workerlog.0", + "log/log_0/paddle/workerlog.0", ] for path in log_files: diff --git a/tests/metax_ci/run_paddle_ocr_test.sh b/tests/metax_ci/run_paddle_ocr_test.sh index 3639759f93b..698ba39ccc3 100644 --- a/tests/metax_ci/run_paddle_ocr_test.sh +++ b/tests/metax_ci/run_paddle_ocr_test.sh @@ -86,14 +86,14 @@ while true; do POLL_COUNT=$((POLL_COUNT + 1)) if [ $POLL_COUNT -ge $SERVER_TIMEOUT_SEC ]; then cat ${SERVER_LOG_FILE} - cat log/workerlog.0 + cat log/paddle/workerlog.0 echo "[TIMEOUT] Server process is about to terminate and exit the script!" exit 1 fi if ! kill -0 $SERVER_PID >/dev/null 2>&1; then cat ${SERVER_LOG_FILE} - cat log/workerlog.0 + cat log/paddle/workerlog.0 echo "[ERROR] Server process(PID: $SERVER_PID) has exited abnormally and no keywords were detected!" exit 1 fi diff --git a/tests/model_loader/utils.py b/tests/model_loader/utils.py index 1037d2c64e2..3b7321ba38a 100644 --- a/tests/model_loader/utils.py +++ b/tests/model_loader/utils.py @@ -41,7 +41,7 @@ def clear_logs(): def print_logs(): - log_dir = os.path.join(os.getcwd(), "log") + log_dir = os.path.join(os.getcwd(), "log", "paddle") log_file = os.path.join(log_dir, "workerlog.0") if not os.path.exists(log_file): diff --git a/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp1.py b/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp1.py index 2429b8c1458..cc53cf49681 100644 --- a/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp1.py +++ b/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp1.py @@ -109,7 +109,7 @@ def print_pd_logs_on_failure(): log_dirs = ["log_router", "log_prefill", "log_decode"] for log_dir in log_dirs: - nohup_path = os.path.join(log_dir, "log_0/workerlog.0") + nohup_path = os.path.join(log_dir, "log_0/paddle/workerlog.0") if os.path.exists(nohup_path): print(f"\n========== {nohup_path} ==========") with open(nohup_path, "r") as f: diff --git a/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4.py b/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4.py index c1f804e6466..373a0633cd9 100644 --- a/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4.py +++ b/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4.py @@ -109,7 +109,7 @@ def print_pd_logs_on_failure(): log_dirs = ["log_router", "log_prefill", "log_decode"] for log_dir in log_dirs: - nohup_path = os.path.join(log_dir, "log_0/workerlog.0") + nohup_path = os.path.join(log_dir, "log_0/paddle/workerlog.0") if os.path.exists(nohup_path): print(f"\n========== {nohup_path} ==========") with open(nohup_path, "r") as f: diff --git a/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4_cudagraph.py b/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4_cudagraph.py index 280870f0a55..4a69e7d8e73 100644 --- a/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4_cudagraph.py +++ b/tests/xpu_ci/8cards_cases/test_pd_21b_ep4tp4_cudagraph.py @@ -109,7 +109,7 @@ def print_pd_logs_on_failure(): log_dirs = ["log_router", "log_prefill", "log_decode"] for log_dir in log_dirs: - nohup_path = os.path.join(log_dir, "log_0/workerlog.0") + nohup_path = os.path.join(log_dir, "log_0/paddle/workerlog.0") if os.path.exists(nohup_path): print(f"\n========== {nohup_path} ==========") with open(nohup_path, "r") as f: diff --git a/tests/xpu_ci/8cards_cases/test_pd_p_tp4ep4_d_tp1ep4.py b/tests/xpu_ci/8cards_cases/test_pd_p_tp4ep4_d_tp1ep4.py index 936bc8b6371..a48730bec38 100644 --- a/tests/xpu_ci/8cards_cases/test_pd_p_tp4ep4_d_tp1ep4.py +++ b/tests/xpu_ci/8cards_cases/test_pd_p_tp4ep4_d_tp1ep4.py @@ -110,7 +110,7 @@ def print_pd_logs_on_failure(): log_dirs = ["log_router", "log_prefill", "log_decode"] for log_dir in log_dirs: - nohup_path = os.path.join(log_dir, "log_0/workerlog.0") + nohup_path = os.path.join(log_dir, "log_0/paddle/workerlog.0") if os.path.exists(nohup_path): print(f"\n========== {nohup_path} ==========") with open(nohup_path, "r") as f: diff --git a/tests/xpu_ci/README.md b/tests/xpu_ci/README.md index ecfa46d2179..b804af9a43b 100644 --- a/tests/xpu_ci/README.md +++ b/tests/xpu_ci/README.md @@ -313,12 +313,12 @@ import pdb; pdb.set_trace() ### 2. 如何查看服务器日志? -测试失败时会自动打印 `server.log` 和 `log/workerlog.0` 的内容。 +测试失败时会自动打印 `server.log` 和 `log/paddle/workerlog.0` 的内容。 你也可以在测试运行时手动查看: ```bash tail -f server.log -tail -f log/workerlog.0 +tail -f log/paddle/workerlog.0 ``` ### 3. 如何跳过某个测试? diff --git a/tests/xpu_ci/conftest.py b/tests/xpu_ci/conftest.py index aae150532da..4cddded980a 100644 --- a/tests/xpu_ci/conftest.py +++ b/tests/xpu_ci/conftest.py @@ -206,9 +206,9 @@ def print_logs_on_failure(): with open("server.log", "r") as f: print(f.read()) - print("\n========== log/workerlog.0 ==========") - if os.path.exists("log/workerlog.0"): - with open("log/workerlog.0", "r") as f: + print("\n========== log/paddle/workerlog.0 ==========") + if os.path.exists("log/paddle/workerlog.0"): + with open("log/paddle/workerlog.0", "r") as f: print(f.read())