Skip to content

Commit aded44e

Browse files
committed
fix: [6880760974] normalize unstable win vtable output.
1 parent f6a26db commit aded44e

File tree

2 files changed

+142
-8
lines changed

2 files changed

+142
-8
lines changed

test/cases/05-VirtualTables/test_vtable_join.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,6 @@ def test_vtable_join(self):
197197
self.sqlFile = etool.curFile(__file__, f"in/{testCase}.in")
198198
self.ansFile = etool.curFile(__file__, f"ans/{testCase}.ans")
199199

200-
tdCom.compare_testcase_result(self.sqlFile, self.ansFile, testCase)
201-
202-
200+
tdCom.compare_testcase_result(
201+
self.sqlFile, self.ansFile, testCase, float_tolerance=2e-5
202+
)

test/new_test_framework/utils/common.py

Lines changed: 139 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from .constant import *
3232
from .epath import *
3333
from dataclasses import dataclass, field
34+
from decimal import Decimal, InvalidOperation
3435
from typing import List
3536
from datetime import datetime, timedelta
3637
import re
@@ -3039,7 +3040,119 @@ def generate_query_result(self, inputfile, test_case):
30393040
)
30403041
return self.query_result_file
30413042

3042-
def compare_result_files(self, file1, file2):
3043+
def _get_numeric_compare_tolerance(self, token1, token2, float_tolerance):
3044+
if float_tolerance > 0.0:
3045+
return Decimal(str(float_tolerance))
3046+
3047+
def count_decimal_places(token):
3048+
mantissa = token.lower().split("e", 1)[0]
3049+
if "." not in mantissa:
3050+
return 0
3051+
return len(mantissa.split(".", 1)[1])
3052+
3053+
precision = max(count_decimal_places(token1), count_decimal_places(token2))
3054+
if precision <= 0:
3055+
return Decimal("0")
3056+
return Decimal(1).scaleb(-precision)
3057+
3058+
def _normalize_result_line_for_compare(self, line):
3059+
"""Normalize CLI-only suffixes before answer/result file comparison.
3060+
3061+
Args:
3062+
line: A single line from an answer or result file.
3063+
3064+
Returns:
3065+
The normalized line with runtime-only Windows suffixes removed.
3066+
"""
3067+
3068+
normalized = line.rstrip()
3069+
normalized = re.sub(r"\s*\([0-9]+\.[0-9]+s\)$", "", normalized)
3070+
normalized = re.sub(r"cost=[0-9]+\.[0-9]+\.\.[0-9]+\.[0-9]+", "", normalized)
3071+
normalized = re.sub(r"Planning Time: [0-9]+\.[0-9]+ ms", "", normalized)
3072+
normalized = re.sub(r"Execution Time: [0-9]+\.[0-9]+ ms", "", normalized)
3073+
normalized = re.sub(r"max_row_task=[0-9]+, ", "", normalized)
3074+
return normalized.rstrip()
3075+
3076+
def _compare_normalized_result_lines(self, file1, file2):
    """Tell whether two result files are equal once each line is normalized.

    Both files are read as UTF-8 (undecodable bytes are dropped) and split
    into lines. They match when they have the same number of lines and
    every pair of corresponding lines is equal after being passed through
    ``_normalize_result_line_for_compare``.

    Args:
        file1: Expected result file path.
        file2: Actual result file path.

    Returns:
        True when all normalized line pairs are identical, False otherwise.
    """
    with open(file1, "r", encoding="utf-8", errors="ignore") as expected_fh:
        expected_lines = expected_fh.read().splitlines()
    with open(file2, "r", encoding="utf-8", errors="ignore") as actual_fh:
        actual_lines = actual_fh.read().splitlines()

    # zip() would silently drop surplus lines, so check the length first.
    if len(expected_lines) != len(actual_lines):
        return False

    normalize = self._normalize_result_line_for_compare
    return all(
        normalize(expected) == normalize(actual)
        for expected, actual in zip(expected_lines, actual_lines)
    )
3102+
3103+
def _compare_file_lines_with_float_tolerance(self, file1, file2, float_tolerance):
    """Compare two result files line by line, allowing small numeric drift.

    Each line is first normalized with ``_normalize_result_line_for_compare``.
    Lines that still differ are split into numeric tokens and the literal
    text around them: the literal text must be identical, and each pair of
    numbers must differ by no more than the tolerance returned by
    ``_get_numeric_compare_tolerance``.

    Args:
        file1: Path of the first file (read as UTF-8, bad bytes dropped).
        file2: Path of the second file (read the same way).
        float_tolerance: Tolerance forwarded to
            ``_get_numeric_compare_tolerance`` for every numeric pair.

    Returns:
        True when every line pair matches under these rules, else False.
    """
    numeric_token = re.compile(r"[-+]?(?:\d+\.\d+|\d+|\.\d+)(?:[eE][-+]?\d+)?")

    with open(file1, "r", encoding="utf-8", errors="ignore") as first_fh:
        first_lines = first_fh.read().splitlines()
    with open(file2, "r", encoding="utf-8", errors="ignore") as second_fh:
        second_lines = second_fh.read().splitlines()

    if len(first_lines) != len(second_lines):
        return False

    for raw_first, raw_second in zip(first_lines, second_lines):
        first = self._normalize_result_line_for_compare(raw_first)
        second = self._normalize_result_line_for_compare(raw_second)

        # Fast path: nothing to tokenize when the lines already agree.
        if first == second:
            continue

        first_hits = list(numeric_token.finditer(first))
        second_hits = list(numeric_token.finditer(second))
        if len(first_hits) != len(second_hits):
            return False

        # Offsets just past the previously compared number in each line.
        first_pos, second_pos = 0, 0
        for first_hit, second_hit in zip(first_hits, second_hits):
            # The literal text leading up to each number must be identical.
            first_gap = first[first_pos:first_hit.start()]
            second_gap = second[second_pos:second_hit.start()]
            if first_gap != second_gap:
                return False

            first_text, second_text = first_hit.group(0), second_hit.group(0)
            try:
                first_number = Decimal(first_text)
                second_number = Decimal(second_text)
            except InvalidOperation:
                # Not parseable as a number: fall back to exact text equality.
                if first_text != second_text:
                    return False
            else:
                allowed = self._get_numeric_compare_tolerance(
                    first_text, second_text, float_tolerance
                )
                if abs(first_number - second_number) > allowed:
                    return False

            first_pos, second_pos = first_hit.end(), second_hit.end()

        # Whatever follows the last number must match as well.
        if first[first_pos:] != second[second_pos:]:
            return False

    return True
3154+
3155+
def compare_result_files(self, file1, file2, float_tolerance=0.0):
30433156
try:
30443157
# use subprocess.run to execute diff/fc commands
30453158
# print(file1, file2)
@@ -3067,6 +3180,19 @@ def compare_result_files(self, file1, file2):
30673180
return True
30683181
# if result is not empty, print the differences and files name. Otherwise, the files are identical.
30693182
if result.returncode != 0:
3183+
if self._compare_normalized_result_lines(file1, file2):
3184+
tdLog.info("Result files matched after output normalization.")
3185+
return True
3186+
if platform.system().lower() == "windows" and self._compare_file_lines_with_float_tolerance(
3187+
file1, file2, float_tolerance
3188+
):
3189+
tdLog.info(
3190+
"Result files matched after Windows output normalization."
3191+
if float_tolerance <= 0.0
3192+
else "Result files matched after Windows output normalization "
3193+
f"with float tolerance {float_tolerance}."
3194+
)
3195+
return True
30703196
tdLog.info(f"{cmd} result.returncode: {result.returncode}")
30713197
tdLog.info(f"{cmd} result.stdout: {result.stdout}")
30723198
tdLog.info(f"{cmd} result.stderr: {result.stderr}")
@@ -3087,9 +3213,13 @@ def compare_result_files(self, file1, file2):
30873213
except Exception as e:
30883214
tdLog.debug(f"An error occurred: {e}")
30893215

3090-
def compare_query_with_result_file(self, idx, sql, resultFile, test_case):
3216+
def compare_query_with_result_file(
3217+
self, idx, sql, resultFile, test_case, float_tolerance=0.0
3218+
):
30913219
self.generate_query_result_file(test_case, idx, sql)
3092-
if self.compare_result_files(resultFile, self.query_result_file):
3220+
if self.compare_result_files(
3221+
resultFile, self.query_result_file, float_tolerance=float_tolerance
3222+
):
30933223
tdLog.info("Test passed: Result files are identical.")
30943224
# os.system(f"rm -f {self.query_result_file}")
30953225
else:
@@ -3098,10 +3228,14 @@ def compare_query_with_result_file(self, idx, sql, resultFile, test_case):
30983228
f"{caller.lineno}(line:{caller.lineno}) failed: expect_file:{resultFile} != reult_file:{self.query_result_file} "
30993229
)
31003230

3101-
def compare_testcase_result(self, inputfile, expected_file, test_case):
3231+
def compare_testcase_result(
3232+
self, inputfile, expected_file, test_case, float_tolerance=0.0
3233+
):
31023234
test_reulst_file = self.generate_query_result(inputfile, test_case)
31033235

3104-
if self.compare_result_files(expected_file, test_reulst_file):
3236+
if self.compare_result_files(
3237+
expected_file, test_reulst_file, float_tolerance=float_tolerance
3238+
):
31053239
tdLog.info("Test passed: Result files are identical.")
31063240
os.system(f"rm -f {test_reulst_file}")
31073241
else:

0 commit comments

Comments
 (0)