Skip to content

Commit 5fb210d

Browse files
authored
NO-SNOW: order databricks test data to address flakiness (#3390)
1 parent b795d34 commit 5fb210d

File tree

2 files changed

+55
-46
lines changed

2 files changed

+55
-46
lines changed

tests/integ/datasource/test_databricks.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,15 @@ def test_basic_databricks(session, input_type, input_value):
6464
input_dict = {
6565
input_type: input_value,
6666
}
67-
df = session.read.dbapi(create_databricks_connection, **input_dict)
67+
df = session.read.dbapi(create_databricks_connection, **input_dict).order_by(
68+
"COL_BYTE"
69+
)
6870
ret = df.collect()
6971
assert ret == EXPECTED_TEST_DATA and df.schema == EXPECTED_TYPE
7072

7173
table_name = random_name_for_temp_object(TempObjectType.TABLE)
7274
df.write.save_as_table(table_name, mode="overwrite", table_type="temp")
73-
df2 = session.table(table_name)
75+
df2 = session.table(table_name).order_by("COL_BYTE")
7476
assert df2.collect() == EXPECTED_TEST_DATA and df2.schema == EXPECTED_TYPE
7577

7678

@@ -144,7 +146,7 @@ def local_create_databricks_connection():
144146
udtf_configs={
145147
"external_access_integration": DATABRICKS_TEST_EXTERNAL_ACCESS_INTEGRATION
146148
},
147-
)
149+
).order_by("COL_BYTE")
148150
ret = df.collect()
149151
assert ret == EXPECTED_TEST_DATA and df.schema == EXPECTED_TYPE
150152

@@ -160,10 +162,17 @@ def test_unit_udtf_ingestion():
160162
udtf_ingestion_instance = udtf_ingestion_class()
161163

162164
dsp = DataSourcePartitioner(
163-
create_databricks_connection, TEST_TABLE_NAME, is_query=False
165+
create_databricks_connection,
166+
f"(select * from {TEST_TABLE_NAME}) SORT BY COL_BYTE NULLS FIRST",
167+
is_query=True,
164168
)
165-
yield_data = udtf_ingestion_instance.process(dsp.partitions[0])
166-
for row, expected_row in zip(yield_data, EXPECTED_TEST_DATA):
169+
yield_data = list(udtf_ingestion_instance.process(dsp.partitions[0]))
170+
# databricks SORT BY returns the all-None row as the last row regardless of NULLS FIRST/LAST,
171+
# while in the Snowflake test data, after the default sort, the all-None row is the first row
172+
assert yield_data[-1] == EXPECTED_TEST_DATA[0]
173+
for row, expected_row in zip(
174+
yield_data[:-1], EXPECTED_TEST_DATA[1:]
175+
): # None data ordering is the same
167176
for index, (field, value) in enumerate(zip(EXPECTED_TYPE.fields, row)):
168177
if isinstance(field.datatype, VariantType):
169178
# Convert ArrayType, MapType, and StructType to JSON

tests/resources/test_data_source_dir/test_databricks_data.py

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -25,28 +25,7 @@
2525
TEST_TABLE_NAME = "ALL_TYPE_TABLE_2" # ALL_TYPE_TABLE_2 contains None data while ALL_TYPE_TABLE doesn't
2626
TZ_INFO = pytz.timezone("America/Los_Angeles")
2727
EXPECTED_TEST_DATA = [
28-
(
29-
-34,
30-
25393,
31-
35234,
32-
5644171805,
33-
18.264881134033203,
34-
9187.446999674603,
35-
Decimal("269.89"),
36-
"str_8541",
37-
True,
38-
bytearray(b"\xad\xa9\xdd\xa2"),
39-
datetime.date(2025, 6, 8),
40-
TZ_INFO.localize(
41-
datetime.datetime(2025, 4, 16, 10, 39, 39, 565000), is_dst=True
42-
),
43-
datetime.datetime(2025, 4, 16, 17, 49, 8, 565000),
44-
"[\n 82,\n 40\n]",
45-
'{\n "key1": 71,\n "key2": 81\n}',
46-
'{\n "field1": "f_25",\n "field2": 25\n}',
47-
"3-10",
48-
"18 14:29:08.000000000",
49-
),
28+
tuple([None] * 18),
5029
(
5130
-113,
5231
-14623,
@@ -92,26 +71,26 @@
9271
"2 11:12:05.000000000",
9372
),
9473
(
95-
114,
96-
11139,
97-
75014,
98-
1135763646,
99-
14.668656349182129,
100-
1378.8325065107654,
101-
Decimal("7411.91"),
102-
"str_9765",
103-
False,
74+
-34,
75+
25393,
76+
35234,
77+
5644171805,
78+
18.264881134033203,
79+
9187.446999674603,
80+
Decimal("269.89"),
81+
"str_8541",
82+
True,
10483
bytearray(b"\xad\xa9\xdd\xa2"),
105-
datetime.date(2025, 6, 29),
84+
datetime.date(2025, 6, 8),
10685
TZ_INFO.localize(
107-
datetime.datetime(2025, 4, 16, 10, 48, 27, 565000), is_dst=True
86+
datetime.datetime(2025, 4, 16, 10, 39, 39, 565000), is_dst=True
10887
),
109-
datetime.datetime(2025, 4, 16, 17, 50, 8, 565000),
110-
"[\n 92,\n 27\n]",
111-
'{\n "key1": 52,\n "key2": 65\n}',
112-
'{\n "field1": "f_85",\n "field2": 50\n}',
113-
"7-4",
114-
"22 04:52:41.000000000",
88+
datetime.datetime(2025, 4, 16, 17, 49, 8, 565000),
89+
"[\n 82,\n 40\n]",
90+
'{\n "key1": 71,\n "key2": 81\n}',
91+
'{\n "field1": "f_25",\n "field2": 25\n}',
92+
"3-10",
93+
"18 14:29:08.000000000",
11594
),
11695
(
11796
-31,
@@ -135,7 +114,28 @@
135114
"0-7",
136115
"19 06:25:08.000000000",
137116
),
138-
tuple([None] * 18),
117+
(
118+
114,
119+
11139,
120+
75014,
121+
1135763646,
122+
14.668656349182129,
123+
1378.8325065107654,
124+
Decimal("7411.91"),
125+
"str_9765",
126+
False,
127+
bytearray(b"\xad\xa9\xdd\xa2"),
128+
datetime.date(2025, 6, 29),
129+
TZ_INFO.localize(
130+
datetime.datetime(2025, 4, 16, 10, 48, 27, 565000), is_dst=True
131+
),
132+
datetime.datetime(2025, 4, 16, 17, 50, 8, 565000),
133+
"[\n 92,\n 27\n]",
134+
'{\n "key1": 52,\n "key2": 65\n}',
135+
'{\n "field1": "f_85",\n "field2": 50\n}',
136+
"7-4",
137+
"22 04:52:41.000000000",
138+
),
139139
]
140140
EXPECTED_TYPE = StructType(
141141
[

0 commit comments

Comments
 (0)