Skip to content

Commit ed79d7c

Browse files
committed
modify test for structured types compat
1 parent 020f5d8 commit ed79d7c

File tree

1 file changed

+154
-111
lines changed

1 file changed

+154
-111
lines changed

tests/integ/test_dataframe.py

Lines changed: 154 additions & 111 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from collections import namedtuple
1313
from decimal import Decimal
1414
from itertools import product
15+
from textwrap import dedent
1516
from typing import Tuple
1617
from unittest import mock
1718

@@ -91,6 +92,8 @@
9192
TestFiles,
9293
Utils,
9394
multithreaded_run,
95+
structured_types_enabled_session,
96+
structured_types_supported,
9497
)
9598

9699
# Python 3.8 needs to use typing.Iterable because collections.abc.Iterable is not subscriptable
@@ -1877,6 +1880,9 @@ def test_create_dataframe_with_variant(session):
18771880
reason="local testing does not fully support structured types yet.",
18781881
)
18791882
def test_show_dataframe_spark(session):
1883+
if not structured_types_supported(session, False):
1884+
pytest.skip("Test requires structured type support.")
1885+
18801886
data = [
18811887
1,
18821888
"one",
@@ -1892,117 +1898,154 @@ def test_show_dataframe_spark(session):
18921898
[1, 2, 3],
18931899
{"a": "foo"},
18941900
]
1895-
df = session.create_dataframe(
1896-
[data],
1897-
schema=StructType(
1898-
[StructField(f"col_{i + 1}", VariantType()) for i in range(len(data))]
1899-
),
1900-
)
1901-
spark_col_names = [f"col_{i + 1}" for i in range(len(data))]
1902-
1903-
def compare_show_string(actual: str, expected: str) -> bool:
1904-
actual_lines = actual.strip().split("\n")
1905-
expected_lines = expected.strip().split("\n")
1906-
result = [a.strip() == e.strip() for a, e in zip(actual_lines, expected_lines)]
1907-
return all(result)
1908-
1909-
assert compare_show_string(
1910-
df._show_string_spark(_emit_ast=session.ast_enabled).strip(),
1911-
"""
1912-
+-------+-------+-------+--------------------+--------+----------+-------+-------+-------+--------+--------+---------+------------+
1913-
|"COL_1"|"COL_2"|"COL_3"| "COL_4"| "COL_5"| "COL_6"|"COL_7"|"COL_8"|"COL_9"|"COL_10"|"COL_11"| "COL_12"| "COL_13"|
1914-
+-------+-------+-------+--------------------+--------+----------+-------+-------+-------+--------+--------+---------+------------+
1915-
| 1| one| 1.1|2017-02-24T12:00:...|20:57:06|2017-02-25| True| False| NULL| 61| 0.5|[1, 2, 3]|{'a': 'foo'}|
1916-
+-------+-------+-------+--------------------+--------+----------+-------+-------+-------+--------+--------+---------+------------+
1917-
""",
1918-
)
1919-
assert compare_show_string(
1920-
df._show_string_spark(
1921-
_emit_ast=session.ast_enabled, _spark_column_names=spark_col_names
1922-
),
1923-
"""
1924-
+-----+-----+-----+--------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1925-
|col_1|col_2|col_3| col_4| col_5| col_6|col_7|col_8|col_9|col_10|col_11| col_12| col_13|
1926-
+-----+-----+-----+--------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1927-
| 1| one| 1.1|2017-02-24T12:00:...|20:57:06|2017-02-25| True|False| NULL| 61| 0.5|[1, 2, 3]|{'a': 'foo'}|
1928-
+-----+-----+-----+--------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1929-
""",
1930-
)
1931-
assert compare_show_string(
1932-
df._show_string_spark(
1933-
vertical=True,
1934-
_emit_ast=session.ast_enabled,
1935-
_spark_column_names=spark_col_names,
1936-
),
1937-
"""
1938-
-RECORD 0----------------------
1939-
col_1 | 1
1940-
col_2 | one
1941-
col_3 | 1.1
1942-
col_4 | 2017-02-24T12:00:...
1943-
col_5 | 20:57:06
1944-
col_6 | 2017-02-25
1945-
col_7 | True
1946-
col_8 | False
1947-
col_9 | NULL
1948-
col_10 | 61
1949-
col_11 | 0.5
1950-
col_12 | [1, 2, 3]
1951-
col_13 | {'a': 'foo'}
1952-
""",
1953-
)
1954-
assert compare_show_string(
1955-
df._show_string_spark(
1956-
vertical=True,
1957-
truncate=False,
1958-
_emit_ast=session.ast_enabled,
1959-
_spark_column_names=spark_col_names,
1960-
),
1961-
"""
1962-
-RECORD 0----------------------------
1963-
col_1 | 1
1964-
col_2 | one
1965-
col_3 | 1.1
1966-
col_4 | 2017-02-24T12:00:05.456000
1967-
col_5 | 20:57:06
1968-
col_6 | 2017-02-25
1969-
col_7 | True
1970-
col_8 | False
1971-
col_9 | NULL
1972-
col_10 | 61
1973-
col_11 | 0.5
1974-
col_12 | [1, 2, 3]
1975-
col_13 | {'a': 'foo'}
1976-
""",
1977-
)
1978-
assert compare_show_string(
1979-
df._show_string_spark(
1980-
truncate=False,
1981-
_emit_ast=session.ast_enabled,
1982-
_spark_column_names=spark_col_names,
1983-
),
1984-
"""
1985-
+-----+-----+-----+--------------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1986-
|col_1|col_2|col_3|col_4 |col_5 |col_6 |col_7|col_8|col_9|col_10|col_11|col_12 |col_13 |
1987-
+-----+-----+-----+--------------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1988-
|1 |one |1.1 |2017-02-24T12:00:05.456000|20:57:06|2017-02-25|True |False|NULL |61 |0.5 |[1, 2, 3]|{'a': 'foo'}|
1989-
+-----+-----+-----+--------------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1990-
""",
1991-
)
1992-
assert compare_show_string(
1993-
df._show_string_spark(
1994-
truncate=10,
1995-
_emit_ast=session.ast_enabled,
1996-
_spark_column_names=spark_col_names,
1997-
),
1998-
"""
1999-
+-----+-----+-----+----------+--------+----------+-----+-----+-----+------+------+---------+----------+
2000-
|col_1|col_2|col_3| col_4| col_5| col_6|col_7|col_8|col_9|col_10|col_11| col_12| col_13|
2001-
+-----+-----+-----+----------+--------+----------+-----+-----+-----+------+------+---------+----------+
2002-
| 1| one| 1.1|2017-02...|20:57:06|2017-02-25| True|False| NULL| 61| 0.5|[1, 2, 3]|{'a': '...|
2003-
+-----+-----+-----+----------+--------+----------+-----+-----+-----+------+------+---------+----------+
2004-
""",
2005-
)
1901+
1902+
with structured_types_enabled_session(session) as session:
1903+
schema = StructType(
1904+
[
1905+
StructField("col_1", IntegerType()),
1906+
StructField("col_2", StringType()),
1907+
StructField("col_3", FloatType()),
1908+
StructField("col_4", TimestampType()),
1909+
StructField("col_5", TimeType()),
1910+
StructField("col_6", DateType()),
1911+
StructField("col_7", BooleanType()),
1912+
StructField("col_8", BooleanType()),
1913+
StructField("col_9", VariantType()),
1914+
StructField("col_10", BinaryType()),
1915+
StructField("col_11", DecimalType()),
1916+
StructField("col_12", ArrayType(IntegerType())),
1917+
StructField("col_13", MapType(StringType(), StringType())),
1918+
]
1919+
)
1920+
df = session.create_dataframe([data], schema=schema)
1921+
spark_col_names = [f"col_{i + 1}" for i in range(len(data))]
1922+
1923+
def compare_show_string(actual: str, expected: str) -> bool:
    """Return True when two rendered tables match line by line.

    Each input is stripped as a whole and split on newlines; corresponding
    lines are then compared with their leading and trailing whitespace
    ignored, so indentation of the expected literal does not matter.

    Args:
        actual: The string produced by the code under test.
        expected: The reference rendering to compare against.

    Returns:
        True if both inputs have the same number of lines and every pair of
        lines is equal after stripping; False otherwise.
    """
    actual_lines = actual.strip().split("\n")
    expected_lines = expected.strip().split("\n")
    # zip() stops at the shorter sequence, so without this guard an output
    # with missing (or extra) rows would still be reported as a match.
    if len(actual_lines) != len(expected_lines):
        return False
    return all(a.strip() == e.strip() for a, e in zip(actual_lines, expected_lines))
1930+
1931+
print("\n")
1932+
print(
1933+
df._show_string_spark(
1934+
truncate=False,
1935+
_emit_ast=session.ast_enabled,
1936+
_spark_column_names=spark_col_names,
1937+
).strip()
1938+
)
1939+
print("\n")
1940+
assert compare_show_string(
1941+
df._show_string_spark(_emit_ast=session.ast_enabled).strip(),
1942+
dedent(
1943+
"""
1944+
+-------+-------+-------+--------------------+--------+----------+-------+-------+-------+--------+--------+---------+------------+
1945+
|"COL_1"|"COL_2"|"COL_3"| "COL_4"| "COL_5"| "COL_6"|"COL_7"|"COL_8"|"COL_9"|"COL_10"|"COL_11"| "COL_12"| "COL_13"|
1946+
+-------+-------+-------+--------------------+--------+----------+-------+-------+-------+--------+--------+---------+------------+
1947+
| 1| one| 1.1|2017-02-24T12:00:...|20:57:06|2017-02-25| True| False| NULL| 61| 0.5|[1, 2, 3]|{'a': 'foo'}|
1948+
+-------+-------+-------+--------------------+--------+----------+-------+-------+-------+--------+--------+---------+------------+
1949+
"""
1950+
),
1951+
)
1952+
assert compare_show_string(
1953+
df._show_string_spark(
1954+
_emit_ast=session.ast_enabled, _spark_column_names=spark_col_names
1955+
),
1956+
dedent(
1957+
"""
1958+
+-----+-----+-----+--------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1959+
|col_1|col_2|col_3| col_4| col_5| col_6|col_7|col_8|col_9|col_10|col_11| col_12| col_13|
1960+
+-----+-----+-----+--------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1961+
| 1| one| 1.1|2017-02-24T12:00:...|20:57:06|2017-02-25| True|False| NULL| 61| 0.5|[1, 2, 3]|{'a': 'foo'}|
1962+
+-----+-----+-----+--------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1963+
"""
1964+
),
1965+
)
1966+
assert compare_show_string(
1967+
df._show_string_spark(
1968+
vertical=True,
1969+
_emit_ast=session.ast_enabled,
1970+
_spark_column_names=spark_col_names,
1971+
),
1972+
dedent(
1973+
"""
1974+
-RECORD 0----------------------
1975+
col_1 | 1
1976+
col_2 | one
1977+
col_3 | 1.1
1978+
col_4 | 2017-02-24T12:00:...
1979+
col_5 | 20:57:06
1980+
col_6 | 2017-02-25
1981+
col_7 | True
1982+
col_8 | False
1983+
col_9 | NULL
1984+
col_10 | 61
1985+
col_11 | 0.5
1986+
col_12 | [1, 2, 3]
1987+
col_13 | {'a': 'foo'}
1988+
"""
1989+
),
1990+
)
1991+
assert compare_show_string(
1992+
df._show_string_spark(
1993+
vertical=True,
1994+
truncate=False,
1995+
_emit_ast=session.ast_enabled,
1996+
_spark_column_names=spark_col_names,
1997+
),
1998+
dedent(
1999+
"""
2000+
-RECORD 0----------------------------
2001+
col_1 | 1
2002+
col_2 | one
2003+
col_3 | 1.1
2004+
col_4 | 2017-02-24T12:00:05.456000
2005+
col_5 | 20:57:06
2006+
col_6 | 2017-02-25
2007+
col_7 | True
2008+
col_8 | False
2009+
col_9 | NULL
2010+
col_10 | 61
2011+
col_11 | 0.5
2012+
col_12 | [1, 2, 3]
2013+
col_13 | {'a': 'foo'}
2014+
"""
2015+
),
2016+
)
2017+
assert compare_show_string(
2018+
df._show_string_spark(
2019+
truncate=False,
2020+
_emit_ast=session.ast_enabled,
2021+
_spark_column_names=spark_col_names,
2022+
),
2023+
dedent(
2024+
"""
2025+
+-----+-----+-----+--------------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
2026+
|col_1|col_2|col_3|col_4 |col_5 |col_6 |col_7|col_8|col_9|col_10|col_11|col_12 |col_13 |
2027+
+-----+-----+-----+--------------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
2028+
|1 |one |1.1 |2017-02-24T12:00:05.456000|20:57:06|2017-02-25|True |False|NULL |61 |0.5 |[1, 2, 3]|{'a': 'foo'}|
2029+
+-----+-----+-----+--------------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
2030+
"""
2031+
),
2032+
)
2033+
assert compare_show_string(
2034+
df._show_string_spark(
2035+
truncate=10,
2036+
_emit_ast=session.ast_enabled,
2037+
_spark_column_names=spark_col_names,
2038+
),
2039+
dedent(
2040+
"""
2041+
+-----+-----+-----+----------+--------+----------+-----+-----+-----+------+------+---------+----------+
2042+
|col_1|col_2|col_3| col_4| col_5| col_6|col_7|col_8|col_9|col_10|col_11| col_12| col_13|
2043+
+-----+-----+-----+----------+--------+----------+-----+-----+-----+------+------+---------+----------+
2044+
| 1| one| 1.1|2017-02...|20:57:06|2017-02-25| True|False| NULL| 61| 0.5|[1, 2, 3]|{'a': '...|
2045+
+-----+-----+-----+----------+--------+----------+-----+-----+-----+------+------+---------+----------+
2046+
"""
2047+
),
2048+
)
20062049

20072050

20082051
@pytest.mark.parametrize("data", [[0, 1, 2, 3], ["", "a"], [False, True], [None]])

0 commit comments

Comments
 (0)