1212from collections import namedtuple
1313from decimal import Decimal
1414from itertools import product
15+ from textwrap import dedent
1516from typing import Tuple
1617from unittest import mock
1718
9192 TestFiles ,
9293 Utils ,
9394 multithreaded_run ,
95+ structured_types_enabled_session ,
96+ structured_types_supported ,
9497)
9598
9699# Python 3.8 needs to use typing.Iterable because collections.abc.Iterable is not subscriptable
@@ -1877,6 +1880,9 @@ def test_create_dataframe_with_variant(session):
18771880 reason = "local testing does not fully support structured types yet." ,
18781881)
18791882def test_show_dataframe_spark (session ):
1883+ if not structured_types_supported (session , False ):
1884+ pytest .skip ("Test requires structured type support." )
1885+
18801886 data = [
18811887 1 ,
18821888 "one" ,
@@ -1892,117 +1898,154 @@ def test_show_dataframe_spark(session):
18921898 [1 , 2 , 3 ],
18931899 {"a" : "foo" },
18941900 ]
1895- df = session .create_dataframe (
1896- [data ],
1897- schema = StructType (
1898- [StructField (f"col_{ i + 1 } " , VariantType ()) for i in range (len (data ))]
1899- ),
1900- )
1901- spark_col_names = [f"col_{ i + 1 } " for i in range (len (data ))]
1902-
def compare_show_string(actual: str, expected: str) -> bool:
    """Return True when two rendered tables match line by line.

    Both strings are stripped of surrounding whitespace and split on
    newlines. Each pair of lines is compared after stripping its own leading
    and trailing whitespace, so the indentation of the expected literal does
    not matter.

    A differing number of lines counts as a mismatch: pairing the lines with
    ``zip`` alone stops at the shorter input, which would let missing or
    extra rows go unnoticed.
    """
    actual_lines = actual.strip().split("\n")
    expected_lines = expected.strip().split("\n")
    if len(actual_lines) != len(expected_lines):
        return False
    return all(
        got.strip() == want.strip()
        for got, want in zip(actual_lines, expected_lines)
    )
1908-
1909- assert compare_show_string (
1910- df ._show_string_spark (_emit_ast = session .ast_enabled ).strip (),
1911- """
1912- +-------+-------+-------+--------------------+--------+----------+-------+-------+-------+--------+--------+---------+------------+
1913- |"COL_1"|"COL_2"|"COL_3"| "COL_4"| "COL_5"| "COL_6"|"COL_7"|"COL_8"|"COL_9"|"COL_10"|"COL_11"| "COL_12"| "COL_13"|
1914- +-------+-------+-------+--------------------+--------+----------+-------+-------+-------+--------+--------+---------+------------+
1915- | 1| one| 1.1|2017-02-24T12:00:...|20:57:06|2017-02-25| True| False| NULL| 61| 0.5|[1, 2, 3]|{'a': 'foo'}|
1916- +-------+-------+-------+--------------------+--------+----------+-------+-------+-------+--------+--------+---------+------------+
1917- """ ,
1918- )
1919- assert compare_show_string (
1920- df ._show_string_spark (
1921- _emit_ast = session .ast_enabled , _spark_column_names = spark_col_names
1922- ),
1923- """
1924- +-----+-----+-----+--------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1925- |col_1|col_2|col_3| col_4| col_5| col_6|col_7|col_8|col_9|col_10|col_11| col_12| col_13|
1926- +-----+-----+-----+--------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1927- | 1| one| 1.1|2017-02-24T12:00:...|20:57:06|2017-02-25| True|False| NULL| 61| 0.5|[1, 2, 3]|{'a': 'foo'}|
1928- +-----+-----+-----+--------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1929- """ ,
1930- )
1931- assert compare_show_string (
1932- df ._show_string_spark (
1933- vertical = True ,
1934- _emit_ast = session .ast_enabled ,
1935- _spark_column_names = spark_col_names ,
1936- ),
1937- """
1938- -RECORD 0----------------------
1939- col_1 | 1
1940- col_2 | one
1941- col_3 | 1.1
1942- col_4 | 2017-02-24T12:00:...
1943- col_5 | 20:57:06
1944- col_6 | 2017-02-25
1945- col_7 | True
1946- col_8 | False
1947- col_9 | NULL
1948- col_10 | 61
1949- col_11 | 0.5
1950- col_12 | [1, 2, 3]
1951- col_13 | {'a': 'foo'}
1952- """ ,
1953- )
1954- assert compare_show_string (
1955- df ._show_string_spark (
1956- vertical = True ,
1957- truncate = False ,
1958- _emit_ast = session .ast_enabled ,
1959- _spark_column_names = spark_col_names ,
1960- ),
1961- """
1962- -RECORD 0----------------------------
1963- col_1 | 1
1964- col_2 | one
1965- col_3 | 1.1
1966- col_4 | 2017-02-24T12:00:05.456000
1967- col_5 | 20:57:06
1968- col_6 | 2017-02-25
1969- col_7 | True
1970- col_8 | False
1971- col_9 | NULL
1972- col_10 | 61
1973- col_11 | 0.5
1974- col_12 | [1, 2, 3]
1975- col_13 | {'a': 'foo'}
1976- """ ,
1977- )
1978- assert compare_show_string (
1979- df ._show_string_spark (
1980- truncate = False ,
1981- _emit_ast = session .ast_enabled ,
1982- _spark_column_names = spark_col_names ,
1983- ),
1984- """
1985- +-----+-----+-----+--------------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1986- |col_1|col_2|col_3|col_4 |col_5 |col_6 |col_7|col_8|col_9|col_10|col_11|col_12 |col_13 |
1987- +-----+-----+-----+--------------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1988- |1 |one |1.1 |2017-02-24T12:00:05.456000|20:57:06|2017-02-25|True |False|NULL |61 |0.5 |[1, 2, 3]|{'a': 'foo'}|
1989- +-----+-----+-----+--------------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1990- """ ,
1991- )
1992- assert compare_show_string (
1993- df ._show_string_spark (
1994- truncate = 10 ,
1995- _emit_ast = session .ast_enabled ,
1996- _spark_column_names = spark_col_names ,
1997- ),
1998- """
1999- +-----+-----+-----+----------+--------+----------+-----+-----+-----+------+------+---------+----------+
2000- |col_1|col_2|col_3| col_4| col_5| col_6|col_7|col_8|col_9|col_10|col_11| col_12| col_13|
2001- +-----+-----+-----+----------+--------+----------+-----+-----+-----+------+------+---------+----------+
2002- | 1| one| 1.1|2017-02...|20:57:06|2017-02-25| True|False| NULL| 61| 0.5|[1, 2, 3]|{'a': '...|
2003- +-----+-----+-----+----------+--------+----------+-----+-----+-----+------+------+---------+----------+
2004- """ ,
2005- )
1901+
1902+ with structured_types_enabled_session (session ) as session :
1903+ schema = StructType (
1904+ [
1905+ StructField ("col_1" , IntegerType ()),
1906+ StructField ("col_2" , StringType ()),
1907+ StructField ("col_3" , FloatType ()),
1908+ StructField ("col_4" , TimestampType ()),
1909+ StructField ("col_5" , TimeType ()),
1910+ StructField ("col_6" , DateType ()),
1911+ StructField ("col_7" , BooleanType ()),
1912+ StructField ("col_8" , BooleanType ()),
1913+ StructField ("col_9" , VariantType ()),
1914+ StructField ("col_10" , BinaryType ()),
1915+ StructField ("col_11" , DecimalType ()),
1916+ StructField ("col_12" , ArrayType (IntegerType ())),
1917+ StructField ("col_13" , MapType (StringType (), StringType ())),
1918+ ]
1919+ )
1920+ df = session .create_dataframe ([data ], schema = schema )
1921+ spark_col_names = [f"col_{ i + 1 } " for i in range (len (data ))]
1922+
def compare_show_string(actual: str, expected: str) -> bool:
    """Return True when two rendered tables match line by line.

    Both strings are stripped of surrounding whitespace and split on
    newlines. Each pair of lines is compared after stripping its own leading
    and trailing whitespace, so the indentation of the expected literal does
    not matter.

    A differing number of lines counts as a mismatch: pairing the lines with
    ``zip`` alone stops at the shorter input, which would let missing or
    extra rows go unnoticed.
    """
    actual_lines = actual.strip().split("\n")
    expected_lines = expected.strip().split("\n")
    if len(actual_lines) != len(expected_lines):
        return False
    return all(
        got.strip() == want.strip()
        for got, want in zip(actual_lines, expected_lines)
    )
1930+
1931+ print ("\n " )
1932+ print (
1933+ df ._show_string_spark (
1934+ truncate = False ,
1935+ _emit_ast = session .ast_enabled ,
1936+ _spark_column_names = spark_col_names ,
1937+ ).strip ()
1938+ )
1939+ print ("\n " )
1940+ assert compare_show_string (
1941+ df ._show_string_spark (_emit_ast = session .ast_enabled ).strip (),
1942+ dedent (
1943+ """
1944+ +-------+-------+-------+--------------------+--------+----------+-------+-------+-------+--------+--------+---------+------------+
1945+ |"COL_1"|"COL_2"|"COL_3"| "COL_4"| "COL_5"| "COL_6"|"COL_7"|"COL_8"|"COL_9"|"COL_10"|"COL_11"| "COL_12"| "COL_13"|
1946+ +-------+-------+-------+--------------------+--------+----------+-------+-------+-------+--------+--------+---------+------------+
1947+ | 1| one| 1.1|2017-02-24T12:00:...|20:57:06|2017-02-25| True| False| NULL| 61| 0.5|[1, 2, 3]|{'a': 'foo'}|
1948+ +-------+-------+-------+--------------------+--------+----------+-------+-------+-------+--------+--------+---------+------------+
1949+ """
1950+ ),
1951+ )
1952+ assert compare_show_string (
1953+ df ._show_string_spark (
1954+ _emit_ast = session .ast_enabled , _spark_column_names = spark_col_names
1955+ ),
1956+ dedent (
1957+ """
1958+ +-----+-----+-----+--------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1959+ |col_1|col_2|col_3| col_4| col_5| col_6|col_7|col_8|col_9|col_10|col_11| col_12| col_13|
1960+ +-----+-----+-----+--------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1961+ | 1| one| 1.1|2017-02-24T12:00:...|20:57:06|2017-02-25| True|False| NULL| 61| 0.5|[1, 2, 3]|{'a': 'foo'}|
1962+ +-----+-----+-----+--------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
1963+ """
1964+ ),
1965+ )
1966+ assert compare_show_string (
1967+ df ._show_string_spark (
1968+ vertical = True ,
1969+ _emit_ast = session .ast_enabled ,
1970+ _spark_column_names = spark_col_names ,
1971+ ),
1972+ dedent (
1973+ """
1974+ -RECORD 0----------------------
1975+ col_1 | 1
1976+ col_2 | one
1977+ col_3 | 1.1
1978+ col_4 | 2017-02-24T12:00:...
1979+ col_5 | 20:57:06
1980+ col_6 | 2017-02-25
1981+ col_7 | True
1982+ col_8 | False
1983+ col_9 | NULL
1984+ col_10 | 61
1985+ col_11 | 0.5
1986+ col_12 | [1, 2, 3]
1987+ col_13 | {'a': 'foo'}
1988+ """
1989+ ),
1990+ )
1991+ assert compare_show_string (
1992+ df ._show_string_spark (
1993+ vertical = True ,
1994+ truncate = False ,
1995+ _emit_ast = session .ast_enabled ,
1996+ _spark_column_names = spark_col_names ,
1997+ ),
1998+ dedent (
1999+ """
2000+ -RECORD 0----------------------------
2001+ col_1 | 1
2002+ col_2 | one
2003+ col_3 | 1.1
2004+ col_4 | 2017-02-24T12:00:05.456000
2005+ col_5 | 20:57:06
2006+ col_6 | 2017-02-25
2007+ col_7 | True
2008+ col_8 | False
2009+ col_9 | NULL
2010+ col_10 | 61
2011+ col_11 | 0.5
2012+ col_12 | [1, 2, 3]
2013+ col_13 | {'a': 'foo'}
2014+ """
2015+ ),
2016+ )
2017+ assert compare_show_string (
2018+ df ._show_string_spark (
2019+ truncate = False ,
2020+ _emit_ast = session .ast_enabled ,
2021+ _spark_column_names = spark_col_names ,
2022+ ),
2023+ dedent (
2024+ """
2025+ +-----+-----+-----+--------------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
2026+ |col_1|col_2|col_3|col_4 |col_5 |col_6 |col_7|col_8|col_9|col_10|col_11|col_12 |col_13 |
2027+ +-----+-----+-----+--------------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
2028+ |1 |one |1.1 |2017-02-24T12:00:05.456000|20:57:06|2017-02-25|True |False|NULL |61 |0.5 |[1, 2, 3]|{'a': 'foo'}|
2029+ +-----+-----+-----+--------------------------+--------+----------+-----+-----+-----+------+------+---------+------------+
2030+ """
2031+ ),
2032+ )
2033+ assert compare_show_string (
2034+ df ._show_string_spark (
2035+ truncate = 10 ,
2036+ _emit_ast = session .ast_enabled ,
2037+ _spark_column_names = spark_col_names ,
2038+ ),
2039+ dedent (
2040+ """
2041+ +-----+-----+-----+----------+--------+----------+-----+-----+-----+------+------+---------+----------+
2042+ |col_1|col_2|col_3| col_4| col_5| col_6|col_7|col_8|col_9|col_10|col_11| col_12| col_13|
2043+ +-----+-----+-----+----------+--------+----------+-----+-----+-----+------+------+---------+----------+
2044+ | 1| one| 1.1|2017-02...|20:57:06|2017-02-25| True|False| NULL| 61| 0.5|[1, 2, 3]|{'a': '...|
2045+ +-----+-----+-----+----------+--------+----------+-----+-----+-----+------+------+---------+----------+
2046+ """
2047+ ),
2048+ )
20062049
20072050
20082051@pytest .mark .parametrize ("data" , [[0 , 1 , 2 , 3 ], ["" , "a" ], [False , True ], [None ]])
0 commit comments