@@ -8,13 +8,37 @@ class SchemasNotEqualError(Exception):
88 pass
99
1010
def assert_schema_equality(s1, s2, ignore_nullable=False, ignore_metadata=False):
    """Assert that schemas ``s1`` and ``s2`` are equal.

    Dispatches to the strict comparison when no relaxation is requested,
    and to the flag-aware comparison otherwise.

    Args:
        s1: First schema to compare.
        s2: Second schema to compare.
        ignore_nullable: Forwarded to ``assert_schema_equality_full``.
        ignore_metadata: Forwarded to ``assert_schema_equality_full``.

    Returns:
        None. A mismatch is reported by the delegated assertion helper.
    """
    if ignore_nullable or ignore_metadata:
        # At least one relaxation requested: use the flag-aware comparison.
        assert_schema_equality_full(s1, s2, ignore_nullable, ignore_metadata)
        return
    assert_basic_schema_equality(s1, s2)
16+
1617
def assert_schema_equality_full(s1, s2, ignore_nullable=False, ignore_metadata=False):
    """Assert that two schemas are equal, optionally relaxing the comparison.

    Fields are compared pairwise, in order, with ``are_structfields_equal``.

    Args:
        s1: First schema; must support ``len()`` and iteration over its fields.
        s2: Second schema; same requirements as ``s1``.
        ignore_nullable: Passed to ``are_structfields_equal`` as its
            nullability flag.
        ignore_metadata: Passed to ``are_structfields_equal`` as its
            metadata flag.

    Raises:
        SchemasNotEqualError: If the schemas differ in length or any pair of
            fields does not match. The message is a two-column table of the
            fields; rows whose fields match under the requested flags are
            coloured with ``blue``.
    """
    def fields_match(sf1, sf2):
        # Single definition of "equal" shared by the check and the report,
        # so the highlighted rows always agree with what caused the failure.
        return are_structfields_equal(sf1, sf2, ignore_nullable, ignore_metadata)

    # After the length check a plain zip is sufficient: no padding can occur.
    if len(s1) == len(s2) and all(fields_match(sf1, sf2) for sf1, sf2 in zip(s1, s2)):
        return

    t = PrettyTable(["schema1", "schema2"])
    # zip_longest pads the shorter schema with None so extra fields still
    # show up as their own (non-matching) rows.
    for sf1, sf2 in six.moves.zip_longest(s1, s2):
        if fields_match(sf1, sf2):
            t.add_row([blue(sf1), blue(sf2)])
        else:
            t.add_row([sf1, sf2])
    raise SchemasNotEqualError("\n " + t.get_string())
37+
38+
39+ # deprecate this
40+ # perhaps it is a little faster, but do we really need this?
41+ # I think schema equality operations are really fast to begin with
1842def assert_basic_schema_equality (s1 , s2 ):
1943 if s1 != s2 :
2044 t = PrettyTable (["schema1" , "schema2" ])
@@ -27,8 +51,10 @@ def assert_basic_schema_equality(s1, s2):
2751 raise SchemasNotEqualError ("\n " + t .get_string ())
2852
2953
54+
55+ # deprecate this. ignore_nullable should be a flag.
3056def assert_schema_equality_ignore_nullable (s1 , s2 ):
31- if are_schemas_equal_ignore_nullable (s1 , s2 ) == False :
57+ if not are_schemas_equal_ignore_nullable (s1 , s2 ):
3258 t = PrettyTable (["schema1" , "schema2" ])
3359 zipped = list (six .moves .zip_longest (s1 , s2 ))
3460 for sf1 , sf2 in zipped :
@@ -39,6 +65,7 @@ def assert_schema_equality_ignore_nullable(s1, s2):
3965 raise SchemasNotEqualError ("\n " + t .get_string ())
4066
4167
68+ # deprecate this. ignore_nullable should be a flag.
4269def are_schemas_equal_ignore_nullable (s1 , s2 ):
4370 if len (s1 ) != len (s2 ):
4471 return False
@@ -49,21 +76,25 @@ def are_schemas_equal_ignore_nullable(s1, s2):
4976 return True
5077
5178
# TODO: rename "ignore_nullability" to "ignore_nullable" for consistent terminology
def are_structfields_equal(sf1, sf2, ignore_nullability=False, ignore_metadata=False):
    """Return True if two struct fields are equal under the given relaxations.

    Args:
        sf1: First field, or None (e.g. padding from ``zip_longest``).
        sf2: Second field, or None.
        ignore_nullability: When True, nullability is not compared and the
            data types are compared with
            ``are_datatypes_equal_ignore_nullable``.
        ignore_metadata: When True, the fields' ``metadata`` attributes are
            not compared.

    Returns:
        bool: With both flags False, the result of ``sf1 == sf2``. Otherwise
        True only if both fields are None, or both are present with equal
        names and equal values for every attribute that is not ignored.
    """
    if not ignore_nullability and not ignore_metadata:
        return sf1 == sf2

    if sf1 is None or sf2 is None:
        # A missing field only matches another missing field.
        return sf1 is None and sf2 is None

    if sf1.name != sf2.name:
        return False
    if not ignore_metadata and sf1.metadata != sf2.metadata:
        return False

    if ignore_nullability:
        return are_datatypes_equal_ignore_nullable(sf1.dataType, sf2.dataType)

    # Only metadata is ignored: nullability and data type must still match.
    # NOTE(review): plain dataType equality may still compare metadata of
    # nested fields - confirm whether nested metadata should be ignored too.
    return sf1.nullable == sf2.nullable and sf1.dataType == sf2.dataType
6595
6696
97+ # deprecate this
6798def are_datatypes_equal_ignore_nullable (dt1 , dt2 ):
6899 """Checks if datatypes are equal, descending into structs and arrays to
69100 ignore nullability.
0 commit comments