44
55
66from abc import ABC
7- from typing import Iterable , Set , Dict , List , Optional , Union
7+ from typing import Iterable , Set , Dict , List , Optional , Tuple , Union
88import numbers
99import re
1010
@@ -61,32 +61,36 @@ class ViewRepresentation(OperatorPlatform, ABC):
6161 """Structure to represent the columns of a query or a table.
6262 Abstract base class."""
6363
64- column_names : List [str ]
65- sources : List [
64+ column_names : Tuple [str ]
65+ sources : Tuple [
6666 "ViewRepresentation"
6767 ] # https://www.python.org/dev/peps/pep-0484/#forward-references
6868
6969 def __init__ (
7070 self ,
7171 column_names : Iterable [str ],
7272 * ,
73- sources : Optional [List ["ViewRepresentation" ]] = None ,
73+ sources : Optional [Iterable ["ViewRepresentation" ]] = None ,
7474 node_name : str ,
7575 ):
76- if isinstance ( column_names , str ):
77- column_names = [ column_names ]
78- else :
79- column_names = list (column_names ) # make sure a list and a disjoint copy
80- self . column_names = column_names
81- assert len (self . column_names ) > 0
82- for v in self . column_names :
76+ # don't let instances masquerade as iterables
77+ assert not isinstance ( column_names , str )
78+ assert not isinstance ( sources , OperatorPlatform )
79+ if not isinstance (column_names , tuple ):
80+ column_names = tuple ( column_names )
81+ assert len (column_names ) > 0
82+ for v in column_names :
8383 assert isinstance (v , str )
8484 assert len (column_names ) == len (set (column_names ))
85+ self .column_names = column_names
8586 if sources is None :
86- sources = []
87+ sources = ()
88+ else :
89+ if not isinstance (sources , tuple ):
90+ sources = tuple (sources )
8791 for si in sources :
8892 assert isinstance (si , ViewRepresentation )
89- self .sources = [ si for si in sources ]
93+ self .sources = sources
9094 OperatorPlatform .__init__ (self , node_name = node_name )
9195
9296 def column_map (self ) -> collections .OrderedDict :
@@ -155,7 +159,7 @@ def columns_used_from_sources(self, using=None):
155159 raise NotImplementedError ("base method called" )
156160
157161 def columns_produced (self ):
158- return self .column_names . copy ( )
162+ return list ( self .column_names )
159163
160164 def _columns_used_implementation (self , * , using , columns_currently_using_records ):
161165 self_merged_rep_id = self .merged_rep_id ()
@@ -383,7 +387,7 @@ def as_table_description(
383387 ):
384388 return TableDescription (
385389 table_name = table_name ,
386- column_names = self .column_names . copy () ,
390+ column_names = self .column_names ,
387391 qualifiers = qualifiers ,
388392 )
389393
@@ -937,7 +941,7 @@ def __init__(
937941 if isinstance (reverse , str ):
938942 reverse = [reverse ]
939943 self .reverse = reverse
940- column_names = source .column_names . copy ( )
944+ column_names = list ( source .column_names )
941945 consumed_cols = set ()
942946 for (k , o ) in parsed_ops .items ():
943947 o .get_column_names (consumed_cols )
@@ -1547,7 +1551,7 @@ def _equiv_nodes(self, other):
15471551 return True
15481552
15491553 def columns_used_from_sources (self , using = None ):
1550- cols = set (self .column_names . copy () )
1554+ cols = set (self .column_names )
15511555 if using is None :
15521556 return [cols ]
15531557 cols = cols .intersection (using ).union (self .order_columns )
@@ -1705,7 +1709,7 @@ def __init__(self, a, b, *, by, jointype, check_all_common_keys_in_by=False):
17051709 "Different definition of table object on a/b for: " + k
17061710 )
17071711 # check columns
1708- column_names = a .column_names . copy ( )
1712+ column_names = list ( a .column_names )
17091713 columns_seen = set (column_names )
17101714 for ci in b .column_names :
17111715 if ci not in columns_seen :
@@ -1729,6 +1733,12 @@ def __init__(self, a, b, *, by, jointype, check_all_common_keys_in_by=False):
17291733 "check_all_common_keys_in_by set, and the following common keys are are not in the by-clause: "
17301734 + str (missing_common )
17311735 )
1736+ # try to re-use column names if possible, saves space in deeply nested join trees.
1737+ column_names = tuple (column_names )
1738+ if isinstance (a .column_names , tuple ) and (set (column_names ) == set (a .column_names )):
1739+ column_names = a .column_names
1740+ elif isinstance (b .column_names , tuple ) and (set (column_names ) == set (b .column_names )):
1741+ column_names = b .column_names
17321742 ViewRepresentation .__init__ (
17331743 self ,
17341744 column_names = column_names ,
@@ -1825,7 +1835,7 @@ def __init__(self, a, b, *, id_column="table_name", a_name="a", b_name="b"):
18251835 raise ValueError ("a and b should have same set of column names" )
18261836 if id_column is not None and id_column in sources [0 ].column_names :
18271837 raise ValueError ("id_column should not be an input table column name" )
1828- column_names = sources [0 ].column_names . copy ( )
1838+ column_names = list ( sources [0 ].column_names )
18291839 if id_column is not None :
18301840 assert id_column not in column_names
18311841 column_names .append (id_column )
0 commit comments