1111# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212# See the License for the specific language governing permissions and
1313# limitations under the License.
14- from pyarrow import ListArray , StructArray , Table , timestamp
14+ from pyarrow import ListArray , StructArray , Table
1515from pyarrow .types import is_struct
1616
1717from pymongoarrow .types import _BsonArrowTypes , _get_internal_typemap
1818
19- try :
20- from pymongoarrow .lib import (
21- BinaryBuilder ,
22- BoolBuilder ,
23- BuilderManager ,
24- CodeBuilder ,
25- Date32Builder ,
26- Date64Builder ,
27- DatetimeBuilder ,
28- Decimal128Builder ,
29- DocumentBuilder ,
30- DoubleBuilder ,
31- Int32Builder ,
32- Int64Builder ,
33- ListBuilder ,
34- NullBuilder ,
35- ObjectIdBuilder ,
36- StringBuilder ,
37- )
38-
39- _TYPE_TO_BUILDER_CLS = {
40- _BsonArrowTypes .int32 : Int32Builder ,
41- _BsonArrowTypes .int64 : Int64Builder ,
42- _BsonArrowTypes .double : DoubleBuilder ,
43- _BsonArrowTypes .datetime : DatetimeBuilder ,
44- _BsonArrowTypes .objectid : ObjectIdBuilder ,
45- _BsonArrowTypes .decimal128 : Decimal128Builder ,
46- _BsonArrowTypes .string : StringBuilder ,
47- _BsonArrowTypes .bool : BoolBuilder ,
48- _BsonArrowTypes .document : DocumentBuilder ,
49- _BsonArrowTypes .array : ListBuilder ,
50- _BsonArrowTypes .binary : BinaryBuilder ,
51- _BsonArrowTypes .code : CodeBuilder ,
52- _BsonArrowTypes .date32 : Date32Builder ,
53- _BsonArrowTypes .date64 : Date64Builder ,
54- _BsonArrowTypes .null : NullBuilder ,
55- }
56- except ImportError :
57- pass
58-
5919
6020class PyMongoArrowContext :
6121 """A context for converting BSON-formatted data to an Arrow Table."""
@@ -73,67 +33,68 @@ def __init__(self, schema, codec_options=None):
7333 self .tzinfo = codec_options .tzinfo
7434 else :
7535 self .tzinfo = None
76- builder_map = {}
36+ schema_map = {}
7737 if self .schema is not None :
7838 str_type_map = _get_internal_typemap (schema .typemap )
79- _parse_types (str_type_map , builder_map , self .tzinfo )
39+ _parse_types (str_type_map , schema_map , self .tzinfo )
40+
41+ # Delayed import to prevent import errors for unbuilt library.
42+ from pymongoarrow .lib import BuilderManager
8043
81- self .manager = BuilderManager (builder_map , self .schema is not None , self .tzinfo )
44+ self .manager = BuilderManager (schema_map , self .schema is not None , self .tzinfo )
8245
def process_bson_stream(self, stream):
    """Feed a buffer of BSON data to the builder manager.

    :param stream: the BSON data to process; it is passed to the manager
        together with its length, ``len(stream)``.
    """
    stream_length = len(stream)
    self.manager.process_bson_stream(stream, stream_length)
8548
def finish(self):
    """Finish the builder manager and return the result as an Arrow Table.

    The map returned by ``self.manager.finish()`` is first passed through
    ``_parse_array_map``. When no schema was given, the table's column
    names are the keys of that map; otherwise the Arrow schema produced by
    ``self.schema.to_arrow()`` is used.

    :return: a ``pyarrow.Table`` built from the resulting arrays.
    """
    array_map = _parse_array_map(self.manager.finish())
    columns = list(array_map.values())

    # Without a schema, fall back to naming the columns after the map keys.
    if self.schema is None:
        return Table.from_arrays(arrays=columns, names=list(array_map.keys()))

    return Table.from_arrays(arrays=columns, schema=self.schema.to_arrow())
9255
9356
def _parse_array_map(array_map):
    """Fold the flat array map into nested Arrow arrays, in place.

    Keys of ``array_map`` are UTF-8 encoded field paths. An entry whose
    ``type_marker`` is ``_BsonArrowTypes.document`` is iterated to obtain the
    encoded names of its sub-fields, whose arrays are looked up under
    ``<field>.<name>``. An entry whose ``type_marker`` is
    ``_BsonArrowTypes.array`` is handed to ``ListArray.from_arrays`` as the
    offsets argument, with the values taken from the entry stored under
    ``<field>[]``. Entries with any other marker are left untouched.

    Each nested entry is replaced by the assembled ``StructArray`` or
    ``ListArray`` and the child entries that were folded into it are removed
    from the map.

    :param array_map: dict mapping encoded field path to its array.
    :return: the same ``array_map`` object, after modification.
    :raises KeyError: if a child entry referenced by a document or list
        entry is not present in ``array_map``.
    """
    # Decoded names of child entries that have been folded into a parent.
    to_remove = []

    # Traverse the map right to left.
    # NOTE(review): presumably children are inserted after their parents, so
    # walking backwards assembles inner structures first - confirm against
    # BuilderManager.finish().
    for key, value in reversed(array_map.items()):
        field = key.decode("utf-8")
        if value.type_marker == _BsonArrowTypes.document:
            full_names = [f"{field}.{name.decode('utf-8')}" for name in value]
            arrs = [array_map[c.encode("utf-8")] for c in full_names]
            # Store under the original bytes key. Writing under the decoded
            # str key would insert a new entry while the dict is being
            # iterated (a size change, which raises RuntimeError) and would
            # leave the unassembled placeholder behind under ``key``.
            array_map[key] = StructArray.from_arrays(arrs, names=value)
            to_remove.extend(full_names)
        elif value.type_marker == _BsonArrowTypes.array:
            child_name = field + "[]"
            to_remove.append(child_name)
            child = array_map[child_name.encode("utf-8")]
            array_map[key] = ListArray.from_arrays(value, child)

    # Deletion is deferred until after the loop so the dict keeps its size
    # while it is being iterated.
    for field in to_remove:
        array_map.pop(field.encode("utf-8"), None)

    return array_map
12080
121- def _parse_types (str_type_map , builder_map , tzinfo ):
81+
82+ def _parse_types (str_type_map , schema_map , tzinfo ):
12283 for fname , (ftype , arrow_type ) in str_type_map .items ():
123- builder_cls = _TYPE_TO_BUILDER_CLS [ftype ]
12484 encoded_fname = fname .encode ("utf-8" )
85+ schema_map [encoded_fname ] = ftype , arrow_type
12586
12687 # special-case nested builders
127- if builder_cls == DocumentBuilder :
88+ if ftype == _BsonArrowTypes . document :
12889 # construct a sub type map here
12990 sub_type_map = {}
13091 for i in range (arrow_type .num_fields ):
13192 field = arrow_type [i ]
13293 sub_name = f"{ fname } .{ field .name } "
13394 sub_type_map [sub_name ] = field .type
13495 sub_type_map = _get_internal_typemap (sub_type_map )
135- _parse_types (sub_type_map , builder_map , tzinfo )
136- elif builder_cls == ListBuilder :
96+ _parse_types (sub_type_map , schema_map , tzinfo )
97+ elif ftype == _BsonArrowTypes . array :
13798 if is_struct (arrow_type .value_type ):
13899 # construct a sub type map here
139100 sub_type_map = {}
@@ -142,15 +103,4 @@ def _parse_types(str_type_map, builder_map, tzinfo):
142103 sub_name = f"{ fname } [].{ field .name } "
143104 sub_type_map [sub_name ] = field .type
144105 sub_type_map = _get_internal_typemap (sub_type_map )
145- _parse_types (sub_type_map , sub_type_map , tzinfo )
146-
147- # special-case initializing builders for parameterized types
148- if builder_cls == DatetimeBuilder :
149- if tzinfo is not None and arrow_type .tz is None :
150- arrow_type = timestamp (arrow_type .unit , tz = tzinfo ) # noqa: PLW2901
151- builder_map [encoded_fname ] = DatetimeBuilder (dtype = arrow_type )
152- elif builder_cls == BinaryBuilder :
153- subtype = arrow_type .subtype
154- builder_map [fname ] = BinaryBuilder (subtype )
155- else :
156- builder_map [fname ] = builder_cls ()
106+ _parse_types (sub_type_map , schema_map , tzinfo )
0 commit comments