mongodb-labs
diff --git a/‎.github/workflows/benchmark.yml
Lines changed: 4 additions & 4 deletions b/‎.github/workflows/benchmark.yml
Lines changed: 4 additions & 4 deletions
diff --git a/‎bindings/python/docs/source/changelog.rst
Lines changed: 4 additions & 1 deletion b/‎bindings/python/docs/source/changelog.rst
Lines changed: 4 additions & 1 deletion
diff --git a/‎bindings/python/docs/source/quickstart.rst
Lines changed: 25 additions & 0 deletions b/‎bindings/python/docs/source/quickstart.rst
Lines changed: 25 additions & 0 deletions
diff --git a/‎bindings/python/docs/source/supported_types.rst
Lines changed: 2 additions & 0 deletions b/‎bindings/python/docs/source/supported_types.rst
Lines changed: 2 additions & 0 deletions
diff --git a/‎bindings/python/pymongoarrow/context.py
Lines changed: 11 additions & 3 deletions b/‎bindings/python/pymongoarrow/context.py
Lines changed: 11 additions & 3 deletions
@@ -38,10 +38,6 @@ jobs:
       - name: Install Python dependencies
         run: |
           python -m pip install -U pip
-      - name: Install pymongoarrow
-        run: |
-          # Install the library
-          LIBBSON_INSTALL_DIR=$(pwd)/libbson python -m pip install -vvv -e ".[test]"
       - name: Run tests
         run: |
           set -eu
@@ -63,10 +59,14 @@ jobs:
           # the current target that this PR will be merged into is HEAD^1.
           git update-ref refs/bm/merge-target $(git log -n 1 --pretty=format:"%H" main --)
           git checkout --force refs/bm/pr --
+          # Install the library
+          LIBBSON_INSTALL_DIR=$(pwd)/libbson python -m pip install -vvv -e ".[test]"
           run_asv
 
 
           git checkout --force refs/bm/merge-target --
+          # Install the library
+          LIBBSON_INSTALL_DIR=$(pwd)/libbson python -m pip install -vvv -e ".[test]"
           run_asv
 
           asv compare refs/bm/merge-target refs/bm/pr --
 
@@ -1,13 +1,16 @@
 Changelog
 =========
 
+Changes in Version 0.7.0
+------------------------
+- Added support for BSON Embedded Document type.
+
 Changes in Version 0.6.3
 ------------------------
 
 - Added wheels for Linux AArch64 and Python 3.11.
 - Fixed handling of time zones in schema auto-discovery.
 
-
 Changes in Version 0.6.2
 ------------------------
 Note: We did not publish 0.6.0 or 0.6.1 due to technical difficulties.
 
@@ -71,6 +71,12 @@ There are multiple permissible type-identifiers for each supported BSON type.
 For a full-list of supported types and associated type-identifiers see
 :doc:`supported_types`.
 
+Nested data (embedded documents) is also supported::
+
+  from pymongoarrow.api import Schema
+  schema = Schema({'_id': int, 'amount': float, 'account': { 'name': str, 'account_number': int}})
+
+
 .. note::
 
    For all of the examples below, the schema can be omitted like so::
@@ -80,6 +86,7 @@ For a full-list of supported types and associated type-identifiers see
    In this case, PyMongoArrow will try to automatically apply a schema based on
    the data contained in the first batch.
 
+
 Find operations
 ---------------
 We are now ready to query our data. Let's start by running a ``find``
@@ -99,6 +106,12 @@ Or as :class:`numpy.ndarray` instances::
 In the NumPy case, the return value is a dictionary where the keys are field
 names and values are the corresponding arrays.
 
+Nested data (embedded documents) is also supported::
+
+  from pymongoarrow.api import Schema
+  schema = Schema({'_id': int, 'amount': float, 'account': { 'name': str, 'account_number': int}})
+  arrow_table = client.db.data.find_arrow_all({'amount': {'$gt': 0}}, schema=schema)
+
 Aggregate operations
 --------------------
 Running ``aggregate`` operations is similar to ``find``. Here is an example of
@@ -111,6 +124,14 @@ an aggregation that loads all records with an ``amount`` less than 10::
   # numpy
   ndarrays = client.db.data.aggregate_numpy_all([{'$match': {'amount': {'$lte': 10}}}], schema=schema)
 
+Nested data (embedded documents) is also supported::
+
+  from pymongoarrow.api import Schema
+  schema = Schema({'_id': int, 'amount': float, 'account': { 'name': str, 'account_number': int}})
+  arrow_table = client.db.data.find_arrow_all({'amount': {'$gt': 0}}, schema=schema)
+  arrow_table = client.db.data.aggregate_arrow_all([{'$match': {'amount': {'$lte': 10}}}], schema=schema)
+
+
 Writing to other formats
 ------------------------
 Result sets that have been loaded as Arrow's :class:`~pyarrow.Table` type can
@@ -128,6 +149,10 @@ referenced by the variable ``df`` to a CSV file ``out.csv``, run::
 
   df.to_csv('out.csv', index=False)
 
+.. note::
+
+  Nested data is supported for Parquet read/write but is not well supported
+  by Arrow or Pandas for CSV read/write.
 
 Writing back to MongoDB
 -----------------------
 
@@ -17,6 +17,8 @@ Support for additional types will be added in subsequent releases.
      - Type Identifiers
    * - String
      - :class:`py.str`, an instance of :class:`pyarrow.string`
+   * - Embedded document
+     - :class:`py.dict`, an instance of :class:`pyarrow.struct`
    * - ObjectId
      - :class:`py.bytes`, :class:`bson.ObjectId`, an instance of :class:`pymongoarrow.types.ObjectIdType`, an instance of :class:`pyarrow.FixedSizeBinaryScalar`
    * - Boolean
 
@@ -16,6 +16,7 @@
 from pymongoarrow.lib import (
     BoolBuilder,
     DatetimeBuilder,
+    DocumentBuilder,
     DoubleBuilder,
     Int32Builder,
     Int64Builder,
@@ -33,6 +34,7 @@
     _BsonArrowTypes.decimal128_str: StringBuilder,
     _BsonArrowTypes.string: StringBuilder,
     _BsonArrowTypes.bool: BoolBuilder,
+    _BsonArrowTypes.document: DocumentBuilder,
 }
 
 
@@ -68,16 +70,22 @@ def from_schema(cls, schema, codec_options=DEFAULT_CODEC_OPTIONS):
             return cls(schema, {}, codec_options)
 
         builder_map = {}
+        tzinfo = codec_options.tzinfo
+
         str_type_map = _get_internal_typemap(schema.typemap)
         for fname, ftype in str_type_map.items():
             builder_cls = _TYPE_TO_BUILDER_CLS[ftype]
             encoded_fname = fname.encode("utf-8")
+
             # special-case initializing builders for parameterized types
             if builder_cls == DatetimeBuilder:
                 arrow_type = schema.typemap[fname]
-                if codec_options.tzinfo is not None and arrow_type.tz is None:
-                    arrow_type = timestamp(arrow_type.unit, tz=codec_options.tzinfo)
-                builder_map[encoded_fname] = builder_cls(dtype=arrow_type)
+                if tzinfo is not None and arrow_type.tz is None:
+                    arrow_type = timestamp(arrow_type.unit, tz=tzinfo)
+                builder_map[encoded_fname] = DatetimeBuilder(dtype=arrow_type)
+            elif builder_cls == DocumentBuilder:
+                arrow_type = schema.typemap[fname]
+                builder_map[encoded_fname] = DocumentBuilder(arrow_type, tzinfo)
             else:
                 builder_map[encoded_fname] = builder_cls()
         return cls(schema, builder_map)