@@ -67,6 +67,9 @@ class Document(proto.Message):
6767 representations use base64.
6868
6969 This field is a member of `oneof`_ ``source``.
70+ docid (str):
71+ Optional. An internal identifier for the
72+ document. Should be loggable (no PII).
7073 mime_type (str):
7174 An IANA published `media type (MIME
7275 type) <https://www.iana.org/assignments/media-types/media-types.xhtml>`__.
@@ -108,6 +111,12 @@ class Document(proto.Message):
108111 Parsed layout of the document.
109112 chunked_document (google.cloud.documentai_v1beta3.types.Document.ChunkedDocument):
110113 Document chunked based on chunking config.
114+ blob_assets (MutableSequence[google.cloud.documentai_v1beta3.types.Document.BlobAsset]):
115+ Optional. The blob assets in this document.
116+ This is used to store the content of the inline
117+ blobs in this document, e.g. image bytes, such
118+ that it can be referenced by other fields in the
119+ document via asset id.
111120 """
112121
113122 class ShardInfo (proto .Message ):
@@ -1819,6 +1828,20 @@ class TextChange(proto.Message):
18191828 message = "Document.Provenance" ,
18201829 )
18211830
class Annotations(proto.Message):
    r"""Annotation attached to a block or a chunk.

    Attributes:
        description (str):
            The description of the content with this
            annotation.
    """

    description: str = proto.Field(proto.STRING, number=1)
1844+
18221845 class DocumentLayout (proto .Message ):
18231846 r"""Represents the parsed layout of a document as a collection of
18241847 blocks that the document is divided into.
@@ -1851,11 +1874,17 @@ class DocumentLayoutBlock(proto.Message):
18511874 list_block (google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock.LayoutListBlock):
18521875 Block consisting of list content/structure.
18531876
1877+ This field is a member of `oneof`_ ``block``.
1878+ image_block (google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock.LayoutImageBlock):
1879+ Block consisting of image content.
1880+
18541881 This field is a member of `oneof`_ ``block``.
18551882 block_id (str):
18561883 ID of the block.
18571884 page_span (google.cloud.documentai_v1beta3.types.Document.DocumentLayout.DocumentLayoutBlock.LayoutPageSpan):
18581885 Page span of the block.
1886+ bounding_box (google.cloud.documentai_v1beta3.types.BoundingPoly):
1887+ Identifies the bounding box for the block.
18591888 """
18601889
18611890 class LayoutPageSpan (proto .Message ):
@@ -2028,6 +2057,74 @@ class LayoutListEntry(proto.Message):
20282057 message = "Document.DocumentLayout.DocumentLayoutBlock" ,
20292058 )
20302059
class LayoutImageBlock(proto.Message):
    r"""Represents an image type block.

    This message has `oneof`_ fields (mutually exclusive fields).
    For each oneof, at most one member field can be set at the same time.
    Setting any member of the oneof automatically clears all other
    members.

    .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

    Attributes:
        blob_asset_id (str):
            Optional. Asset id of the inline image. If set, find the
            image content in the blob_assets field.

            This field is a member of `oneof`_ ``image_source``.
        gcs_uri (str):
            Optional. Google Cloud Storage uri of the
            image.

            This field is a member of `oneof`_ ``image_source``.
        data_uri (str):
            Optional. Data uri of the image. It is composed of four
            parts: a prefix (``data:``), a MIME type indicating the
            type of data, an optional ``base64`` token if non-textual,
            and the data itself:
            ``data:[<media type>][;base64],<data>``.

            This field is a member of `oneof`_ ``image_source``.
        mime_type (str):
            Mime type of the image. An IANA published `media type (MIME
            type) <https://www.iana.org/assignments/media-types/media-types.xhtml>`__.
        image_text (str):
            Text extracted from the image using OCR or
            alt text describing the image.
        annotations (google.cloud.documentai_v1beta3.types.Document.Annotations):
            Annotation of the image block.
    """

    # Members of the ``image_source`` oneof: setting one clears the others.
    blob_asset_id: str = proto.Field(proto.STRING, number=4, oneof="image_source")
    gcs_uri: str = proto.Field(proto.STRING, number=5, oneof="image_source")
    data_uri: str = proto.Field(proto.STRING, number=6, oneof="image_source")

    # Plain (non-oneof) fields.
    mime_type: str = proto.Field(proto.STRING, number=1)
    image_text: str = proto.Field(proto.STRING, number=2)
    annotations: "Document.Annotations" = proto.Field(
        proto.MESSAGE, number=3, message="Document.Annotations"
    )
2127+
20312128 text_block : "Document.DocumentLayout.DocumentLayoutBlock.LayoutTextBlock" = proto .Field (
20322129 proto .MESSAGE ,
20332130 number = 2 ,
@@ -2046,6 +2143,12 @@ class LayoutListEntry(proto.Message):
20462143 oneof = "block" ,
20472144 message = "Document.DocumentLayout.DocumentLayoutBlock.LayoutListBlock" ,
20482145 )
2146+ image_block : "Document.DocumentLayout.DocumentLayoutBlock.LayoutImageBlock" = proto .Field (
2147+ proto .MESSAGE ,
2148+ number = 7 ,
2149+ oneof = "block" ,
2150+ message = "Document.DocumentLayout.DocumentLayoutBlock.LayoutImageBlock" ,
2151+ )
20492152 block_id : str = proto .Field (
20502153 proto .STRING ,
20512154 number = 1 ,
@@ -2055,6 +2158,11 @@ class LayoutListEntry(proto.Message):
20552158 number = 5 ,
20562159 message = "Document.DocumentLayout.DocumentLayoutBlock.LayoutPageSpan" ,
20572160 )
2161+ bounding_box : geometry .BoundingPoly = proto .Field (
2162+ proto .MESSAGE ,
2163+ number = 6 ,
2164+ message = geometry .BoundingPoly ,
2165+ )
20582166
20592167 blocks : MutableSequence [
20602168 "Document.DocumentLayout.DocumentLayoutBlock"
@@ -2088,6 +2196,8 @@ class Chunk(proto.Message):
20882196 Page headers associated with the chunk.
20892197 page_footers (MutableSequence[google.cloud.documentai_v1beta3.types.Document.ChunkedDocument.Chunk.ChunkPageFooter]):
20902198 Page footers associated with the chunk.
2199+ chunk_fields (MutableSequence[google.cloud.documentai_v1beta3.types.Document.ChunkedDocument.Chunk.ChunkField]):
2200+ Chunk fields inside this chunk.
20912201 """
20922202
20932203 class ChunkPageSpan (proto .Message ):
@@ -2149,6 +2259,112 @@ class ChunkPageFooter(proto.Message):
21492259 message = "Document.ChunkedDocument.Chunk.ChunkPageSpan" ,
21502260 )
21512261
class ImageChunkField(proto.Message):
    r"""The image chunk field in the chunk.

    This message has `oneof`_ fields (mutually exclusive fields).
    For each oneof, at most one member field can be set at the same time.
    Setting any member of the oneof automatically clears all other
    members.

    .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

    Attributes:
        blob_asset_id (str):
            Optional. Asset id of the inline image. If set, find the
            image content in the blob_assets field.

            This field is a member of `oneof`_ ``image_source``.
        gcs_uri (str):
            Optional. Google Cloud Storage uri of the
            image.

            This field is a member of `oneof`_ ``image_source``.
        data_uri (str):
            Optional. Data uri of the image. It is composed of four
            parts: a prefix (``data:``), a MIME type indicating the
            type of data, an optional ``base64`` token if non-textual,
            and the data itself:
            ``data:[<media type>][;base64],<data>``.

            This field is a member of `oneof`_ ``image_source``.
        annotations (google.cloud.documentai_v1beta3.types.Document.Annotations):
            Annotation of the image chunk field.
    """

    # Members of the ``image_source`` oneof: setting one clears the others.
    blob_asset_id: str = proto.Field(proto.STRING, number=1, oneof="image_source")
    gcs_uri: str = proto.Field(proto.STRING, number=2, oneof="image_source")
    data_uri: str = proto.Field(proto.STRING, number=3, oneof="image_source")

    annotations: "Document.Annotations" = proto.Field(
        proto.MESSAGE, number=4, message="Document.Annotations"
    )
2314+
class TableChunkField(proto.Message):
    r"""The table chunk field in the chunk.

    Attributes:
        annotations (google.cloud.documentai_v1beta3.types.Document.Annotations):
            Annotation of the table chunk field.
    """

    annotations: "Document.Annotations" = proto.Field(
        proto.MESSAGE, number=1, message="Document.Annotations"
    )
2328+
class ChunkField(proto.Message):
    r"""The chunk field in the chunk. A chunk field could be one of
    the various types (e.g. image, table) supported.

    This message has `oneof`_ fields (mutually exclusive fields).
    For each oneof, at most one member field can be set at the same time.
    Setting any member of the oneof automatically clears all other
    members.

    .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

    Attributes:
        image_chunk_field (google.cloud.documentai_v1beta3.types.Document.ChunkedDocument.Chunk.ImageChunkField):
            The image chunk field in the chunk.

            This field is a member of `oneof`_ ``field_type``.
        table_chunk_field (google.cloud.documentai_v1beta3.types.Document.ChunkedDocument.Chunk.TableChunkField):
            The table chunk field in the chunk.

            This field is a member of `oneof`_ ``field_type``.
    """

    # Both fields belong to the ``field_type`` oneof, so a ChunkField
    # holds at most one of them at a time.
    image_chunk_field: "Document.ChunkedDocument.Chunk.ImageChunkField" = proto.Field(
        proto.MESSAGE,
        number=1,
        oneof="field_type",
        message="Document.ChunkedDocument.Chunk.ImageChunkField",
    )
    table_chunk_field: "Document.ChunkedDocument.Chunk.TableChunkField" = proto.Field(
        proto.MESSAGE,
        number=2,
        oneof="field_type",
        message="Document.ChunkedDocument.Chunk.TableChunkField",
    )
2367+
21522368 chunk_id : str = proto .Field (
21532369 proto .STRING ,
21542370 number = 1 ,
@@ -2180,13 +2396,50 @@ class ChunkPageFooter(proto.Message):
21802396 number = 6 ,
21812397 message = "Document.ChunkedDocument.Chunk.ChunkPageFooter" ,
21822398 )
2399+ chunk_fields : MutableSequence [
2400+ "Document.ChunkedDocument.Chunk.ChunkField"
2401+ ] = proto .RepeatedField (
2402+ proto .MESSAGE ,
2403+ number = 7 ,
2404+ message = "Document.ChunkedDocument.Chunk.ChunkField" ,
2405+ )
21832406
21842407 chunks : MutableSequence ["Document.ChunkedDocument.Chunk" ] = proto .RepeatedField (
21852408 proto .MESSAGE ,
21862409 number = 1 ,
21872410 message = "Document.ChunkedDocument.Chunk" ,
21882411 )
21892412
class BlobAsset(proto.Message):
    r"""Represents a blob asset. It's used to store the content of
    the inline blob in this document, e.g. image bytes, such that it
    can be referenced by other fields in the document via asset id.

    Attributes:
        asset_id (str):
            Optional. The id of the blob asset.
        content (bytes):
            Optional. The content of the blob asset, e.g.
            image bytes.
        mime_type (str):
            The mime type of the blob asset. An IANA published `media
            type (MIME
            type) <https://www.iana.org/assignments/media-types/media-types.xhtml>`__.
    """

    asset_id: str = proto.Field(proto.STRING, number=1)
    content: bytes = proto.Field(proto.BYTES, number=2)
    mime_type: str = proto.Field(proto.STRING, number=3)
2442+
21902443 uri : str = proto .Field (
21912444 proto .STRING ,
21922445 number = 1 ,
@@ -2197,6 +2450,10 @@ class ChunkPageFooter(proto.Message):
21972450 number = 2 ,
21982451 oneof = "source" ,
21992452 )
2453+ docid : str = proto .Field (
2454+ proto .STRING ,
2455+ number = 15 ,
2456+ )
22002457 mime_type : str = proto .Field (
22012458 proto .STRING ,
22022459 number = 3 ,
@@ -2255,6 +2512,11 @@ class ChunkPageFooter(proto.Message):
22552512 number = 18 ,
22562513 message = ChunkedDocument ,
22572514 )
2515+ blob_assets : MutableSequence [BlobAsset ] = proto .RepeatedField (
2516+ proto .MESSAGE ,
2517+ number = 19 ,
2518+ message = BlobAsset ,
2519+ )
22582520
22592521
22602522class RevisionRef (proto .Message ):
0 commit comments