@@ -67,7 +67,10 @@ def _retrieve_all_blueprints(self, project_arn: str):
6767 blueprint_arn = blueprint_arn , stage = "LIVE"
6868 )
6969 _blueprint = response .get ("blueprint" )
70- _blueprint ["blueprintVersion" ] = blueprint ["blueprintVersion" ]
70+ # Add blueprintVersion with default if missing
71+ _blueprint ["blueprintVersion" ] = blueprint .get (
72+ "blueprintVersion" , "1"
73+ )
7174 all_blueprints .append (_blueprint )
7275 logger .info (
7376 f"{ len (all_blueprints )} blueprints retrieved for { project_arn } "
@@ -88,20 +91,24 @@ def _retrieve_all_blueprints(self, project_arn: str):
8891
8992 def _transform_json_schema_to_bedrock_blueprint (self , json_schema : dict ) -> dict :
9093 """
91- Transform a standard JSON Schema to Bedrock Document Analysis blueprint format.
94+ Transform JSON Schema (draft 2020-12) to BDA blueprint format (draft-07) .
9295
93- Bedrock expects:
94- - "class" and "description" at top level (not $id)
95- - "instruction" and "inferenceType" for each field property
96- - Sections in definitions with references in properties
96+ BDA requirements based on working schemas:
97+ - Uses "definitions" (not "$defs") - JSON Schema draft-07
98+ - References use "#/definitions/" (not "#/$defs/")
99+ - Only LEAF properties get "inferenceType" and "instruction"
100+ - Object/array types do NOT get these fields
97101
98102 Args:
99- json_schema: Standard JSON Schema from migration
103+ json_schema: JSON Schema from configuration
100104
101105 Returns:
102- Blueprint schema in Bedrock format
106+ Blueprint schema in BDA-compatible draft-07 format
103107 """
104- # Start with the basic structure Bedrock expects
108+ # Extract $defs and convert to definitions
109+ defs = json_schema .get (DEFS_FIELD , {})
110+
111+ # Start with BDA-expected structure
105112 blueprint = {
106113 "$schema" : "http://json-schema.org/draft-07/schema#" ,
107114 "class" : json_schema .get (
@@ -111,17 +118,86 @@ def _transform_json_schema_to_bedrock_blueprint(self, json_schema: dict) -> dict
111118 SCHEMA_DESCRIPTION , "Document schema for data extraction"
112119 ),
113120 "type" : TYPE_OBJECT ,
114- "definitions" : deepcopy (json_schema .get (DEFS_FIELD , {})),
115- "properties" : {},
116121 }
117122
118- # Transform each property to add Bedrock-specific fields
123+ # Convert definitions and add BDA fields to leaf properties only
124+ if defs :
125+ blueprint ["definitions" ] = {}
126+ for def_name , def_value in defs .items ():
127+ blueprint ["definitions" ][def_name ] = self ._add_bda_fields_to_schema (
128+ def_value
129+ )
130+
131+ # Transform properties and update $ref paths
132+ blueprint ["properties" ] = {}
119133 for prop_name , prop_value in json_schema .get (SCHEMA_PROPERTIES , {}).items ():
120- transformed_prop = self ._add_bedrock_fields_to_property (prop_value )
121- blueprint ["properties" ][prop_name ] = transformed_prop
134+ transformed = self ._add_bda_fields_to_schema (prop_value )
135+ # Update $ref paths from #/$defs/ to #/definitions/
136+ if REF_FIELD in transformed :
137+ transformed [REF_FIELD ] = transformed [REF_FIELD ].replace (
138+ "/$defs/" , "/definitions/"
139+ )
140+ blueprint ["properties" ][prop_name ] = transformed
122141
123142 return blueprint
124143
def _add_bda_fields_to_schema(self, schema: dict) -> dict:
    """
    Build a BDA-ready copy of a property or definition schema.

    The input is never modified. Handling depends on the node:
    - Non-dict values are handed back unchanged.
    - A node carrying a $ref collapses to that $ref alone, with
      "/$defs/" in its path rewritten to "/definitions/"; all sibling
      keys are discarded.
    - Every other node is deep-copied and loses its description.
      Object nodes have each entry of their properties converted
      recursively, array nodes have their items converted recursively,
      and any other type (a missing type counts as "string") is a leaf
      that receives "inferenceType" and "instruction" unless it already
      defines them.

    Args:
        schema: Property or definition schema

    Returns:
        Converted schema; leaf instructions fall back to the original
        description when one was present, otherwise to a generic prompt
    """
    if not isinstance(schema, dict):
        return schema

    # A reference node keeps nothing except its (re-pointed) target.
    if REF_FIELD in schema:
        ref_target = schema[REF_FIELD].replace("/$defs/", "/definitions/")
        return {REF_FIELD: ref_target}

    # Work on a private copy so the caller's schema stays intact.
    converted = deepcopy(schema)
    converted.pop(SCHEMA_DESCRIPTION, None)

    node_type = converted.get(SCHEMA_TYPE, "string")

    if node_type == TYPE_OBJECT:
        # Containers get no BDA fields themselves; only their children do.
        if SCHEMA_PROPERTIES in converted:
            children = {}
            for child_name, child_schema in converted[SCHEMA_PROPERTIES].items():
                children[child_name] = self._add_bda_fields_to_schema(child_schema)
            converted[SCHEMA_PROPERTIES] = children
    elif node_type == TYPE_ARRAY:
        if SCHEMA_ITEMS in converted:
            converted[SCHEMA_ITEMS] = self._add_bda_fields_to_schema(
                converted[SCHEMA_ITEMS]
            )
    else:
        # Leaf: fill in BDA fields without overriding explicit values.
        converted.setdefault("inferenceType", "inferred")
        # The description was popped from the copy, so read it from the
        # untouched input.
        converted.setdefault(
            "instruction",
            schema.get(SCHEMA_DESCRIPTION, "Extract this field from the document"),
        )

    return converted
200+
125201 def _add_bedrock_fields_to_property (self , prop : dict ) -> dict :
126202 """
127203 Add Bedrock-specific fields (instruction, inferenceType) to a property.
0 commit comments