Skip to content

Commit b2a498b

Browse files
author
Bob Strahan
committed
fix: Handle missing blueprintVersion in BDA blueprint service and bump to v0.4.0-rc6
1 parent e2fd094 commit b2a498b

File tree

6 files changed

+519
-20
lines changed

6 files changed

+519
-20
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.4.0-rc5
1+
0.4.0-rc6

lib/idp_common_pkg/idp_common/bda/bda_blueprint_service.py

Lines changed: 90 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,10 @@ def _retrieve_all_blueprints(self, project_arn: str):
6767
blueprint_arn=blueprint_arn, stage="LIVE"
6868
)
6969
_blueprint = response.get("blueprint")
70-
_blueprint["blueprintVersion"] = blueprint["blueprintVersion"]
70+
# Add blueprintVersion with default if missing
71+
_blueprint["blueprintVersion"] = blueprint.get(
72+
"blueprintVersion", "1"
73+
)
7174
all_blueprints.append(_blueprint)
7275
logger.info(
7376
f"{len(all_blueprints)} blueprints retrieved for {project_arn}"
@@ -88,20 +91,24 @@ def _retrieve_all_blueprints(self, project_arn: str):
8891

8992
def _transform_json_schema_to_bedrock_blueprint(self, json_schema: dict) -> dict:
9093
"""
91-
Transform a standard JSON Schema to Bedrock Document Analysis blueprint format.
94+
Transform JSON Schema (draft 2020-12) to BDA blueprint format (draft-07).
9295
93-
Bedrock expects:
94-
- "class" and "description" at top level (not $id)
95-
- "instruction" and "inferenceType" for each field property
96-
- Sections in definitions with references in properties
96+
BDA requirements based on working schemas:
97+
- Uses "definitions" (not "$defs") - JSON Schema draft-07
98+
- References use "#/definitions/" (not "#/$defs/")
99+
- Only LEAF properties get "inferenceType" and "instruction"
100+
- Object/array types do NOT get these fields
97101
98102
Args:
99-
json_schema: Standard JSON Schema from migration
103+
json_schema: JSON Schema from configuration
100104
101105
Returns:
102-
Blueprint schema in Bedrock format
106+
Blueprint schema in BDA-compatible draft-07 format
103107
"""
104-
# Start with the basic structure Bedrock expects
108+
# Extract $defs and convert to definitions
109+
defs = json_schema.get(DEFS_FIELD, {})
110+
111+
# Start with BDA-expected structure
105112
blueprint = {
106113
"$schema": "http://json-schema.org/draft-07/schema#",
107114
"class": json_schema.get(
@@ -111,17 +118,86 @@ def _transform_json_schema_to_bedrock_blueprint(self, json_schema: dict) -> dict
111118
SCHEMA_DESCRIPTION, "Document schema for data extraction"
112119
),
113120
"type": TYPE_OBJECT,
114-
"definitions": deepcopy(json_schema.get(DEFS_FIELD, {})),
115-
"properties": {},
116121
}
117122

118-
# Transform each property to add Bedrock-specific fields
123+
# Convert definitions and add BDA fields to leaf properties only
124+
if defs:
125+
blueprint["definitions"] = {}
126+
for def_name, def_value in defs.items():
127+
blueprint["definitions"][def_name] = self._add_bda_fields_to_schema(
128+
def_value
129+
)
130+
131+
# Transform properties and update $ref paths
132+
blueprint["properties"] = {}
119133
for prop_name, prop_value in json_schema.get(SCHEMA_PROPERTIES, {}).items():
120-
transformed_prop = self._add_bedrock_fields_to_property(prop_value)
121-
blueprint["properties"][prop_name] = transformed_prop
134+
transformed = self._add_bda_fields_to_schema(prop_value)
135+
# Update $ref paths from #/$defs/ to #/definitions/
136+
if REF_FIELD in transformed:
137+
transformed[REF_FIELD] = transformed[REF_FIELD].replace(
138+
"/$defs/", "/definitions/"
139+
)
140+
blueprint["properties"][prop_name] = transformed
122141

123142
return blueprint
124143

144+
def _add_bda_fields_to_schema(self, schema: dict) -> dict:
145+
"""
146+
Add BDA fields (inferenceType, instruction) ONLY to leaf properties.
147+
148+
Critical BDA requirements (based on working schemas):
149+
- Pure $ref properties: ONLY the $ref field
150+
- Object/array types: ONLY type and properties (NO description, inferenceType, instruction)
151+
- Leaf types: type, inferenceType, instruction (NO description)
152+
153+
Args:
154+
schema: Property or definition schema
155+
156+
Returns:
157+
Schema with BDA fields, description removed
158+
"""
159+
if not isinstance(schema, dict):
160+
return schema
161+
162+
# If this has a $ref, return ONLY the $ref (strip all other fields)
163+
if REF_FIELD in schema:
164+
# Pure $ref should have nothing else - this is critical for BDA
165+
return {REF_FIELD: schema[REF_FIELD].replace("/$defs/", "/definitions/")}
166+
167+
# Make deep copy to avoid mutation
168+
result = deepcopy(schema)
169+
170+
# Remove description field - BDA doesn't use it (only instruction)
171+
result.pop(SCHEMA_DESCRIPTION, None)
172+
173+
prop_type = result.get(SCHEMA_TYPE, "string")
174+
175+
# Add BDA fields ONLY for leaf/primitive types
176+
if prop_type not in [TYPE_OBJECT, TYPE_ARRAY]:
177+
# This is a leaf property - add BDA fields
178+
if "inferenceType" not in result:
179+
result["inferenceType"] = "inferred"
180+
181+
if "instruction" not in result:
182+
# Use description if available before we removed it
183+
if SCHEMA_DESCRIPTION in schema:
184+
result["instruction"] = schema[SCHEMA_DESCRIPTION]
185+
else:
186+
result["instruction"] = "Extract this field from the document"
187+
188+
# Recursively process nested structures
189+
if prop_type == TYPE_OBJECT and SCHEMA_PROPERTIES in result:
190+
result[SCHEMA_PROPERTIES] = {
191+
name: self._add_bda_fields_to_schema(value)
192+
for name, value in result[SCHEMA_PROPERTIES].items()
193+
}
194+
195+
# Handle array items (but don't add BDA fields to the array itself)
196+
if prop_type == TYPE_ARRAY and SCHEMA_ITEMS in result:
197+
result[SCHEMA_ITEMS] = self._add_bda_fields_to_schema(result[SCHEMA_ITEMS])
198+
199+
return result
200+
125201
def _add_bedrock_fields_to_property(self, prop: dict) -> dict:
126202
"""
127203
Add Bedrock-specific fields (instruction, inferenceType) to a property.

lib/idp_common_pkg/tests/unit/bda/test_bda_blueprint_service.py

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -478,8 +478,12 @@ def test_transform_does_not_mutate_input_schema(self, service):
478478
)
479479
assert blueprint["properties"]["invoiceNumber"]["inferenceType"] == "inferred"
480480

481-
def test_transform_preserves_ref_nodes(self, service):
482-
"""Ensure that $ref-only nodes are not augmented with Bedrock fields."""
481+
def test_transform_converts_defs_to_definitions(self, service):
482+
"""Ensure that $defs is converted to definitions for BDA draft-07 compatibility.
483+
484+
BDA uses JSON Schema draft-07 which uses "definitions", not "$defs".
485+
References should be preserved but updated to #/definitions/ path.
486+
"""
483487
schema = build_json_schema(
484488
doc_id="Document",
485489
description="Document schema",
@@ -491,17 +495,47 @@ def test_transform_preserves_ref_nodes(self, service):
491495
defs={
492496
"Address": {
493497
"type": "object",
498+
"description": "Address information",
494499
"properties": {
495500
"street": {
496501
"type": "string",
497502
"description": "Street line",
498-
}
503+
},
504+
"city": {
505+
"type": "string",
506+
"description": "City name",
507+
},
499508
},
500509
}
501510
},
502511
)
503512

504513
blueprint = service._transform_json_schema_to_bedrock_blueprint(schema)
505514

506-
# address property should remain a pure $ref
507-
assert blueprint["properties"]["address"] == {"$ref": "#/$defs/Address"}
515+
# Verify $defs was converted to definitions
516+
assert "definitions" in blueprint
517+
assert "$defs" not in blueprint
518+
assert "Address" in blueprint["definitions"]
519+
520+
# Verify $ref path was updated
521+
address_prop = blueprint["properties"]["address"]
522+
assert address_prop["$ref"] == "#/definitions/Address"
523+
524+
# Verify definition has proper structure (object should NOT have inferenceType/instruction)
525+
address_def = blueprint["definitions"]["Address"]
526+
assert address_def["type"] == "object"
527+
assert "inferenceType" not in address_def # Objects should not have this
528+
assert "instruction" not in address_def # Objects should not have this
529+
assert "properties" in address_def
530+
531+
# Verify leaf properties DO have BDA fields
532+
street_prop = address_def["properties"]["street"]
533+
assert street_prop["type"] == "string"
534+
assert "inferenceType" in street_prop
535+
assert "instruction" in street_prop
536+
assert street_prop["instruction"] == "Street line"
537+
538+
city_prop = address_def["properties"]["city"]
539+
assert city_prop["type"] == "string"
540+
assert "inferenceType" in city_prop
541+
assert "instruction" in city_prop
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
# BDA Discovery Lambda Local Testing
2+
3+
This guide explains how to test the BDA Discovery Lambda function locally with the real AWS BDA service.
4+
5+
## Prerequisites
6+
7+
1. **AWS Credentials**: Ensure your AWS credentials are configured
8+
```bash
9+
aws configure
10+
# or
11+
export AWS_PROFILE=your-profile
12+
```
13+
14+
2. **Python Environment**: Python 3.12+ with dependencies installed
15+
```bash
16+
cd lib/idp_common_pkg
17+
pip install -e ".[extraction]"
18+
```
19+
20+
3. **Required Environment Variables**: The test script sets defaults, but you can override them:
21+
```bash
22+
export BDA_PROJECT_ARN="your-project-arn"
23+
export STACK_NAME="your-stack-name"
24+
export CONFIGURATION_TABLE_NAME="your-config-table"
25+
export DISCOVERY_TRACKING_TABLE="your-tracking-table"
26+
```
27+
28+
## Running the Test
29+
30+
From the project root:
31+
32+
```bash
33+
cd patterns/pattern-1
34+
python test-bda-discovery.py
35+
```
36+
37+
Or make it executable and run directly:
38+
39+
```bash
40+
./patterns/pattern-1/test-bda-discovery.py
41+
```
42+
43+
## What the Test Does
44+
45+
1. Loads the Lambda handler from `src/bda_discovery_function/index.py`
46+
2. Uses a real SQS event from CloudWatch logs
47+
3. Calls the actual AWS BDA service to create/update blueprints
48+
4. Reports success or failure with detailed output
49+
50+
## Expected Output
51+
52+
**Success:**
53+
```
54+
================================================================================
55+
Testing BDA Discovery Lambda Function
56+
================================================================================
57+
58+
Environment:
59+
BDA_PROJECT_ARN: arn:aws:bedrock:us-west-2:...:data-automation-project/...
60+
STACK_NAME: IDP-BDA-3
61+
CONFIGURATION_TABLE_NAME: IDP-BDA-3-ConfigurationTable-...
62+
LOG_LEVEL: INFO
63+
64+
================================================================================
65+
Invoking handler...
66+
================================================================================
67+
68+
[INFO logs from the function...]
69+
70+
================================================================================
71+
Result:
72+
================================================================================
73+
{
74+
"batchItemFailures": []
75+
}
76+
77+
================================================================================
78+
✅ Test completed successfully!
79+
================================================================================
80+
```
81+
82+
**Failure:**
83+
The script will show detailed error messages and stack traces to help diagnose issues.
84+
85+
## Troubleshooting
86+
87+
### Missing deepdiff Module
88+
```bash
89+
pip install "deepdiff>=6.0.0"
90+
```
91+
92+
### AWS Permissions
93+
Ensure your AWS credentials have permissions for:
94+
- `bedrock:GetDataAutomationProject`
95+
- `bedrock:ListBlueprints`
96+
- `bedrock:GetBlueprint`
97+
- `bedrock:CreateBlueprint`
98+
- `bedrock:UpdateBlueprint`
99+
- `bedrock:CreateBlueprintVersion`
100+
- `dynamodb:GetItem` (for configuration table)
101+
102+
### Invalid Blueprint Schema Errors
103+
If you see validation errors, check the log output printed by the test (the same messages the function would write to CloudWatch) for the actual schema being sent to BDA, and compare it with the schemas of working blueprints in your project.
104+
105+
## Modifying the Test
106+
107+
You can modify `test-bda-discovery.py` to:
108+
- Change environment variables
109+
- Use different test events
110+
- Add more detailed logging
111+
- Test specific configuration scenarios

0 commit comments

Comments
 (0)