Skip to content

Commit 0e25dd2

Browse files
committed
Merge branch 'feature/generic-discovery-flow' into 'develop'
Discovery feature See merge request genaiic-reusable-assets/engagement-artifacts/genaiic-idp-accelerator!292
2 parents 390805a + 8567d88 commit 0e25dd2

35 files changed

+8317
-3
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ For detailed deployment and testing instructions, see the [Deployment Guide](./d
128128
- [Agent Analysis](./docs/agent-analysis.md) - Natural language analytics and data visualization feature
129129
- [Custom MCP Agent](./docs/custom-MCP-agent.md) - Integrating external MCP servers for custom tools and capabilities
130130
- [Configuration](./docs/configuration.md) - Configuration and customization options
131+
- [Discovery](./docs/discovery.md) - Pattern-neutral discovery process and BDA blueprint automation
131132
- [Classification](./docs/classification.md) - Customizing document classification
132133
- [Extraction](./docs/extraction.md) - Customizing information extraction
133134
- [Human-in-the-Loop Review](./docs/human-review.md) - Human review workflows with Amazon A2I

docs/discovery.md

Lines changed: 1367 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 380 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,380 @@
1+
"""
2+
Create Bedrock Data Automation (BDA) blueprints based on extracted labels.
3+
"""
4+
5+
import json
6+
import logging
7+
8+
import boto3
9+
from botocore.exceptions import ClientError
10+
11+
logger = logging.getLogger(__name__)


class BDABlueprintCreator:
    """Wrapper around the ``bedrock-data-automation`` boto3 client.

    Provides helpers to create, version, update, fetch and delete blueprints,
    and to create or update the data automation project that references them.
    """

    def __init__(self, region_name="us-west-2"):
        """Create the ``bedrock-data-automation`` client for ``region_name``."""
        self.bedrock_client = boto3.client(
            service_name="bedrock-data-automation", region_name=region_name
        )

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _without_none(**params):
        """Return ``params`` minus entries whose value is ``None``.

        boto3 rejects ``None`` for any parameter (it raises a parameter
        validation error rather than treating it as "not supplied"), so
        optional values must be omitted instead of being passed as ``None``.
        """
        return {key: value for key, value in params.items() if value is not None}

    @staticmethod
    def _require_blueprint(response, action):
        """Return ``response["blueprint"]`` or raise ``ValueError`` if absent."""
        blueprint = (response or {}).get("blueprint")
        if blueprint is None:
            raise ValueError(
                f"Blueprint {action} failed. No blueprint response received."
            )
        return blueprint

    def _load_project(self, projectArn, projectStage="LIVE"):
        """Fetch the project description dict, or ``None`` if not returned."""
        response = self.bedrock_client.get_data_automation_project(
            projectArn=projectArn, projectStage=projectStage
        )
        return (response or {}).get("project")

    def _apply_project_update(self, projectArn, project, customOutputConfiguration):
        """Push ``customOutputConfiguration`` to the project, reusing the
        description, stage and standard output read from ``project``."""
        logger.info("Updating data automation project: %s", projectArn)
        response = self.bedrock_client.update_data_automation_project(
            **self._without_none(
                projectArn=projectArn,
                projectDescription=project.get("projectDescription"),
                projectStage=project.get("projectStage"),
                customOutputConfiguration=customOutputConfiguration,
                standardOutputConfiguration=project.get(
                    "standardOutputConfiguration"
                ),
            )
        )
        logger.info("Successfully updated data automation project: %s", projectArn)
        return response

    @staticmethod
    def _default_standard_output_configuration():
        """Build the standard output configuration used for new projects."""
        return {
            "document": {
                "extraction": {
                    "granularity": {"types": ["DOCUMENT"]},
                    "boundingBox": {"state": "ENABLED"},
                },
                "generativeField": {"state": "ENABLED"},
                "outputFormat": {
                    "textFormat": {"types": ["PLAIN_TEXT"]},
                    "additionalFileFormat": {"state": "ENABLED"},
                },
            },
            "image": {
                "extraction": {
                    "category": {
                        "state": "ENABLED",
                        "types": ["TEXT_DETECTION"],
                    },
                    "boundingBox": {"state": "ENABLED"},
                },
                "generativeField": {
                    "state": "ENABLED",
                    "types": ["IMAGE_SUMMARY"],
                },
            },
            "video": {
                "extraction": {
                    "category": {
                        "state": "ENABLED",
                        "types": ["CONTENT_MODERATION", "TEXT_DETECTION"],
                    },
                    "boundingBox": {"state": "ENABLED"},
                },
                "generativeField": {
                    "state": "ENABLED",
                    "types": ["VIDEO_SUMMARY"],
                },
            },
            "audio": {
                "extraction": {
                    "category": {"state": "ENABLED", "types": ["TRANSCRIPT"]}
                },
                "generativeField": {
                    "state": "ENABLED",
                    "types": ["AUDIO_SUMMARY"],
                },
            },
        }

    # ------------------------------------------------------------------
    # Project operations
    # ------------------------------------------------------------------
    def update_data_automation_project(self, projectArn: str, blueprint):
        """
        Attach ``blueprint`` to an existing data automation project.

        Any entry already registered for the same ``blueprintArn`` is replaced,
        so the project ends up with exactly one entry for that blueprint.

        Args:
            projectArn (str): ARN of the project to update
            blueprint (dict): Blueprint details; ``blueprintArn`` is read, and
                ``blueprintStage`` / ``blueprintVersion`` are copied when set

        Returns:
            dict: The blueprint entry written to the project, or None if the
            project could not be read or the service call failed
        """
        try:
            logger.debug("Blueprint to update: %s", blueprint)
            project = self._load_project(projectArn)
            if project is None:
                logger.error(
                    "Failed to update data automation project: "
                    "no project returned for %s",
                    projectArn,
                )
                return None
            logger.info("Updating project: %s", project)

            # Work on a shallow copy so the fetched response is left untouched.
            custom_output = dict(project.get("customOutputConfiguration") or {})
            registered = custom_output.get("blueprints") or []

            target_arn = blueprint.get("blueprintArn")
            entry = {"blueprintArn": target_arn}
            for optional_key in ("blueprintStage", "blueprintVersion"):
                if blueprint.get(optional_key):
                    entry[optional_key] = blueprint[optional_key]

            # Drop every stale entry for this ARN, then append the new one.
            custom_output["blueprints"] = [
                existing
                for existing in registered
                if existing.get("blueprintArn") != target_arn
            ] + [entry]

            self._apply_project_update(projectArn, project, custom_output)
            return entry
        except ClientError as e:
            logger.error(f"Failed to update data automation project: {e}")
            return None

    def update_project_with_custom_configurations(
        self, projectArn: str, customConfiguration
    ):
        """
        Replace the custom output configuration of an existing project.

        Args:
            projectArn (str): ARN of the project to update
            customConfiguration (dict): Value sent as the project's
                ``customOutputConfiguration``

        Returns:
            dict: Response of the update call, or None if the project could
            not be read or the service call failed
        """
        try:
            project = self._load_project(projectArn)
            if project is None:
                logger.error(
                    "Failed to update data automation project: "
                    "no project returned for %s",
                    projectArn,
                )
                return None
            logger.info("Updating project: %s", project)
            return self._apply_project_update(
                projectArn, project, customConfiguration
            )
        except ClientError as e:
            logger.error(f"Failed to update data automation project: {e}")
            return None

    def create_data_automation_project(self, project_name, description, blueprint_arn):
        """
        Create a Bedrock Data Automation project.

        The project is created in the LIVE stage with a fixed standard output
        configuration, the document splitter enabled, and ``blueprint_arn``
        registered as its only (LIVE) blueprint.

        Args:
            project_name (str): Name of the project
            description (str, optional): Project description; omitted from the
                request when empty or None
            blueprint_arn (str): ARN of the blueprint to register

        Returns:
            dict: Created project details or None if error
        TODO: Fix the signature accept blueprint object instead of schema
        """
        try:
            request = {
                "projectName": project_name,
                "projectStage": "LIVE",
                "standardOutputConfiguration": (
                    self._default_standard_output_configuration()
                ),
                "customOutputConfiguration": {
                    "blueprints": [
                        {"blueprintArn": blueprint_arn, "blueprintStage": "LIVE"},
                    ]
                },
                "overrideConfiguration": {
                    "document": {"splitter": {"state": "ENABLED"}}
                },
            }
            # Only send a description when one was supplied; None is rejected
            # by boto3 parameter validation.
            if description:
                request["projectDescription"] = description

            return self.bedrock_client.create_data_automation_project(**request)
        except ClientError as e:
            logger.error(f"Error creating Data Automation project: {e}")
            return None

    # ------------------------------------------------------------------
    # Blueprint operations
    # ------------------------------------------------------------------
    def create_blueprint(self, document_type, blueprint_name, schema=None):
        """
        Create a blueprint in the LIVE stage.

        Args:
            document_type (str): Value sent as the blueprint ``type``
            blueprint_name (str): Name for the blueprint
            schema: Blueprint schema; required despite the None default

        Returns:
            dict: ``{"status": "success", "blueprint": <blueprint details>}``

        Raises:
            ValueError: If ``schema`` is None or the response has no blueprint
            ClientError: If the service call fails
        """
        try:
            if schema is None:
                raise ValueError(
                    "Schema cannot be None. Please provide a valid schema."
                )
            # Log the schema for debugging
            logger.info("Schema: %s", json.dumps(schema, indent=2))

            response = self.bedrock_client.create_blueprint(
                blueprintName=blueprint_name,
                type=document_type,
                blueprintStage="LIVE",
                schema=schema,
            )
            blueprint_response = self._require_blueprint(response, "creation")

            logger.info(
                "Blueprint created successfully: %s",
                blueprint_response.get("blueprintArn"),
            )
            return {"status": "success", "blueprint": blueprint_response}
        except ClientError as e:
            logger.error(f"Error creating BDA blueprint: {e}")
            raise
        except Exception as e:
            logger.error(f"Error creating blueprint: {e}")
            raise

    def create_blueprint_version(self, blueprint_arn, project_arn):
        """
        Create a new version of a blueprint and register it on a project.

        Args:
            blueprint_arn (str): ARN of the blueprint to version
            project_arn (str): ARN of the project that should reference the
                newly created version

        Returns:
            dict: ``{"status": "success", "blueprint": <blueprint details>}``

        Raises:
            ValueError: If the response has no blueprint
            ClientError: If creating the version fails
        """
        try:
            response = self.bedrock_client.create_blueprint_version(
                blueprintArn=blueprint_arn
            )
            blueprint_response = self._require_blueprint(
                response, "version creation"
            )

            # update_data_automation_project reports failure by returning None
            # instead of raising, so surface that in the log.
            if (
                self.update_data_automation_project(project_arn, blueprint_response)
                is None
            ):
                logger.warning(
                    "Blueprint version was created but project %s was not updated",
                    project_arn,
                )

            logger.info(
                "Blueprint version created successfully: %s",
                blueprint_response.get("blueprintArn"),
            )
            return {"status": "success", "blueprint": blueprint_response}
        except ClientError as e:
            logger.error(f"Error creating BDA blueprint version: {e}")
            raise
        except Exception as e:
            logger.error(f"Error creating blueprint version: {e}")
            raise

    def update_blueprint(self, blueprint_arn, stage, schema):
        """
        Update the schema of an existing blueprint.

        Args:
            blueprint_arn (str): ARN of the blueprint to update
            stage (str): Value sent as ``blueprintStage``
            schema: Updated schema for the blueprint

        Returns:
            dict: ``{"status": "success", "blueprint": <blueprint details>}``

        Raises:
            ValueError: If the response has no blueprint
            ClientError: If the service call fails
        """
        try:
            response = self.bedrock_client.update_blueprint(
                blueprintArn=blueprint_arn, blueprintStage=stage, schema=schema
            )
            blueprint_response = self._require_blueprint(response, "update")

            logger.info(
                "Blueprint updated successfully: %s",
                blueprint_response.get("blueprintArn"),
            )
            return {"status": "success", "blueprint": blueprint_response}
        except ClientError as e:
            logger.error(f"Error Updating BDA blueprint: {e}")
            raise
        except Exception as e:
            logger.error(f"Error updating blueprint: {e}")
            raise

    def get_blueprint(self, blueprint_arn, stage):
        """
        Fetch an existing blueprint.

        Args:
            blueprint_arn (str): ARN of the blueprint to fetch
            stage (str): Value sent as ``blueprintStage``

        Returns:
            dict: ``{"status": "success", "blueprint": <blueprint details>}``

        Raises:
            ValueError: If the response has no blueprint
            ClientError: If the service call fails
        """
        try:
            response = self.bedrock_client.get_blueprint(
                blueprintArn=blueprint_arn, blueprintStage=stage
            )
            blueprint_response = self._require_blueprint(response, "retrieval")

            logger.info(
                "Blueprint retrieved successfully: %s",
                blueprint_response.get("blueprintArn"),
            )
            return {"status": "success", "blueprint": blueprint_response}
        except ClientError as e:
            logger.error(f"Error getting BDA blueprint: {e}")
            raise
        except Exception as e:
            logger.error(f"Error getting blueprint: {e}")
            raise

    def list_blueprints(self, projectArn, projectStage):
        """
        Return the custom output configuration of a project.

        Args:
            projectArn (str): ARN of the project to inspect
            projectStage (str): Stage of the project to read; falls back to
                "LIVE" when empty or None

        Returns:
            dict: The project's ``customOutputConfiguration`` (which holds the
            registered blueprints), or None when the project has none

        Raises:
            Exception: Any error from the service call is logged and re-raised
        """
        try:
            project = self._load_project(projectArn, projectStage or "LIVE")
            if project is None:
                logger.warning("No project returned for %s", projectArn)
                return None
            logger.info("Listing blueprints for project: %s", project)
            return project.get("customOutputConfiguration")
        except Exception as e:
            logger.error(f"Error listing blueprints: {e}")
            raise

    def delete_blueprint(self, blueprint_arn, blueprint_version):
        """
        Delete a blueprint, or one specific version of it.

        Args:
            blueprint_arn (str): ARN of the blueprint to delete
            blueprint_version (str, optional): Version to delete; omitted from
                the request when None

        Returns:
            dict: Response of the delete call

        Raises:
            Exception: Any error from the service call is logged and re-raised
        """
        try:
            return self.bedrock_client.delete_blueprint(
                **self._without_none(
                    blueprintArn=blueprint_arn, blueprintVersion=blueprint_version
                )
            )
        except Exception as e:
            logger.error(f"Error delete_blueprint: {e}")
            raise

0 commit comments

Comments
 (0)