|
| 1 | +""" |
| 2 | +Create Bedrock Data Automation (BDA) blueprints based on extracted labels. |
| 3 | +""" |
| 4 | + |
| 5 | +import json |
| 6 | +import logging |
| 7 | + |
| 8 | +import boto3 |
| 9 | +from botocore.exceptions import ClientError |
| 10 | + |
| 11 | +logger = logging.getLogger(__name__) |
| 12 | + |
| 13 | + |
class BDABlueprintCreator:
    """Thin wrapper around the ``bedrock-data-automation`` boto3 client.

    Groups the calls used to create, version, update, fetch and delete
    blueprints, and to create or update the Data Automation project that
    references those blueprints.
    """

    def __init__(self, region_name="us-west-2"):
        """Create the Bedrock Data Automation client.

        Args:
            region_name (str): AWS region the client is bound to.
        """
        self.bedrock_client = boto3.client(
            service_name="bedrock-data-automation", region_name=region_name
        )

    @staticmethod
    def _drop_none(params):
        """Return a copy of ``params`` without the entries whose value is None."""
        return {key: value for key, value in params.items() if value is not None}

    def _fetch_project(self, projectArn, projectStage="LIVE"):
        """Return the ``project`` payload of get_data_automation_project, or None."""
        response = self.bedrock_client.get_data_automation_project(
            projectArn=projectArn, projectStage=projectStage
        )
        return response.get("project")

    def _push_project_update(self, projectArn, project, customOutputConfiguration):
        """Call update_data_automation_project, carrying over the project's
        current description, stage and standard output configuration."""
        # Fields absent from the fetched project are omitted instead of being
        # forwarded as None, which botocore rejects during parameter validation.
        request = self._drop_none(
            {
                "projectArn": projectArn,
                "projectDescription": project.get("projectDescription"),
                "projectStage": project.get("projectStage"),
                "standardOutputConfiguration": project.get(
                    "standardOutputConfiguration"
                ),
            }
        )
        request["customOutputConfiguration"] = customOutputConfiguration
        # NOTE(review): overrideConfiguration is not forwarded (the original code
        # did not forward it either) -- confirm the service keeps it on update.
        return self.bedrock_client.update_data_automation_project(**request)

    def update_data_automation_project(self, projectArn: str, blueprint):
        """
        Register a blueprint on an existing Data Automation project.

        The LIVE stage of the project is fetched, any entry with the same
        ``blueprintArn`` is replaced by the new one, and the project is updated.

        Args:
            projectArn (str): ARN of the project to update
            blueprint (dict): Blueprint description; ``blueprintArn`` is read,
                ``blueprintStage`` and ``blueprintVersion`` are copied when set

        Returns:
            dict: The blueprint entry written to the project, or None if the
            project could not be fetched or the service call failed
        """
        try:
            logger.info("blueprint to update %s", blueprint)
            project = self._fetch_project(projectArn)
            if project is None:
                logger.error(
                    "Failed to update data automation project: "
                    "no project returned for %s",
                    projectArn,
                )
                return None
            logger.info("Updating project: %s", project)

            blueprint_arn = blueprint.get("blueprintArn")
            entry = {"blueprintArn": blueprint_arn}
            for optional_key in ("blueprintStage", "blueprintVersion"):
                if blueprint.get(optional_key):
                    entry[optional_key] = blueprint[optional_key]

            # Work on a copy so the fetched payload is not mutated in place.
            custom_config = dict(project.get("customOutputConfiguration") or {})
            existing = custom_config.get("blueprints") or []
            # Drop every previous entry for this ARN, then append the new one.
            custom_config["blueprints"] = [
                item for item in existing if item.get("blueprintArn") != blueprint_arn
            ] + [entry]

            logger.info("Updating data automation project: %s", projectArn)
            self._push_project_update(projectArn, project, custom_config)
            logger.info("Successfully updated data automation project: %s", projectArn)
            return entry
        except ClientError as e:
            logger.error(f"Failed to update data automation project: {e}")
            return None

    def update_project_with_custom_configurations(
        self, projectArn: str, customConfiguration
    ):
        """
        Replace the custom output configuration of an existing project.

        Args:
            projectArn (str): ARN of the project to update
            customConfiguration (dict): Value sent as
                ``customOutputConfiguration`` in the update call

        Returns:
            dict: Response of update_data_automation_project, or None if the
            project could not be fetched or the service call failed
        """
        try:
            project = self._fetch_project(projectArn)
            if project is None:
                logger.error(
                    "Failed to update data automation project: "
                    "no project returned for %s",
                    projectArn,
                )
                return None
            logger.info("Updating project: %s", project)

            logger.info("Updating data automation project: %s", projectArn)
            response = self._push_project_update(
                projectArn, project, customConfiguration
            )
            logger.info("Successfully updated data automation project: %s", projectArn)
            return response
        except ClientError as e:
            logger.error(f"Failed to update data automation project: {e}")
            return None

    def create_data_automation_project(self, project_name, description, blueprint_arn):
        """
        Create a Bedrock Data Automation project in the LIVE stage.

        The project is created with a fixed standard output configuration for
        document, image, video and audio, the document splitter enabled, and a
        single LIVE blueprint attached.

        Args:
            project_name (str): Name of the project
            description (str, optional): Project description; omitted from the
                request when empty or None
            blueprint_arn (str): ARN of the blueprint attached to the project

        Returns:
            dict: Response of create_data_automation_project, or None if the
            service call failed
        """
        # TODO: Fix the signature accept blueprint object instead of schema
        request = {
            "projectName": project_name,
            "projectStage": "LIVE",
            "standardOutputConfiguration": {
                "document": {
                    "extraction": {
                        "granularity": {"types": ["DOCUMENT"]},
                        "boundingBox": {"state": "ENABLED"},
                    },
                    "generativeField": {"state": "ENABLED"},
                    "outputFormat": {
                        "textFormat": {"types": ["PLAIN_TEXT"]},
                        "additionalFileFormat": {"state": "ENABLED"},
                    },
                },
                "image": {
                    "extraction": {
                        "category": {
                            "state": "ENABLED",
                            "types": ["TEXT_DETECTION"],
                        },
                        "boundingBox": {"state": "ENABLED"},
                    },
                    "generativeField": {
                        "state": "ENABLED",
                        "types": ["IMAGE_SUMMARY"],
                    },
                },
                "video": {
                    "extraction": {
                        "category": {
                            "state": "ENABLED",
                            "types": ["CONTENT_MODERATION", "TEXT_DETECTION"],
                        },
                        "boundingBox": {"state": "ENABLED"},
                    },
                    "generativeField": {
                        "state": "ENABLED",
                        "types": ["VIDEO_SUMMARY"],
                    },
                },
                "audio": {
                    "extraction": {
                        "category": {"state": "ENABLED", "types": ["TRANSCRIPT"]}
                    },
                    "generativeField": {
                        "state": "ENABLED",
                        "types": ["AUDIO_SUMMARY"],
                    },
                },
            },
            "customOutputConfiguration": {
                "blueprints": [
                    {"blueprintArn": blueprint_arn, "blueprintStage": "LIVE"},
                ]
            },
            "overrideConfiguration": {"document": {"splitter": {"state": "ENABLED"}}},
        }
        # The description is optional: only send it when one was provided.
        if description:
            request["projectDescription"] = description

        try:
            return self.bedrock_client.create_data_automation_project(**request)
        except ClientError as e:
            logger.error(f"Error creating Data Automation project: {e}")
            return None

    def create_blueprint(self, document_type, blueprint_name, schema=None):
        """
        Create a Bedrock Data Automation blueprint in the LIVE stage.

        Args:
            document_type (str): Value sent as the blueprint ``type``
            blueprint_name (str): Name for the blueprint
            schema: Blueprint schema; required despite the None default

        Returns:
            dict: ``{"status": "success", "blueprint": <blueprint payload>}``

        Raises:
            ValueError: If ``schema`` is None or the response has no blueprint
            ClientError: If the service call fails
        """
        try:
            if schema is None:
                raise ValueError(
                    "Schema cannot be None. Please provide a valid schema."
                )
            # Log the schema for debugging
            logger.info(f"Schema: {json.dumps(schema, indent=2)}")

            response = self.bedrock_client.create_blueprint(
                blueprintName=blueprint_name,
                type=document_type,
                blueprintStage="LIVE",
                schema=schema,
            )
            blueprint_response = response.get("blueprint")
            if blueprint_response is None:
                raise ValueError(
                    "Blueprint creation failed. No blueprint response received."
                )

            logger.info(
                "Blueprint created successfully: %s",
                blueprint_response.get("blueprintArn"),
            )
            return {"status": "success", "blueprint": blueprint_response}
        except ClientError as e:
            logger.error(f"Error creating BDA blueprint: {e}")
            raise
        except Exception as e:
            logger.error(f"Error creating blueprint: {e}")
            raise

    def create_blueprint_version(self, blueprint_arn, project_arn):
        """
        Create a new version of a blueprint and register it on a project.

        Args:
            blueprint_arn (str): ARN of the blueprint to version
            project_arn (str): ARN of the project updated with the new version

        Returns:
            dict: ``{"status": "success", "blueprint": <blueprint payload>}``

        Raises:
            ValueError: If the response has no blueprint
            ClientError: If the create_blueprint_version call fails
        """
        try:
            response = self.bedrock_client.create_blueprint_version(
                blueprintArn=blueprint_arn
            )
            blueprint_response = response.get("blueprint")
            if blueprint_response is None:
                raise ValueError(
                    "Blueprint version creation failed. No blueprint response received."
                )

            # update_data_automation_project reports failure by returning None;
            # surface that in the log instead of dropping it silently.
            updated = self.update_data_automation_project(
                project_arn, blueprint_response
            )
            if updated is None:
                logger.warning(
                    "Blueprint version was created but project %s was not updated",
                    project_arn,
                )

            logger.info(
                "Blueprint version created successfully: %s",
                blueprint_response.get("blueprintArn"),
            )
            return {"status": "success", "blueprint": blueprint_response}
        except ClientError as e:
            logger.error(f"Error creating BDA blueprint version: {e}")
            raise
        except Exception as e:
            logger.error(f"Error creating blueprint version: {e}")
            raise

    def update_blueprint(self, blueprint_arn, stage, schema):
        """
        Update the schema of an existing blueprint.

        Args:
            blueprint_arn (str): ARN of the blueprint to update
            stage (str): Value sent as ``blueprintStage``
            schema: Updated schema for the blueprint

        Returns:
            dict: ``{"status": "success", "blueprint": <blueprint payload>}``

        Raises:
            ValueError: If the response has no blueprint
            ClientError: If the service call fails
        """
        try:
            response = self.bedrock_client.update_blueprint(
                blueprintArn=blueprint_arn, blueprintStage=stage, schema=schema
            )
            blueprint_response = response.get("blueprint")
            if blueprint_response is None:
                raise ValueError(
                    "Blueprint update failed. No blueprint response received."
                )

            logger.info(
                "Blueprint updated successfully: %s",
                blueprint_response.get("blueprintArn"),
            )
            return {"status": "success", "blueprint": blueprint_response}
        except ClientError as e:
            logger.error(f"Error Updating BDA blueprint: {e}")
            raise
        except Exception as e:
            logger.error(f"Error updating blueprint: {e}")
            raise

    def get_blueprint(self, blueprint_arn, stage):
        """
        Fetch an existing blueprint.

        Args:
            blueprint_arn (str): ARN of the blueprint to fetch
            stage (str): Value sent as ``blueprintStage``

        Returns:
            dict: ``{"status": "success", "blueprint": <blueprint payload>}``

        Raises:
            ValueError: If the response has no blueprint
            ClientError: If the service call fails
        """
        try:
            response = self.bedrock_client.get_blueprint(
                blueprintArn=blueprint_arn, blueprintStage=stage
            )
            blueprint_response = response.get("blueprint")
            if blueprint_response is None:
                raise ValueError(
                    "Blueprint retrieval failed. No blueprint response received."
                )

            logger.info(
                "Blueprint retrieved successfully: %s",
                blueprint_response.get("blueprintArn"),
            )
            return {"status": "success", "blueprint": blueprint_response}
        except ClientError as e:
            logger.error(f"Error getting BDA blueprint: {e}")
            raise
        except Exception as e:
            logger.error(f"Error getting blueprint: {e}")
            raise

    def list_blueprints(self, projectArn, projectStage):
        """
        Return the custom output configuration of a project.

        Args:
            projectArn (str): ARN of the project to inspect
            projectStage (str): Project stage to read; "LIVE" is used when
                this is empty or None

        Returns:
            dict: The project's ``customOutputConfiguration`` (holds the
            ``blueprints`` list), or None when the project or the
            configuration is missing

        Raises:
            Exception: Any error raised by the service call is logged and
            re-raised
        """
        try:
            project = self._fetch_project(projectArn, projectStage or "LIVE")
            if project is None:
                logger.warning("No project returned for %s", projectArn)
                return None
            logger.info("Fetched project: %s", project)
            return project.get("customOutputConfiguration")
        except Exception as e:
            logger.error(f"Error listing blueprints: {e}")
            raise

    def delete_blueprint(self, blueprint_arn, blueprint_version):
        """
        Delete a blueprint, or one version of it.

        Args:
            blueprint_arn (str): ARN of the blueprint to delete
            blueprint_version (str, optional): Version to delete; omitted from
                the request when None

        Returns:
            dict: Response of delete_blueprint

        Raises:
            Exception: Any error raised by the service call is logged and
            re-raised
        """
        try:
            request = self._drop_none(
                {"blueprintArn": blueprint_arn, "blueprintVersion": blueprint_version}
            )
            return self.bedrock_client.delete_blueprint(**request)
        except Exception as e:
            logger.error(f"Error delete_blueprint: {e}")
            raise
0 commit comments