Commit edb73c3

feat: Integrate service tier into all IDP services and configurations
Update all IDP services to read and pass the service_tier parameter to the Bedrock API, and update all default configuration files with service_tier settings.

Service integration:
- OCR: reads service_tier from ocr.service_tier or the global config
- Classification: includes service_tier in the config dict
- Extraction: reads service_tier with fallback logic
- Assessment: reads and passes service_tier to Bedrock
- Granular assessment: propagates service_tier through parallel/sequential processing
- Summarization: includes service_tier in the config dict

Configuration updates:
- Pattern 1: added global and operation-specific service_tier settings
- Pattern 2: added global and operation-specific service_tier settings
- Pattern 3: added global and operation-specific service_tier settings
- All configs include explanatory comments

CLI updates:
- Added --service-tier parameter to the deploy command
- Added --service-tier parameter to the run-inference command
- Validates against: priority, standard, flex

Quality:
- All ruff lint checks passing
- Type hints complete
- Docstrings updated
- Backward compatible
1 parent 2e5a658 commit edb73c3

File tree

16 files changed: +227 −23 lines


.gitignore

Lines changed: 3 additions & 0 deletions
@@ -25,6 +25,9 @@ notebooks/examples/data
 *tmp-dev-assets*
 scratch/
 
+# Service tier implementation artifacts
+service_tier_*.md
+
 # Node.js / npm
 node_modules/
 package-lock.json

config_library/pattern-1/lending-package-sample/config.yaml

Lines changed: 4 additions & 0 deletions
@@ -2,10 +2,14 @@
 # SPDX-License-Identifier: MIT-0
 
 notes: Processing configuration in BDA project.
+# Global service tier setting (priority, standard, flex)
+service_tier: "standard"
 assessment:
+  service_tier: null # null = use global service_tier
   default_confidence_threshold: '0.8'
 summarization:
   enabled: true
+  service_tier: null # null = use global service_tier
   top_p: "0.0"
   max_tokens: '4096'
   top_k: '5'
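The `service_tier: null` convention above means an operation inherits the global tier unless it sets its own value. A minimal sketch of that resolution over a parsed config dict (the helper name and sample values are illustrative, not from the codebase):

```python
from typing import Any, Dict, Optional

def resolve_service_tier(config: Dict[str, Any], operation: str) -> Optional[str]:
    """Return the operation-level tier if set, else fall back to the global tier."""
    op_tier = (config.get(operation) or {}).get("service_tier")
    # YAML `null` parses to None, which falls through to the global setting
    return op_tier if op_tier is not None else config.get("service_tier")

config = {
    "service_tier": "standard",
    "assessment": {"service_tier": None},      # inherits the global tier
    "summarization": {"service_tier": "flex"}, # operation-level override
}

print(resolve_service_tier(config, "assessment"))
print(resolve_service_tier(config, "summarization"))
```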

config_library/pattern-2/lending-package-sample/config.yaml

Lines changed: 8 additions & 0 deletions
@@ -1,9 +1,13 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: MIT-0
 notes: Default settings for lending-package-sample configuration
+# Global service tier setting (priority, standard, flex)
+# This applies to all operations unless overridden at operation level
+service_tier: "standard"
 ocr:
   backend: "textract" # Default to Textract for backward compatibility
   model_id: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
+  service_tier: null # null = use global service_tier
   system_prompt: "You are an expert OCR system. Extract all text from the provided image accurately, preserving layout where possible."
   task_prompt: "Extract all text from this document image. Preserve the layout, including paragraphs, tables, and formatting."
   features:

@@ -1189,6 +1193,7 @@ classification:
   classificationMethod: multimodalPageLevelClassification
   maxPagesForClassification: "ALL"
   sectionSplitting: llm_determined
+  service_tier: null # null = use global service_tier
   image:
     target_height: ""
     target_width: ""

@@ -1250,6 +1255,7 @@ classification:
     4. Outputting in the exact JSON format specified in <output-format>
     </final-instructions>
 extraction:
+  service_tier: null # null = use global service_tier
   agentic:
     enabled: false
     review_agent: false

@@ -1351,6 +1357,7 @@ extraction:
   You are a document assistant. Respond only with JSON. Never make up data, only provide data found in the document being provided.
 summarization:
   enabled: true
+  service_tier: null # null = use global service_tier
   top_p: "0.0"
   max_tokens: "4096"
   top_k: "5"

@@ -1425,6 +1432,7 @@ summarization:
   You are a document summarization expert who can analyze and summarize documents from various domains including medical, financial, legal, and general business documents. Your task is to create a summary that captures the key information, main points, and important details from the document. Your output must be in valid JSON format. \nSummarization Style: Balanced\\nCreate a balanced summary that provides a moderate level of detail. Include the main points and key supporting information, while maintaining the document's overall structure. Aim for a comprehensive yet concise summary.\n Your output MUST be in valid JSON format with markdown content. You MUST strictly adhere to the output format specified in the instructions.
 assessment:
   enabled: true
+  service_tier: null # null = use global service_tier
   validation_enabled: false
   image:
     target_height: ""

config_library/pattern-3/rvl-cdip-package-sample/config.yaml

Lines changed: 7 additions & 0 deletions
@@ -2,9 +2,12 @@
 # SPDX-License-Identifier: MIT-0
 
 notes: Default settings
+# Global service tier setting (priority, standard, flex)
+service_tier: "standard"
 ocr:
   backend: "textract" # Default to Textract for backward compatibility
   model_id: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
+  service_tier: null # null = use global service_tier
   system_prompt: "You are an expert OCR system. Extract all text from the provided image accurately, preserving layout where possible."
   task_prompt: "Extract all text from this document image. Preserve the layout, including paragraphs, tables, and formatting."
   features:

@@ -765,7 +768,9 @@ classes:
   labeled 'notes', 'remarks', or 'comments'.
 classification:
   model: Custom fine tuned UDOP model
+  service_tier: null # null = use global service_tier (UDOP doesn't use Bedrock, but kept for consistency)
 extraction:
+  service_tier: null # null = use global service_tier
   image:
     target_width: ""
     target_height: ""

@@ -864,6 +869,7 @@ extraction:
   You are a document assistant. Respond only with JSON. Never make up data, only provide data found in the document being provided.
 summarization:
   enabled: true
+  service_tier: null # null = use global service_tier
   top_p: "0.0"
   max_tokens: "4096"
   top_k: "5"

@@ -926,6 +932,7 @@ summarization:
   You are a document summarization expert who can analyze and summarize documents from various domains including medical, financial, legal, and general business documents. Your task is to create a summary that captures the key information, main points, and important details from the document. Your output must be in valid JSON format. \nSummarization Style: Balanced\\nCreate a balanced summary that provides a moderate level of detail. Include the main points and key supporting information, while maintaining the document's overall structure. Aim for a comprehensive yet concise summary.\n Your output MUST be in valid JSON format with markdown content. You MUST strictly adhere to the output format specified in the instructions.
 assessment:
   enabled: true
+  service_tier: null # null = use global service_tier
   image:
     target_height: ""
     target_width: ""

idp_cli/idp_cli/cli.py

Lines changed: 13 additions & 0 deletions
@@ -198,6 +198,12 @@ def cli():
     "--custom-config",
     help="Path to local config file or S3 URI (e.g., ./config.yaml or s3://bucket/config.yaml)",
 )
+@click.option(
+    "--service-tier",
+    type=click.Choice(["priority", "standard", "flex"]),
+    default="standard",
+    help="Service tier for Bedrock API calls (default: standard)",
+)
 @click.option("--parameters", help="Additional parameters as key=value,key2=value2")
 @click.option("--wait", is_flag=True, help="Wait for stack creation to complete")
 @click.option(

@@ -215,6 +221,7 @@ def deploy(
     enable_hitl: str,
     pattern_config: Optional[str],
     custom_config: Optional[str],
+    service_tier: str,
     parameters: Optional[str],
     wait: bool,
     no_rollback: bool,

@@ -915,6 +922,11 @@ def rerun_inference(
     type=int,
     help="Seconds between status checks (default: 5)",
 )
+@click.option(
+    "--service-tier",
+    type=click.Choice(["priority", "standard", "flex"]),
+    help="Service tier for Bedrock API calls (overrides configuration)",
+)
 @click.option("--region", help="AWS region (optional)")
 def run_inference(
     stack_name: str,

@@ -928,6 +940,7 @@ def run_inference(
     batch_prefix: str,
     monitor: bool,
     refresh_interval: int,
+    service_tier: Optional[str],
     region: Optional[str],
 ):
     """
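The `click.Choice` above rejects anything outside the three tiers at parse time. A dependency-free sketch of the same check (`validate_service_tier` is a hypothetical stand-in, not the actual CLI code):

```python
# Tiers accepted by the (real) --service-tier option
VALID_TIERS = ("priority", "standard", "flex")

def validate_service_tier(value: str) -> str:
    """Reject values outside the allowed set, mimicking click.Choice behavior."""
    if value not in VALID_TIERS:
        raise ValueError(
            f"Invalid value for '--service-tier': {value!r} is not one of {VALID_TIERS}."
        )
    return value

print(validate_service_tier("flex"))
```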

lib/idp_common_pkg/idp_common/assessment/granular_service.py

Lines changed: 12 additions & 0 deletions
@@ -745,6 +745,7 @@ def _process_assessment_task(
     top_k: float,
     top_p: float,
     max_tokens: Optional[int],
+    service_tier: Optional[str] = None,
 ) -> AssessmentResult:
     """
     Process a single assessment task.

@@ -759,6 +760,7 @@ def _process_assessment_task(
         top_k: Top-k parameter
         top_p: Top-p parameter
         max_tokens: Max tokens parameter
+        service_tier: Service tier for Bedrock API
 
     Returns:
         Assessment result

@@ -785,6 +787,7 @@ def _process_assessment_task(
         top_p=top_p,
         max_tokens=max_tokens,
         context="GranularAssessment",
+        service_tier=service_tier,
     )
 
     # Extract text from response

@@ -1584,6 +1587,13 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
         max_tokens = self.config.assessment.max_tokens
         system_prompt = self.config.assessment.system_prompt
 
+        # Get service tier from config (operation-specific or global)
+        service_tier = None
+        if hasattr(self.config.assessment, "service_tier"):
+            service_tier = self.config.assessment.service_tier
+        if not service_tier and hasattr(self.config, "service_tier"):
+            service_tier = self.config.service_tier
+
         # Get schema for this document class
         class_schema = self._get_class_schema(class_label)
         if not class_schema:

@@ -1669,6 +1679,7 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
                 top_k,
                 top_p,
                 max_tokens,
+                service_tier,
             ): task
             for task in tasks_to_process
         }

@@ -1721,6 +1732,7 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
                 top_k,
                 top_p,
                 max_tokens,
+                service_tier,
             )
             all_task_results.append(result)

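The `hasattr` fallback added above can be exercised in isolation. This sketch uses `SimpleNamespace` to stand in for the real config objects, which are not shown in the diff:

```python
from types import SimpleNamespace
from typing import Optional

def get_service_tier(config) -> Optional[str]:
    """Operation-specific tier wins; otherwise fall back to the global setting."""
    service_tier = None
    if hasattr(config.assessment, "service_tier"):
        service_tier = config.assessment.service_tier
    if not service_tier and hasattr(config, "service_tier"):
        service_tier = config.service_tier
    return service_tier

cfg = SimpleNamespace(
    service_tier="standard",
    assessment=SimpleNamespace(service_tier=None),  # YAML null -> None
)
print(get_service_tier(cfg))  # operation tier unset, so the global value wins

cfg.assessment.service_tier = "priority"
print(get_service_tier(cfg))  # operation-level override takes effect
```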
lib/idp_common_pkg/idp_common/assessment/service.py

Lines changed: 8 additions & 0 deletions
@@ -852,6 +852,13 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
         # Time the model invocation
         request_start_time = time.time()
 
+        # Get service tier from config (operation-specific or global)
+        service_tier = None
+        if hasattr(self.config.assessment, "service_tier"):
+            service_tier = self.config.assessment.service_tier
+        if not service_tier and hasattr(self.config, "service_tier"):
+            service_tier = self.config.service_tier
+
         # Invoke Bedrock with the common library
         response_with_metering = bedrock.invoke_model(
             model_id=model_id,

@@ -862,6 +869,7 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
             top_p=top_p,
             max_tokens=max_tokens,
             context="Assessment",
+            service_tier=service_tier,
         )
 
         total_duration = time.time() - request_start_time

lib/idp_common_pkg/idp_common/bedrock/client.py

Lines changed: 36 additions & 11 deletions
@@ -8,21 +8,21 @@
 with built-in retry logic, metrics tracking, and configuration options.
 """
 
-import boto3
+import copy
 import json
-import os
-import time
 import logging
-import copy
+import os
 import random
-import socket
-from typing import Dict, Any, List, Optional, Union, Tuple, Type
+import time
+from typing import Any, Dict, List, Optional, Union
+
+import boto3
 from botocore.config import Config
 from botocore.exceptions import (
     ClientError,
-    ReadTimeoutError,
     ConnectTimeoutError,
     EndpointConnectionError,
+    ReadTimeoutError,
 )
 from urllib3.exceptions import ReadTimeoutError as Urllib3ReadTimeoutError
 
@@ -42,9 +42,11 @@ class _RequestsConnectTimeout(Exception):
 
 try:
     from requests.exceptions import (
-        ReadTimeout as RequestsReadTimeout,
         ConnectTimeout as RequestsConnectTimeout,
     )
+    from requests.exceptions import (
+        ReadTimeout as RequestsReadTimeout,
+    )
 except ImportError:
     # Fallback if requests is not available - use dummy exception classes
     RequestsReadTimeout = _RequestsReadTimeout  # type: ignore[misc,assignment]

@@ -87,6 +89,7 @@ class _RequestsConnectTimeout(Exception):
     "eu.amazon.nova-2-lite-v1:0",
 ]
 
+
 class BedrockClient:
     """Client for interacting with Amazon Bedrock models."""
 
@@ -139,6 +142,7 @@ def __call__(
         max_tokens: Optional[Union[int, str]] = None,
         max_retries: Optional[int] = None,
         context: str = "Unspecified",
+        service_tier: Optional[str] = None,
     ) -> Dict[str, Any]:
         """
         Make the instance callable with the same signature as the original function.

@@ -154,6 +158,7 @@ def __call__(
             top_p: Optional top_p parameter (float or string)
             max_tokens: Optional max_tokens parameter (int or string)
             max_retries: Optional override for the instance's max_retries setting
+            service_tier: Optional service tier (priority, standard, flex)
 
         Returns:
             Bedrock response object with metering information

@@ -173,6 +178,7 @@ def __call__(
             max_tokens=max_tokens,
             max_retries=effective_max_retries,
             context=context,
+            service_tier=service_tier,
         )
 
     def _preprocess_content_for_cachepoint(

@@ -264,6 +270,7 @@ def invoke_model(
         max_tokens: Optional[Union[int, str]] = None,
         max_retries: Optional[int] = None,
         context: str = "Unspecified",
+        service_tier: Optional[str] = None,
     ) -> Dict[str, Any]:
         """
         Invoke a Bedrock model with retry logic.

@@ -277,6 +284,7 @@ def invoke_model(
             top_p: Optional top_p parameter (float or string)
             max_tokens: Optional max_tokens parameter (int or string)
             max_retries: Optional override for the instance's max_retries setting
+            service_tier: Optional service tier (priority, standard, flex)
 
         Returns:
             Bedrock response object with metering information

@@ -368,9 +376,7 @@ def invoke_model(
             inference_config["topP"] = top_p
             # Remove temperature when using top_p to avoid conflicts
             del inference_config["temperature"]
-            logger.debug(
-                f"Using top_p={top_p} for inference (temperature ignored)"
-            )
+            logger.debug(f"Using top_p={top_p} for inference (temperature ignored)")
         else:
             logger.debug(
                 f"Using temperature={temperature} for inference (top_p is 0 or None)"

@@ -438,6 +444,20 @@ def invoke_model(
         if not additional_model_fields:
             additional_model_fields = None
 
+        # Normalize and validate service tier
+        normalized_service_tier = None
+        if service_tier:
+            tier_lower = service_tier.lower().strip()
+            if tier_lower in ["priority", "flex"]:
+                normalized_service_tier = tier_lower
+            elif tier_lower in ["standard", "default"]:
+                normalized_service_tier = "default"
+            else:
+                logger.warning(
+                    f"Invalid service_tier value '{service_tier}'. "
+                    f"Valid values are: priority, standard, flex. Using default tier."
+                )
+
         # Get guardrail configuration if available
         guardrail_config = self.get_guardrail_config()
 
@@ -450,6 +470,11 @@ def invoke_model(
             "additionalModelRequestFields": additional_model_fields,
         }
 
+        # Add service tier if specified
+        if normalized_service_tier:
+            converse_params["serviceTier"] = normalized_service_tier
+            logger.info(f"Using service tier: {normalized_service_tier}")
+
         # Add guardrail config if available
         if guardrail_config:
             converse_params["guardrailConfig"] = guardrail_config
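Note that the client maps `standard` to the Converse API's `default` tier and drops invalid values after a warning rather than failing. A standalone version of that normalization, extracted for illustration (the free function name is ours; in the diff this logic is inline in `invoke_model`):

```python
import logging
from typing import Optional

logger = logging.getLogger(__name__)

def normalize_service_tier(service_tier: Optional[str]) -> Optional[str]:
    """Mirror the inline normalization: case-insensitive, whitespace-tolerant,
    'standard' mapped to 'default', unknown values dropped with a warning."""
    if not service_tier:
        return None
    tier_lower = service_tier.lower().strip()
    if tier_lower in ("priority", "flex"):
        return tier_lower
    if tier_lower in ("standard", "default"):
        return "default"
    logger.warning(
        "Invalid service_tier value '%s'. Valid values are: "
        "priority, standard, flex. Using default tier.",
        service_tier,
    )
    return None

print(normalize_service_tier("Standard"))  # -> default
print(normalize_service_tier(" flex "))    # -> flex
```

Because the result is only added to `converse_params` when truthy, an invalid tier silently falls back to the account's default tier instead of raising.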

lib/idp_common_pkg/idp_common/classification/service.py

Lines changed: 10 additions & 0 deletions
@@ -594,6 +594,10 @@ def _get_classification_config(self) -> Dict[str, Any]:
             "max_tokens": self.config.classification.max_tokens,
         }
 
+        # Add service tier (operation-specific or global)
+        if hasattr(self.config.classification, "service_tier"):
+            config["service_tier"] = self.config.classification.service_tier
+
         # Validate system prompt
         system_prompt = self.config.classification.system_prompt
         if not system_prompt:

@@ -1222,6 +1226,11 @@ def _invoke_bedrock_model(
         Returns:
             Dictionary with response and metering data
         """
+        # Get service tier from config (operation-specific or global)
+        service_tier = config.get("service_tier")
+        if not service_tier and hasattr(self.config, "service_tier"):
+            service_tier = self.config.service_tier
+
         return bedrock.invoke_model(
             model_id=config["model_id"],
             system_prompt=config["system_prompt"],

@@ -1231,6 +1240,7 @@ def _invoke_bedrock_model(
             top_p=config["top_p"],
             max_tokens=config["max_tokens"],
             context="Classification",
+            service_tier=service_tier,
         )
 
     def _create_unclassified_result(
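In the classification service the per-operation value comes from a plain dict while the global fallback is attribute-based. One subtlety: the `if not service_tier` check treats an empty string the same as `None`, so both fall back to the global tier. A small illustrative sketch (names are ours, not the service's API):

```python
from types import SimpleNamespace
from typing import Any, Dict, Optional

def resolve_tier(op_config: Dict[str, Any], global_config) -> Optional[str]:
    """Dict-level tier first; any falsy value defers to the global attribute."""
    service_tier = op_config.get("service_tier")
    if not service_tier and hasattr(global_config, "service_tier"):
        service_tier = global_config.service_tier
    return service_tier

global_cfg = SimpleNamespace(service_tier="standard")
print(resolve_tier({"service_tier": None}, global_cfg))        # global fallback
print(resolve_tier({"service_tier": ""}, global_cfg))          # empty string also falls back
print(resolve_tier({"service_tier": "priority"}, global_cfg))  # override wins
```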
