Skip to content

Commit 8567d88

Browse files
committed
added JSON file validation, updated discovery config to add Claude 4, fixed BDA to handle boolean type
1 parent 553155a commit 8567d88

File tree

12 files changed

+121
-97
lines changed

12 files changed

+121
-97
lines changed

lib/idp_common_pkg/idp_common/bda/schema_converter.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ def _create_field_schema(self, field: Dict[str, Any]) -> Dict[str, Any]:
128128
"number": "number",
129129
"currency": "number",
130130
"checkbox": "boolean",
131+
"boolean": "boolean",
131132
"date": "string",
132133
"table": "object",
133134
}

lib/idp_common_pkg/idp_common/discovery/classes_discovery.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ def __init__(
2121
input_bucket: str,
2222
input_prefix: str,
2323
config: Optional[dict] = None,
24-
bedrock_model_id: Optional[str] = None, # Keep for backward compatibility
2524
region: Optional[str] = "us-west-2",
2625
):
2726
self.input_bucket = input_bucket
@@ -38,11 +37,6 @@ def __init__(
3837
self.without_gt_config = self.discovery_config.get("without_ground_truth", {})
3938
self.with_gt_config = self.discovery_config.get("with_ground_truth", {})
4039

41-
# Backward compatibility: use bedrock_model_id if provided
42-
if bedrock_model_id:
43-
self.without_gt_config["model_id"] = bedrock_model_id
44-
self.with_gt_config["model_id"] = bedrock_model_id
45-
4640
# Initialize Bedrock client using the common pattern
4741
self.bedrock_client = bedrock.BedrockClient(region=self.region)
4842

lib/idp_common_pkg/tests/integration/test_discovery_config_integration.py

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -313,34 +313,8 @@ def test_config_validation_and_defaults(
313313
self.assertEqual(top_p, 0.1)
314314
self.assertEqual(max_tokens, 10000)
315315

316-
@patch("idp_common.discovery.classes_discovery.boto3.resource")
317-
@patch("idp_common.discovery.classes_discovery.bedrock.BedrockClient")
318-
def test_backward_compatibility_override(
319-
self, mock_bedrock_client, mock_boto3_resource
320-
):
321-
"""Test that legacy bedrock_model_id parameter overrides config."""
322-
# Setup mocks
323-
mock_table = Mock()
324-
mock_boto3_resource.return_value.Table.return_value = mock_table
325-
326-
legacy_model_id = "legacy-model-override"
327-
328-
# Initialize with both config and legacy parameter
329-
discovery = ClassesDiscovery(
330-
input_bucket=self.test_bucket,
331-
input_prefix=self.test_prefix,
332-
config=self.config_dict,
333-
bedrock_model_id=legacy_model_id, # This should override config
334-
region=self.test_region,
335-
)
336-
337-
# Verify legacy parameter overrides config
338-
self.assertEqual(discovery.without_gt_config["model_id"], legacy_model_id)
339-
self.assertEqual(discovery.with_gt_config["model_id"], legacy_model_id)
340-
341-
# But other config values should remain
342-
self.assertEqual(discovery.without_gt_config["temperature"], 0.8)
343-
self.assertEqual(discovery.with_gt_config["temperature"], 0.6)
316+
# Note: bedrock_model_id parameter was removed from ClassesDiscovery constructor
317+
# Model configuration is now handled through the config parameter only
344318

345319
def test_yaml_config_parsing(self):
346320
"""Test that YAML configuration is parsed correctly."""

lib/idp_common_pkg/tests/unit/discovery/test_classes_discovery.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,6 @@ def service(self):
123123
service = ClassesDiscovery(
124124
input_bucket="test-bucket",
125125
input_prefix="test-document.pdf",
126-
bedrock_model_id="anthropic.claude-3-sonnet-20240229-v1:0",
127126
region="us-west-2",
128127
)
129128

@@ -143,7 +142,6 @@ def test_init(self):
143142
service = ClassesDiscovery(
144143
input_bucket="test-bucket",
145144
input_prefix="test-document.pdf",
146-
bedrock_model_id="anthropic.claude-3-sonnet-20240229-v1:0",
147145
region="us-west-2",
148146
)
149147

@@ -180,7 +178,6 @@ def test_init_with_default_region(self):
180178
service = ClassesDiscovery(
181179
input_bucket="test-bucket",
182180
input_prefix="test-document.pdf",
183-
bedrock_model_id="anthropic.claude-3-sonnet-20240229-v1:0",
184181
region=None, # Explicitly pass None to trigger environment lookup
185182
)
186183

lib/idp_common_pkg/tests/unit/discovery/test_classes_discovery_config.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -108,30 +108,8 @@ def test_init_without_config_uses_defaults(
108108
self.assertIn("with_ground_truth", discovery.discovery_config)
109109
self.assertIn("output_format", discovery.discovery_config)
110110

111-
@patch("idp_common.discovery.classes_discovery.boto3.resource")
112-
@patch("idp_common.discovery.classes_discovery.bedrock.BedrockClient")
113-
def test_backward_compatibility_with_bedrock_model_id(
114-
self, mock_bedrock_client, mock_boto3_resource
115-
):
116-
"""Test backward compatibility when bedrock_model_id is provided."""
117-
# Setup mocks
118-
mock_table = Mock()
119-
mock_boto3_resource.return_value.Table.return_value = mock_table
120-
121-
test_model_id = "legacy-model-id"
122-
123-
# Initialize with legacy bedrock_model_id parameter
124-
discovery = ClassesDiscovery(
125-
input_bucket=self.test_bucket,
126-
input_prefix=self.test_prefix,
127-
config=self.sample_config,
128-
bedrock_model_id=test_model_id,
129-
region=self.test_region,
130-
)
131-
132-
# Verify legacy model_id overrides config
133-
self.assertEqual(discovery.without_gt_config["model_id"], test_model_id)
134-
self.assertEqual(discovery.with_gt_config["model_id"], test_model_id)
111+
# Note: bedrock_model_id parameter was removed from ClassesDiscovery constructor
112+
# Configuration is now handled through the config parameter
135113

136114
@patch("idp_common.discovery.classes_discovery.boto3.resource")
137115
@patch("idp_common.discovery.classes_discovery.bedrock.BedrockClient")

lib/idp_common_pkg/tests/unit/discovery/test_classes_discovery_integration.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,6 @@ def service_with_mocks(self):
162162
service = ClassesDiscovery(
163163
input_bucket="test-discovery-bucket",
164164
input_prefix="forms/w4-sample.pdf",
165-
bedrock_model_id="anthropic.claude-3-sonnet-20240229-v1:0",
166165
region="us-west-2",
167166
)
168167

patterns/pattern-1/template.yaml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -457,8 +457,8 @@ Resources:
457457
model_id:
458458
type: string
459459
description: Bedrock model ID for discovery without ground truth
460-
enum: ["us.amazon.nova-lite-v1:0", "us.amazon.nova-pro-v1:0", "us.amazon.nova-premier-v1:0", "us.anthropic.claude-3-haiku-20240307-v1:0", "us.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.anthropic.claude-3-7-sonnet-20250219-v1:0"]
461-
default: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
460+
enum: ["us.amazon.nova-lite-v1:0", "us.amazon.nova-pro-v1:0", "us.amazon.nova-premier-v1:0", "us.anthropic.claude-3-haiku-20240307-v1:0", "us.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.anthropic.claude-3-7-sonnet-20250219-v1:0", "us.anthropic.claude-sonnet-4-20250514-v1:0", "us.anthropic.claude-sonnet-4-20250514-v1:0:1m", "us.anthropic.claude-opus-4-20250514-v1:0", "us.anthropic.claude-opus-4-1-20250805-v1:0"]
461+
default: "us.amazon.nova-pro-v1:0"
462462
order: 0
463463
temperature:
464464
type: number
@@ -502,8 +502,8 @@ Resources:
502502
model_id:
503503
type: string
504504
description: Bedrock model ID for discovery with ground truth
505-
enum: ["us.amazon.nova-lite-v1:0", "us.amazon.nova-pro-v1:0", "us.amazon.nova-premier-v1:0", "us.anthropic.claude-3-haiku-20240307-v1:0", "us.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.anthropic.claude-3-7-sonnet-20250219-v1:0"]
506-
default: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
505+
enum: ["us.amazon.nova-lite-v1:0", "us.amazon.nova-pro-v1:0", "us.amazon.nova-premier-v1:0", "us.anthropic.claude-3-haiku-20240307-v1:0", "us.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.anthropic.claude-3-7-sonnet-20250219-v1:0", "us.anthropic.claude-sonnet-4-20250514-v1:0", "us.anthropic.claude-sonnet-4-20250514-v1:0:1m", "us.anthropic.claude-opus-4-20250514-v1:0", "us.anthropic.claude-opus-4-1-20250805-v1:0"]
506+
default: "us.amazon.nova-pro-v1:0"
507507
order: 0
508508
temperature:
509509
type: number
@@ -724,7 +724,6 @@ Resources:
724724
METRIC_NAMESPACE: !Ref StackName
725725
STACK_NAME: !Ref StackName
726726
LOG_LEVEL: !Ref LogLevel
727-
BEDROCK_MODEL_ID: !Ref BedrockModelId
728727
DISCOVERY_TRACKING_TABLE: !Ref DiscoveryTrackingTable
729728
CONFIGURATION_TABLE_NAME: !Ref ConfigurationTable
730729
BDA_PROJECT_ARN: !Ref BDAProjectArn

patterns/pattern-2/template.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -830,8 +830,8 @@ Resources:
830830
model_id:
831831
type: string
832832
description: Bedrock model ID for discovery without ground truth
833-
enum: ["us.amazon.nova-lite-v1:0", "us.amazon.nova-pro-v1:0", "us.amazon.nova-premier-v1:0", "us.anthropic.claude-3-haiku-20240307-v1:0", "us.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.anthropic.claude-3-7-sonnet-20250219-v1:0"]
834-
default: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
833+
enum: ["us.amazon.nova-lite-v1:0", "us.amazon.nova-pro-v1:0", "us.amazon.nova-premier-v1:0", "us.anthropic.claude-3-haiku-20240307-v1:0", "us.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.anthropic.claude-3-7-sonnet-20250219-v1:0", "us.anthropic.claude-sonnet-4-20250514-v1:0", "us.anthropic.claude-sonnet-4-20250514-v1:0:1m", "us.anthropic.claude-opus-4-20250514-v1:0", "us.anthropic.claude-opus-4-1-20250805-v1:0"]
834+
default: "us.amazon.nova-pro-v1:0"
835835
order: 0
836836
temperature:
837837
type: number
@@ -875,8 +875,8 @@ Resources:
875875
model_id:
876876
type: string
877877
description: Bedrock model ID for discovery with ground truth
878-
enum: ["us.amazon.nova-lite-v1:0", "us.amazon.nova-pro-v1:0", "us.amazon.nova-premier-v1:0", "us.anthropic.claude-3-haiku-20240307-v1:0", "us.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.anthropic.claude-3-7-sonnet-20250219-v1:0"]
879-
default: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
878+
enum: ["us.amazon.nova-lite-v1:0", "us.amazon.nova-pro-v1:0", "us.amazon.nova-premier-v1:0", "us.anthropic.claude-3-haiku-20240307-v1:0", "us.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.anthropic.claude-3-7-sonnet-20250219-v1:0", "us.anthropic.claude-sonnet-4-20250514-v1:0", "us.anthropic.claude-sonnet-4-20250514-v1:0:1m", "us.anthropic.claude-opus-4-20250514-v1:0", "us.anthropic.claude-opus-4-1-20250805-v1:0"]
879+
default: "us.amazon.nova-pro-v1:0"
880880
order: 0
881881
temperature:
882882
type: number

patterns/pattern-3/template.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -739,8 +739,8 @@ Resources:
739739
model_id:
740740
type: string
741741
description: Bedrock model ID for discovery without ground truth
742-
enum: ["us.amazon.nova-lite-v1:0", "us.amazon.nova-pro-v1:0", "us.amazon.nova-premier-v1:0", "us.anthropic.claude-3-haiku-20240307-v1:0", "us.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.anthropic.claude-3-7-sonnet-20250219-v1:0"]
743-
default: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
742+
enum: ["us.amazon.nova-lite-v1:0", "us.amazon.nova-pro-v1:0", "us.amazon.nova-premier-v1:0", "us.anthropic.claude-3-haiku-20240307-v1:0", "us.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.anthropic.claude-3-7-sonnet-20250219-v1:0", "us.anthropic.claude-sonnet-4-20250514-v1:0", "us.anthropic.claude-sonnet-4-20250514-v1:0:1m", "us.anthropic.claude-opus-4-20250514-v1:0", "us.anthropic.claude-opus-4-1-20250805-v1:0"]
743+
default: "us.amazon.nova-pro-v1:0"
744744
order: 0
745745
temperature:
746746
type: number
@@ -784,8 +784,8 @@ Resources:
784784
model_id:
785785
type: string
786786
description: Bedrock model ID for discovery with ground truth
787-
enum: ["us.amazon.nova-lite-v1:0", "us.amazon.nova-pro-v1:0", "us.amazon.nova-premier-v1:0", "us.anthropic.claude-3-haiku-20240307-v1:0", "us.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.anthropic.claude-3-7-sonnet-20250219-v1:0"]
788-
default: "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
787+
enum: ["us.amazon.nova-lite-v1:0", "us.amazon.nova-pro-v1:0", "us.amazon.nova-premier-v1:0", "us.anthropic.claude-3-haiku-20240307-v1:0", "us.anthropic.claude-3-5-sonnet-20241022-v2:0", "us.anthropic.claude-3-7-sonnet-20250219-v1:0", "us.anthropic.claude-sonnet-4-20250514-v1:0", "us.anthropic.claude-sonnet-4-20250514-v1:0:1m", "us.anthropic.claude-opus-4-20250514-v1:0", "us.anthropic.claude-opus-4-1-20250805-v1:0"]
788+
default: "us.amazon.nova-pro-v1:0"
789789
order: 0
790790
temperature:
791791
type: number

src/ui/src/components/common/utilities.js

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,32 @@ export const getTimestampStr = () => {
1515
const formattedDate = `${year}-${month}-${day}-${hour}:${minute}:${second}.${millisecond}`;
1616
return formattedDate;
1717
};
18+
19+
export const getJsonValidationError = (error) => {
20+
const message = error.message || error.toString();
21+
22+
// Common JSON syntax errors with user-friendly messages
23+
if (message.includes('Unexpected token')) {
24+
const match = message.match(/Unexpected token (.+?) in JSON at position (\d+)/);
25+
if (match) {
26+
const [, token, position] = match;
27+
return `Invalid character '${token}' found at position ${position}. Check for missing quotes, commas, or brackets.`;
28+
}
29+
return 'Invalid JSON syntax. Check for missing quotes, commas, or brackets.';
30+
}
31+
32+
if (message.includes('Unexpected end of JSON input')) {
33+
return 'Incomplete JSON file. The file appears to be cut off or missing closing brackets.';
34+
}
35+
36+
if (message.includes('Expected property name or')) {
37+
return 'Invalid property name. Property names must be enclosed in double quotes.';
38+
}
39+
40+
if (message.includes('Unexpected string in JSON')) {
41+
return 'Invalid string format. Check for unescaped quotes or missing commas between properties.';
42+
}
43+
44+
// Return the original message if we can't provide a better one
45+
return message;
46+
};

0 commit comments

Comments
 (0)