
Commit 657b707

Merge branch 'feature/test-set-improvements' into 'develop'
feat: improve test set file copying and UI selection handling

See merge request genaiic-reusable-assets/engagement-artifacts/genaiic-idp-accelerator!450
2 parents 073c460 + d6becd8

File tree

4 files changed, +177 -49 lines

Lines changed: 82 additions & 0 deletions
@@ -0,0 +1,82 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: MIT-0
+
+import pytest
+
+
+@pytest.mark.unit
+def test_path_extraction_logic():
+    """Test the path extraction logic for testset bucket files"""
+    # Simulate the path extraction logic from _copy_input_files_from_test_set_bucket
+    file_key = "fcc_benchmark/input/fcc_benchmark/033f718b16cb597c065930410752c294.pdf"
+    test_set_id = "fcc_demo_test_set"
+
+    # Extract actual file path from test_set/input/file_path
+    path_parts = file_key.split("/")
+    if len(path_parts) >= 3 and path_parts[1] == "input":
+        actual_file_path = "/".join(path_parts[2:])
+        dest_key = f"{test_set_id}/input/{actual_file_path}"
+    else:
+        dest_key = f"{test_set_id}/input/{file_key}"
+
+    expected = (
+        "fcc_demo_test_set/input/fcc_benchmark/033f718b16cb597c065930410752c294.pdf"
+    )
+    assert dest_key == expected
+
+
+@pytest.mark.unit
+def test_baseline_path_extraction_logic():
+    """Test the path extraction logic for baseline files from testset bucket"""
+    # Simulate the path extraction logic from _copy_baseline_from_testset
+    file_key = "fcc_benchmark/input/fcc_benchmark/033f718b16cb597c065930410752c294.pdf"
+    test_set_id = "demo_test_set"
+
+    # Extract test set name and file name from path (format: test_set_name/input/file_name)
+    path_parts = file_key.split("/")
+    if len(path_parts) >= 3 and path_parts[1] == "input":
+        source_test_set_name = path_parts[0]
+        file_name = "/".join(path_parts[2:])  # Get full path after 'input/'
+
+        # Source baseline path in testset bucket
+        source_baseline_prefix = f"{source_test_set_name}/baseline/{file_name}/"
+        # Destination baseline path
+        dest_baseline_prefix = f"{test_set_id}/baseline/{file_name}/"
+
+        expected_source = (
+            "fcc_benchmark/baseline/fcc_benchmark/033f718b16cb597c065930410752c294.pdf/"
+        )
+        expected_dest = (
+            "demo_test_set/baseline/fcc_benchmark/033f718b16cb597c065930410752c294.pdf/"
+        )
+
+        assert source_baseline_prefix == expected_source
+        assert dest_baseline_prefix == expected_dest
+
+
+@pytest.mark.unit
+def test_path_extraction_edge_cases():
+    """Test edge cases for path extraction"""
+    test_set_id = "test-set-1"
+
+    # Test normal file without input path
+    file_key = "simple_file.pdf"
+    path_parts = file_key.split("/")
+    if len(path_parts) >= 3 and path_parts[1] == "input":
+        actual_file_path = "/".join(path_parts[2:])
+        dest_key = f"{test_set_id}/input/{actual_file_path}"
+    else:
+        dest_key = f"{test_set_id}/input/{file_key}"
+
+    assert dest_key == "test-set-1/input/simple_file.pdf"
+
+    # Test malformed path
+    file_key = "malformed/path.pdf"
+    path_parts = file_key.split("/")
+    if len(path_parts) >= 3 and path_parts[1] == "input":
+        actual_file_path = "/".join(path_parts[2:])
+        dest_key = f"{test_set_id}/input/{actual_file_path}"
+    else:
+        dest_key = f"{test_set_id}/input/{file_key}"
+
+    assert dest_key == "test-set-1/input/malformed/path.pdf"
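
The three tests above inline the same extraction branch that the Lambda now uses. A minimal sketch of how that mapping could live in one shared pure helper that both the Lambda and the tests import; the helper name build_input_dest_key and its module are hypothetical, not part of this commit:

# Hypothetical shared helper -- mirrors the branch used in
# _copy_input_files_from_test_set_bucket; not part of this commit.
def build_input_dest_key(test_set_id: str, file_key: str) -> str:
    """Map a source key to its destination under <test_set_id>/input/."""
    path_parts = file_key.split("/")
    if len(path_parts) >= 3 and path_parts[1] == "input":
        # Strip the <source_test_set>/input/ prefix, keep the rest of the path
        return f"{test_set_id}/input/{'/'.join(path_parts[2:])}"
    # Keys without the test_set/input/ layout are copied through unchanged
    return f"{test_set_id}/input/{file_key}"


def test_build_input_dest_key_examples():
    assert (
        build_input_dest_key("fcc_demo_test_set", "fcc_benchmark/input/fcc_benchmark/doc.pdf")
        == "fcc_demo_test_set/input/fcc_benchmark/doc.pdf"
    )
    assert build_input_dest_key("test-set-1", "simple_file.pdf") == "test-set-1/input/simple_file.pdf"

Factoring the mapping out this way would let the unit tests assert against the exact code path the handler executes instead of a re-implementation.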

src/lambda/test_set_file_copier/index.py

Lines changed: 76 additions & 48 deletions
@@ -60,7 +60,7 @@ def handler(event, context):
         path_parts = file_key.split('/')
         if len(path_parts) >= 3 and path_parts[1] == 'input':
             test_set_name = path_parts[0]
-            file_name = path_parts[2]
+            file_name = '/'.join(path_parts[2:])  # Get full path after 'input/'
             baseline_prefix = f"{test_set_name}/baseline/{file_name}/"
             baseline_check_bucket = source_bucket
         else:
@@ -84,14 +84,20 @@ def handler(event, context):
     if missing_baselines:
         raise ValueError(f"Missing baseline folders for: {', '.join(missing_baselines)}")

+    # Update status to COPYING before starting file operations
+    _update_test_set_status(tracking_table, test_set_id, 'COPYING')
+
     # Copy input files to test set bucket
-    _copy_files_to_test_set(source_bucket, test_set_bucket, test_set_id, 'input', matching_files)
+    if bucket_type == 'testset':
+        _copy_input_files_from_test_set_bucket(test_set_id, matching_files)
+    else:
+        _copy_input_files_from_input_bucket(test_set_id, matching_files)

     # Copy baseline folders to test set bucket
     if bucket_type == 'testset':
-        _copy_baseline_from_testset(source_bucket, test_set_bucket, test_set_id, matching_files)
+        _copy_baseline_from_testset(test_set_id, matching_files)
     else:
-        _copy_files_to_test_set(baseline_bucket, test_set_bucket, test_set_id, 'baseline', matching_files)
+        _copy_baseline_from_baseline_bucket(test_set_id, matching_files)

     logger.info(f"Copied {len(matching_files)} input files and {len(matching_files)} baseline folders")

@@ -107,57 +113,79 @@ def handler(event, context):

     return {'statusCode': 200}

-def _copy_files_to_test_set(source_bucket, dest_bucket, test_set_id, folder_type, files):
-    """Copy files from source bucket to test set bucket folder"""
+def _copy_input_files_from_input_bucket(test_set_id, files):
+    """Copy input files from input bucket to test set bucket"""
+    input_bucket = os.environ['INPUT_BUCKET']
+    test_set_bucket = os.environ['TEST_SET_BUCKET']

     for file_key in files:
-        if folder_type == 'baseline':
-            # For baseline, copy entire folder structure
-            baseline_prefix = f"{file_key}/"
-            dest_prefix = f"{test_set_id}/baseline/{file_key}/"
-
-            # List all objects in the baseline folder
-            paginator = s3.get_paginator('list_objects_v2')
-            pages = paginator.paginate(Bucket=source_bucket, Prefix=baseline_prefix)
-
-            for page in pages:
-                if 'Contents' in page:
-                    for obj in page['Contents']:
-                        source_key = obj['Key']
-                        # Replace the baseline prefix with the test set prefix
-                        dest_key = source_key.replace(baseline_prefix, dest_prefix, 1)
-
-                        # Copy file
-                        s3.copy_object(
-                            CopySource={'Bucket': source_bucket, 'Key': source_key},
-                            Bucket=dest_bucket,
-                            Key=dest_key
-                        )
-
-                        logger.info(f"Copied baseline file: {source_key} -> {dest_bucket}/{dest_key}")
+        dest_key = f"{test_set_id}/input/{file_key}"
+
+        s3.copy_object(
+            CopySource={'Bucket': input_bucket, 'Key': file_key},
+            Bucket=test_set_bucket,
+            Key=dest_key
+        )
+
+        logger.info(f"Copied input file: {file_key} -> {test_set_bucket}/{dest_key}")
+
+def _copy_input_files_from_test_set_bucket(test_set_id, files):
+    """Copy input files from test set bucket to new test set folder"""
+    test_set_bucket = os.environ['TEST_SET_BUCKET']
+
+    for file_key in files:
+        # Extract actual file path from test_set/input/file_path
+        path_parts = file_key.split('/')
+        if len(path_parts) >= 3 and path_parts[1] == 'input':
+            actual_file_path = '/'.join(path_parts[2:])
+            dest_key = f"{test_set_id}/input/{actual_file_path}"
         else:
-            # For input files, copy individual file
-            source_key = file_key
-            dest_key = f"{test_set_id}/{folder_type}/{file_key}"
-
-            # Copy file
-            s3.copy_object(
-                CopySource={'Bucket': source_bucket, 'Key': source_key},
-                Bucket=dest_bucket,
-                Key=dest_key
-            )
-
-            logger.info(f"Copied {folder_type} file: {source_key} -> {dest_bucket}/{dest_key}")
+            dest_key = f"{test_set_id}/input/{file_key}"
+
+        s3.copy_object(
+            CopySource={'Bucket': test_set_bucket, 'Key': file_key},
+            Bucket=test_set_bucket,
+            Key=dest_key
+        )
+
+        logger.info(f"Copied input file: {file_key} -> {test_set_bucket}/{dest_key}")
+
+def _copy_baseline_from_baseline_bucket(test_set_id, files):
+    """Copy baseline folders from baseline bucket to test set bucket"""
+    baseline_bucket = os.environ['BASELINE_BUCKET']
+    test_set_bucket = os.environ['TEST_SET_BUCKET']
+
+    for file_key in files:
+        baseline_prefix = f"{file_key}/"
+        dest_prefix = f"{test_set_id}/baseline/{file_key}/"
+
+        paginator = s3.get_paginator('list_objects_v2')
+        pages = paginator.paginate(Bucket=baseline_bucket, Prefix=baseline_prefix)
+
+        for page in pages:
+            if 'Contents' in page:
+                for obj in page['Contents']:
+                    source_key = obj['Key']
+                    dest_key = source_key.replace(baseline_prefix, dest_prefix, 1)
+
+                    s3.copy_object(
+                        CopySource={'Bucket': baseline_bucket, 'Key': source_key},
+                        Bucket=test_set_bucket,
+                        Key=dest_key
+                    )
+
+                    logger.info(f"Copied baseline file: {source_key} -> {test_set_bucket}/{dest_key}")

-def _copy_baseline_from_testset(source_bucket, dest_bucket, test_set_id, files):
+def _copy_baseline_from_testset(test_set_id, files):
     """Copy baseline files from testset bucket where baselines are in test_set/baseline/ path"""
+    test_set_bucket = os.environ['TEST_SET_BUCKET']

     for file_key in files:
         # Extract test set name and file name from path (format: test_set_name/input/file_name)
         path_parts = file_key.split('/')
         if len(path_parts) >= 3 and path_parts[1] == 'input':
             source_test_set_name = path_parts[0]
-            file_name = path_parts[2]
+            file_name = '/'.join(path_parts[2:])  # Get full path after 'input/'

             # Source baseline path in testset bucket
             source_baseline_prefix = f"{source_test_set_name}/baseline/{file_name}/"
@@ -166,7 +194,7 @@ def _copy_baseline_from_testset(source_bucket, dest_bucket, test_set_id, files):

             # List all objects in the source baseline folder
             paginator = s3.get_paginator('list_objects_v2')
-            pages = paginator.paginate(Bucket=source_bucket, Prefix=source_baseline_prefix)
+            pages = paginator.paginate(Bucket=test_set_bucket, Prefix=source_baseline_prefix)

             for page in pages:
                 if 'Contents' in page:
@@ -177,12 +205,12 @@ def _copy_baseline_from_testset(source_bucket, dest_bucket, test_set_id, files):

                     # Copy file
                     s3.copy_object(
-                        CopySource={'Bucket': source_bucket, 'Key': source_key},
-                        Bucket=dest_bucket,
+                        CopySource={'Bucket': test_set_bucket, 'Key': source_key},
+                        Bucket=test_set_bucket,
                         Key=dest_key
                     )

-                    logger.info(f"Copied testset baseline file: {source_key} -> {dest_bucket}/{dest_key}")
+                    logger.info(f"Copied testset baseline file: {source_key} -> {test_set_bucket}/{dest_key}")
         else:
             logger.warning(f"Unexpected file path format for testset baseline: {file_key}")
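
Because the refactored helpers resolve their buckets from environment variables and go through the module-level s3 client, they can be exercised without real S3 by patching both. A minimal sketch, assuming the handler module is importable as index (the import path and test placement are assumptions, not part of this commit):

# Sketch only -- assumes src/lambda/test_set_file_copier is on sys.path so the
# handler module imports as `index`, and that `index.s3` is its module-level client.
import os
from unittest.mock import patch

import pytest


@pytest.mark.unit
def test_copy_input_files_from_test_set_bucket_strips_source_prefix():
    import index  # hypothetical import path for the Lambda module

    with patch.dict(os.environ, {"TEST_SET_BUCKET": "test-set-bucket"}), \
         patch.object(index, "s3") as mock_s3:
        index._copy_input_files_from_test_set_bucket(
            "demo_test_set",
            ["fcc_benchmark/input/fcc_benchmark/doc.pdf"],
        )

    # The source fcc_benchmark/input/ prefix should be replaced by demo_test_set/input/
    mock_s3.copy_object.assert_called_once_with(
        CopySource={"Bucket": "test-set-bucket", "Key": "fcc_benchmark/input/fcc_benchmark/doc.pdf"},
        Bucket="test-set-bucket",
        Key="demo_test_set/input/fcc_benchmark/doc.pdf",
    )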

src/ui/src/components/test-studio/TestSets.jsx

Lines changed: 13 additions & 1 deletion
@@ -82,6 +82,17 @@ const TestSets = () => {
     }
   };

+  // Preserve selections when testSets array changes
+  React.useEffect(() => {
+    if (selectedItems.length > 0) {
+      const selectedIds = new Set(selectedItems.map((item) => item.id));
+      const updatedSelections = testSets.filter((ts) => selectedIds.has(ts.id));
+      if (updatedSelections.length !== selectedItems.length || !updatedSelections.every((item, index) => item === selectedItems[index])) {
+        setSelectedItems(updatedSelections);
+      }
+    }
+  }, [testSets]);
+
   React.useEffect(() => {
     loadTestSets();
   }, []);
@@ -518,6 +529,7 @@ const TestSets = () => {
         selectedItems={selectedItems}
         onSelectionChange={({ detail }) => setSelectedItems(detail.selectedItems)}
         selectionType="multi"
+        isItemDisabled={(item) => item.status !== 'COMPLETED' && item.status !== 'FAILED'}
         empty={
           <Box textAlign="center" color="inherit">
             <b>No test sets</b>
@@ -837,7 +849,7 @@ const TestSets = () => {
       >
         <Box>
           {matchingFiles.length > 0 ? (
-            <ul>
+            <ul style={{ fontSize: '12px' }}>
              {matchingFiles.map((file) => (
                <li key={file}>{file}</li>
              ))}

template.yaml

Lines changed: 6 additions & 0 deletions
@@ -9020,6 +9020,12 @@ Outputs:
   S3ConfigurationBucketConsoleURL:
     Description: Configuration S3 bucket console URL
     Value: !Sub https://s3.console.aws.amazon.com/s3/buckets/${ConfigurationBucket}
+  S3TestSetBucketName:
+    Description: Test Set S3 bucket name
+    Value: !Ref TestSetBucket
+  S3TestSetBucketConsoleURL:
+    Description: Test Set S3 bucket console URL
+    Value: !Sub https://s3.console.aws.amazon.com/s3/buckets/${TestSetBucket}
   StateMachineArn:
     Description: Step Functions State machine ARN
     Value: !If
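
The two new Outputs expose the test set bucket alongside the existing bucket outputs. A short sketch of reading them from a deployed stack with boto3; the stack name "IDP" is a placeholder, not something this commit defines:

# Sketch: look up the new test set bucket outputs after deployment.
# "IDP" is a placeholder stack name.
import boto3

cfn = boto3.client("cloudformation")
stack = cfn.describe_stacks(StackName="IDP")["Stacks"][0]
outputs = {o["OutputKey"]: o["OutputValue"] for o in stack.get("Outputs", [])}

print(outputs["S3TestSetBucketName"])        # physical name of TestSetBucket
print(outputs["S3TestSetBucketConsoleURL"])  # S3 console URL for that bucket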
