
Commit 9410787

Taniya Mathur committed
feat: migrate from poetry-based to idp-cli deployment approach
Replace the old poetry/make workflow with the new idp-cli scripts while maintaining backward compatibility
1 parent cc2f0d6 commit 9410787

27 files changed: +537 additions, -10162 deletions
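For orientation: the commit replaces the poetry/make deployment flow (cd ./scripts/sdlc/idp-cli, poetry install, make put, make wait) with idp-cli commands issued from Python scripts. A minimal sketch of the per-pattern command flow, with the command shapes taken from scripts/codebuild_deployment.py below; the stack name, template URL, and admin email here are placeholders, not values from this commit:

# Sketch only: prints the idp-cli commands the new scripts drive for one pattern.
stack, batch = "idp-0101-1200-p1", "test-p1"        # example names; real ones are timestamped
template_url = "https://example.com/idp-main.yaml"  # placeholder for the published template URL
admin_email = "admin@example.invalid"               # placeholder

for cmd in [
    f"idp-cli deploy --stack-name {stack} --template-url {template_url} "
    f"--pattern pattern-1 --admin-email {admin_email} --wait",
    f"idp-cli run-inference --stack-name {stack} --dir samples "
    f"--file-pattern lending_package.pdf --batch-id {batch} --monitor",
    f"idp-cli download-results --stack-name {stack} --batch-id {batch} --output-dir /tmp/results-p1",
    f"idp-cli delete --stack-name {stack} --force",
]:
    print(cmd)  # in the real script each command is passed to run_command()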

.gitlab-ci.yml

Lines changed: 31 additions & 33 deletions
@@ -49,15 +49,34 @@ developer_tests:
       junit: lib/idp_common_pkg/test-reports/test-results.xml
     expire_in: 1 week
 
+deployment_validation:
+  stage: deployment_validation
+  rules:
+    - when: on_success
+
+  before_script:
+    - apt-get update -y
+    - apt-get install curl unzip python3-pip -y
+    # Install AWS CLI
+    - curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
+    - unzip awscliv2.zip
+    - ./aws/install
+    # Install PyYAML for template analysis
+    - pip install PyYAML
+
+  script:
+    # Check if service role has sufficient permissions for main stack deployment
+    - python3 scripts/validate_service_role_permissions.py
+
 integration_tests:
   stage: integration_tests
+  timeout: 2h
   # variables:
   #   # In order to run tests in another account, add a AWS_CREDS_TARGET_ROLE variable to the Gitlab pipeline variables.
   #   AWS_CREDS_TARGET_ROLE: ${AWS_CREDS_TARGET_ROLE}
   #   AWS_DEFAULT_REGION: ${AWS_DEFAULT_REGION}
   #   IDP_ACCOUNT_ID: ${IDP_ACCOUNT_ID}
 
-  # Add rules to only run on develop branch
   # Add rules to only run on develop branch
   rules:
     - if: $CI_COMMIT_BRANCH == "develop"
@@ -73,44 +92,23 @@ integration_tests:
     - when: manual
 
   before_script:
-    - python --version
     - apt-get update -y
-    - apt-get install zip unzip curl make -y
-
-    # Install Poetry
-    - curl -sSL https://install.python-poetry.org | python3 -
-    - export PATH="/root/.local/bin:$PATH"
-    - poetry --version
+    - apt-get install zip unzip curl python3-pip -y
     # Install AWS CLI
     - curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
     - unzip awscliv2.zip
     - ./aws/install
+    # Install boto3 for Python script
+    - pip install boto3
 
   script:
     - aws --version
     - aws sts get-caller-identity --no-cli-pager
-    - cd ./scripts/sdlc/idp-cli
-    - poetry install
-    - make put
-    - make wait
-
-deployment_validation:
-  stage: deployment_validation
-  rules:
-    - when: on_success
-
-  before_script:
-    - apt-get update -y
-    - apt-get install curl unzip python3-pip -y
-    # Install AWS CLI
-    - curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
-    - unzip awscliv2.zip
-    - ./aws/install
-    # Install PyYAML for template analysis
-    - pip install PyYAML
-
-  script:
-    # Check if service role has sufficient permissions for main stack deployment
-    - python3 scripts/validate_service_role_permissions.py
-
+
+    # Set environment variables for Python script
+    - export IDP_ACCOUNT_ID=${IDP_ACCOUNT_ID:-020432867916}
+    - export AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:-us-east-1}
+    - export IDP_PIPELINE_NAME=idp-sdlc-deploy-pipeline
+
+    # Run integration test deployment
+    - python3 scripts/integration_test_deployment.py

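The reworked integration_tests job now hands configuration to the test driver through environment variables with shell-default fallbacks instead of poetry/make targets. scripts/integration_test_deployment.py itself is not included in this excerpt; a minimal sketch, assuming it reads the same variables the job exports:

# Assumed consumption of the variables exported by the CI job above;
# the actual script is not shown in this commit excerpt.
import os

account_id = os.environ.get("IDP_ACCOUNT_ID", "020432867916")
region = os.environ.get("AWS_DEFAULT_REGION", "us-east-1")
pipeline_name = os.environ.get("IDP_PIPELINE_NAME", "idp-sdlc-deploy-pipeline")

print(f"Running integration deployment for {pipeline_name} in account {account_id} ({region})")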
scripts/codebuild_deployment.py

Lines changed: 298 additions & 0 deletions
@@ -0,0 +1,298 @@
+#!/usr/bin/env python3
+"""
+CodeBuild Deployment Script
+
+Handles IDP stack deployment and testing in AWS CodeBuild environment.
+"""
+
+import os
+import re
+import subprocess
+import sys
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
+
+# Configuration for patterns to deploy
+DEPLOY_PATTERNS = [
+    {
+        "name": "Pattern 1 - BDA",
+        "id": "pattern-1",
+        "suffix": "p1",
+        "sample_file": "lending_package.pdf",
+        "verify_string": "ANYTOWN, USA 12345",
+        "result_location": "pages/0/result.json",
+        "content_path": "pages.0.representation.markdown",
+    },
+    {
+        "name": "Pattern 2 - OCR + Bedrock",
+        "id": "pattern-2",
+        "suffix": "p2",
+        "sample_file": "lending_package.pdf",
+        "verify_string": "ANYTOWN, USA 12345",
+        "result_location": "pages/1/result.json",
+        "content_path": "text",
+    },
+    # {"name": "Pattern 3 - UDOP + Bedrock", "id": "pattern-3", "suffix": "p3", "sample_file": "rvl_cdip_package.pdf", "verify_string": "WESTERN DARK FIRED TOBACCO GROWERS", "result_location": "pages/1/result.json", "content_path": "text"},
+]
+
+
+def run_command(cmd, check=True):
+    """Run shell command and return result"""
+    print(f"Running: {cmd}")
+    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+    if result.stdout:
+        print(result.stdout)
+    if result.stderr:
+        print(result.stderr, file=sys.stderr)
+    if check and result.returncode != 0:
+        print(f"Command failed with exit code {result.returncode}")
+        sys.exit(1)
+    return result
+
+
+def get_env_var(name, default=None):
+    """Get environment variable with optional default"""
+    value = os.environ.get(name, default)
+    if value is None:
+        print(f"Error: Environment variable {name} is required")
+        sys.exit(1)
+    return value
+
+
+def generate_stack_prefix():
+    """Generate unique stack prefix with timestamp"""
+    timestamp = datetime.now().strftime("%m%d-%H%M")  # Shorter format: MMDD-HHMM
+    return f"idp-{timestamp}"
+
+
+def publish_templates():
+    """Run publish.py to build and upload templates to S3"""
+    print("📦 Publishing templates to S3...")
+
+    # Get AWS account ID and region
+    account_id = get_env_var("IDP_ACCOUNT_ID", "020432867916")
+    region = get_env_var("AWS_DEFAULT_REGION", "us-east-1")
+
+    # Generate bucket name and prefix
+    bucket_basename = f"idp-sdlc-sourcecode-{account_id}-{region}"
+    prefix = f"codebuild-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
+
+    # Run publish.py
+    cmd = f"python3 publish.py {bucket_basename} {prefix} {region}"
+    result = run_command(cmd)
+
+    # Extract template URL from output
+    template_url_pattern = (
+        r"https://s3\.[^/]+\.amazonaws\.com/[^/]+/[^/]+/idp-main\.yaml"
+    )
+    template_url_match = re.search(template_url_pattern, result.stdout)
+
+    if template_url_match:
+        template_url = template_url_match.group(0)
+        print(f"✅ Template published: {template_url}")
+        return template_url
+    else:
+        print("❌ Failed to extract template URL from publish output")
+        sys.exit(1)
+
+
+def deploy_and_test_pattern(stack_prefix, pattern_config, admin_email, template_url):
+    """Deploy and test a specific IDP pattern"""
+    pattern_name = pattern_config["name"]
+    pattern_id = pattern_config["id"]
+    pattern_suffix = pattern_config["suffix"]
+    sample_file = pattern_config["sample_file"]
+    verify_string = pattern_config["verify_string"]
+    result_location = pattern_config["result_location"]
+    content_path = pattern_config["content_path"]
+
+    stack_name = f"{stack_prefix}-{pattern_suffix}"
+    batch_id = f"test-{pattern_suffix}"
+
+    print(f"[{pattern_name}] Starting deployment: {stack_name}")
+
+    try:
+        # Step 1: Deploy using template URL
+        print(f"[{pattern_name}] Step 1: Deploying stack...")
+        cmd = f"idp-cli deploy --stack-name {stack_name} --template-url {template_url} --pattern {pattern_id} --admin-email {admin_email} --wait"
+        run_command(cmd)
+        print(f"[{pattern_name}] ✅ Deployment completed")
+
+        # Step 2: Test stack status
+        print(f"[{pattern_name}] Step 2: Verifying stack status...")
+        cmd = f"aws cloudformation describe-stacks --stack-name {stack_name} --query 'Stacks[0].StackStatus' --output text"
+        result = run_command(cmd)
+
+        if "COMPLETE" not in result.stdout:
+            print(f"[{pattern_name}] ❌ Stack status: {result.stdout.strip()}")
+            return {
+                "stack_name": stack_name,
+                "pattern_name": pattern_name,
+                "success": False,
+            }
+
+        print(f"[{pattern_name}] ✅ Stack is healthy")
+
+        # Step 3: Run inference test
+        print(f"[{pattern_name}] Step 3: Running inference test with {sample_file}...")
+        cmd = f"idp-cli run-inference --stack-name {stack_name} --dir samples --file-pattern {sample_file} --batch-id {batch_id} --monitor"
+        run_command(cmd)
+        print(f"[{pattern_name}] ✅ Inference completed")
+
+        # Step 4: Download and verify results
+        print(f"[{pattern_name}] Step 4: Downloading results...")
+        results_dir = f"/tmp/results-{pattern_suffix}"
+
+        cmd = f"idp-cli download-results --stack-name {stack_name} --batch-id {batch_id} --output-dir {results_dir}"
+        run_command(cmd)
+
+        # Step 5: Verify result content
+        print(f"[{pattern_name}] Step 5: Verifying result content...")
+
+        # Find the result file at the specified location
+        cmd = f"find {results_dir} -path '*/{result_location}' | head -1"
+        result = run_command(cmd)
+        result_file = result.stdout.strip()
+
+        if not result_file:
+            print(f"[{pattern_name}] ❌ No result file found at {result_location}")
+            return {
+                "stack_name": stack_name,
+                "pattern_name": pattern_name,
+                "success": False,
+            }
+
+        # Verify the result file contains expected content
+        try:
+            import json
+
+            with open(result_file, "r") as f:
+                result_json = json.load(f)
+
+            # Extract text content using the specified path
+            text_content = result_json
+            for key in content_path.split("."):
+                if key.isdigit():
+                    text_content = text_content[int(key)]
+                else:
+                    text_content = text_content[key]
+
+            # Verify expected string in content
+            if verify_string not in text_content:
+                print(
+                    f"[{pattern_name}] ❌ Text content does not contain expected string: '{verify_string}'"
+                )
+                print(
+                    f"[{pattern_name}] Actual text starts with: '{text_content[:100]}...'"
+                )
+                return {
+                    "stack_name": stack_name,
+                    "pattern_name": pattern_name,
+                    "success": False,
+                }
+
+            print(
+                f"[{pattern_name}] ✅ Found expected verification string: '{verify_string}'"
+            )
+            return {
+                "stack_name": stack_name,
+                "pattern_name": pattern_name,
+                "success": True,
+            }
+
+        except Exception as e:
+            print(f"[{pattern_name}] ❌ Failed to validate result content: {e}")
+            return {
+                "stack_name": stack_name,
+                "pattern_name": pattern_name,
+                "success": False,
+            }
+
+    except Exception as e:
+        print(f"[{pattern_name}] ❌ Testing failed: {e}")
+        return {
+            "stack_name": stack_name,
+            "pattern_name": pattern_name,
+            "success": False,
+        }
+
+
+def cleanup_stack(stack_name, pattern_name):
+    """Clean up a deployed stack"""
+    print(f"[{pattern_name}] Cleaning up: {stack_name}")
+    try:
+        run_command(f"idp-cli delete --stack-name {stack_name} --force", check=False)
+        print(f"[{pattern_name}] ✅ Cleanup completed")
+    except Exception as e:
+        print(f"[{pattern_name}] ⚠️ Cleanup failed: {e}")
+
+
+def main():
+    """Main execution function"""
+    print("Starting CodeBuild deployment process...")
+
+    admin_email = get_env_var("IDP_ADMIN_EMAIL", "[email protected]")
+    stack_prefix = generate_stack_prefix()
+
+    print(f"Stack Prefix: {stack_prefix}")
+    print(f"Admin Email: {admin_email}")
+    print(f"Patterns to deploy: {[p['name'] for p in DEPLOY_PATTERNS]}")
+
+    # Step 1: Publish templates to S3
+    template_url = publish_templates()
+
+    deployed_stacks = []
+    all_success = True
+
+    # Step 2: Deploy and test patterns concurrently
+    print("🚀 Starting concurrent deployment of all patterns...")
+    with ThreadPoolExecutor(max_workers=len(DEPLOY_PATTERNS)) as executor:
+        # Submit all deployment tasks
+        future_to_pattern = {
+            executor.submit(
+                deploy_and_test_pattern,
+                stack_prefix,
+                pattern_config,
+                admin_email,
+                template_url,
+            ): pattern_config
+            for pattern_config in DEPLOY_PATTERNS
+        }
+
+        # Collect results as they complete
+        for future in as_completed(future_to_pattern):
+            pattern_config = future_to_pattern[future]
+            try:
+                result = future.result()
+                deployed_stacks.append(result)
+                if not result["success"]:
+                    all_success = False
+                    print(f"[{pattern_config['name']}] ❌ Failed")
+                else:
+                    print(f"[{pattern_config['name']}] ✅ Success")
+            except Exception as e:
+                print(f"[{pattern_config['name']}] ❌ Exception: {e}")
+                all_success = False
+
+    # Step 3: Cleanup all stacks concurrently
+    print("🧹 Starting concurrent cleanup of all stacks...")
+    with ThreadPoolExecutor(max_workers=len(deployed_stacks)) as executor:
+        cleanup_futures = [
+            executor.submit(cleanup_stack, result["stack_name"], result["pattern_name"])
+            for result in deployed_stacks
+        ]
+
+        # Wait for all cleanups to complete
+        for future in as_completed(cleanup_futures):
+            future.result()  # Wait for completion
+
+    if all_success:
+        print("🎉 All pattern deployments completed successfully!")
+        sys.exit(0)
+    else:
+        print("💥 Some deployments failed!")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

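The least obvious part of the verification logic above is the dotted content_path lookup in deploy_and_test_pattern: a path such as pages.0.representation.markdown walks nested dicts and lists, treating numeric segments as list indices. A standalone illustration with made-up sample data:

# Made-up result document, shaped like the pages/0/result.json a pattern produces.
result_json = {"pages": [{"representation": {"markdown": "... ANYTOWN, USA 12345 ..."}}]}

content = result_json
for key in "pages.0.representation.markdown".split("."):
    content = content[int(key)] if key.isdigit() else content[key]

assert "ANYTOWN, USA 12345" in content  # mirrors the verify_string check in the script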