Skip to content
This repository was archived by the owner on Jul 21, 2025. It is now read-only.

Commit 4863140

Browse files
hrodmngadomski
andauthored
feat: Add AWS CDK deployment (#44)
This adds an AWS CDK (Python) application that can be used to deploy a stac-fastapi-geoparquet application to a Lambda function with an HTTP API. Remaining tasks: - [x] Pick an AWS account to deploy this in - [x] Set up OIDC role for Github Actions (if not already done) - [x] Set the role arn in ci.yml Stretch goals - [ ] See if there is a better way to do IP-based throttling @alukach @zacdezgeo is there a particular AWS account you suggest using? Maybe `Development Seed - Main`? Resolves #9 --------- Co-authored-by: Pete Gadomski <[email protected]>
1 parent ba5b888 commit 4863140

File tree

14 files changed

+591
-3
lines changed

14 files changed

+591
-3
lines changed

.github/workflows/ci.yml

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ on:
55
branches:
66
- main
77
pull_request:
8+
workflow_dispatch:
9+
release:
10+
types: [published]
811

912
jobs:
1013
lint-and-test:
@@ -23,3 +26,58 @@ jobs:
2326
run: scripts/test
2427
- name: Validate
2528
run: scripts/validate
29+
30+
deploy:
  name: Deploy
  needs: [lint-and-test]
  permissions:
    id-token: write # needed to request the OIDC token used to assume the AWS role
    contents: read
  runs-on: ubuntu-latest
  # Deploy only for published releases and manual runs, never for pushes or PRs.
  if: github.event_name == 'release' || github.event_name == 'workflow_dispatch'
  env:
    # STACK_* variables are read by infrastructure/aws/config.py
    STACK_NAME: stac-fastapi-geoparquet-labs-375
    STACK_STAGE: dev
    STACK_OWNER: labs-375
    # workflow_dispatch runs carry no release payload, so fall back to the
    # ref name instead of tagging the stack with an empty release.
    STACK_RELEASE: ${{ github.event.release.tag_name || github.ref_name }}
    STACK_BUCKET_NAME: stac-fastapi-geoparquet-devseed
    STACK_GEOPARQUET_KEY: naip.parquet
    STACK_RATE_LIMIT: 10

  defaults:
    run:
      working-directory: infrastructure/aws

  steps:
    - uses: actions/checkout@v4

    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: arn:aws:iam::390960605471:role/labs-375-stac-fastapi-geoparquet-github-actions
        role-session-name: stac-fastapi-geoparquet-deploy
        aws-region: us-east-1

    - name: Set up node
      uses: actions/setup-node@v4
      with:
        node-version: 22

    - name: Install uv
      uses: astral-sh/setup-uv@v3

    - name: Install dependencies
      run: |
        uv sync --only-group deploy
        uv run --only-group deploy npm install

    - name: CDK Synth
      run: uv run --only-group deploy npm run cdk -- synth

    - name: CDK Deploy
      run: |
        uv run --only-group deploy npm run cdk -- deploy --require-approval never
        # The stack only creates the bucket; the geoparquet file is uploaded here.
        aws s3 cp ../../data/naip.parquet "s3://${STACK_BUCKET_NAME}/${STACK_GEOPARQUET_KEY}"

        # The CloudFormation stack is named "<STACK_NAME>-<STACK_STAGE>".
        API_URL=$(aws cloudformation describe-stacks --stack-name "${STACK_NAME}-${STACK_STAGE}" --query 'Stacks[0].Outputs[?OutputKey==`ApiURL`].OutputValue' --output text)
        echo "::notice title=API URL::${API_URL}"

.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,10 @@ cython_debug/
172172
# PyPI configuration file
173173
.pypirc
174174
.vscode/
175+
176+
# AWS deployment
177+
infrastructure/aws/node_modules/
178+
infrastructure/aws/cdk.out/
179+
infrastructure/aws/.env
180+
infrastructure/aws/cdk-outputs.json
181+

infrastructure/aws/.env.local

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
STACK_NAME=stac-fastapi-geoparquet-test
2+
STACK_STAGE=test
3+
STACK_OWNER=hrodmn
4+
STACK_RELEASE=dev
5+
STACK_BUCKET_NAME=stac-fastapi-geoparquet-test
6+
STACK_GEOPARQUET_KEY=naip.parquet
7+
STACK_RATE_LIMIT=50

infrastructure/aws/__init__.py

Whitespace-only changes.

infrastructure/aws/app.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
"""AWS CDK application for the stac-fastapi-geoparquet Stack
2+
3+
Generates a Lambda function with an API Gateway trigger and an S3 bucket.
4+
5+
After deploying the stack you will need to make sure the geoparquet file
6+
specified in the config gets uploaded to the bucket associated with this stack!
7+
"""
8+
9+
import os
10+
from typing import Any
11+
12+
from aws_cdk import (
13+
App,
14+
CfnOutput,
15+
Duration,
16+
RemovalPolicy,
17+
Stack,
18+
Tags,
19+
)
20+
from aws_cdk.aws_apigatewayv2 import HttpApi, HttpStage, ThrottleSettings
21+
from aws_cdk.aws_apigatewayv2_integrations import HttpLambdaIntegration
22+
from aws_cdk.aws_iam import AnyPrincipal, Effect, PolicyStatement
23+
from aws_cdk.aws_lambda import Code, Function, Runtime
24+
from aws_cdk.aws_logs import RetentionDays
25+
from aws_cdk.aws_s3 import BlockPublicAccess, Bucket
26+
from aws_cdk.custom_resources import (
27+
AwsCustomResource,
28+
AwsCustomResourcePolicy,
29+
AwsSdkCall,
30+
PhysicalResourceId,
31+
)
32+
from config import Config
33+
from constructs import Construct
34+
35+
36+
class StacFastApiGeoparquetStack(Stack):
    """CDK stack that serves stac-fastapi-geoparquet from a Lambda function.

    The stack defines:

    * an S3 bucket with public ``s3:GetObject`` access that a custom resource
      switches to requester-pays; the geoparquet file must be uploaded to it
      separately,
    * a Lambda function whose code is built with
      ``infrastructure/aws/lambda/Dockerfile``,
    * an HTTP API whose ``$default`` stage sends every request to the Lambda
      function, with optional throttling.

    CloudFormation outputs: ``BucketName`` and ``ApiURL``.
    """

    def __init__(
        self,
        scope: Construct,
        construct_id: str,
        config: Config,
        runtime: Runtime = Runtime.PYTHON_3_12,
        **kwargs: Any,
    ) -> None:
        """Define all resources of the stack.

        Args:
            scope: Parent construct of the stack.
            construct_id: Identifier of the stack within ``scope``.
            config: Deployment settings (tags, bucket, Lambda sizing,
                concurrency and rate limiting).
            runtime: Lambda runtime; its version string is also passed to the
                Docker build as ``PYTHON_VERSION``.
            **kwargs: Forwarded unchanged to ``Stack.__init__``.

        Raises:
            RuntimeError: If the API stage does not expose a URL.
        """
        super().__init__(scope, construct_id, **kwargs)

        for key, value in config.tags.items():
            Tags.of(self).add(key, value)

        # Only stacks deployed with stage "test" delete their bucket on teardown.
        removal_policy = (
            RemovalPolicy.DESTROY if config.stage == "test" else RemovalPolicy.RETAIN
        )

        bucket = Bucket(
            scope=self,
            id="bucket",
            bucket_name=config.bucket_name,
            versioned=True,
            removal_policy=removal_policy,
            public_read_access=True,
            # Every public-access block is disabled so the public policy applies.
            block_public_access=BlockPublicAccess(
                block_public_acls=False,
                block_public_policy=False,
                ignore_public_acls=False,
                restrict_public_buckets=False,
            ),
        )

        # make the bucket public, requester-pays
        bucket.add_to_resource_policy(
            PolicyStatement(
                actions=["s3:GetObject"],
                resources=[bucket.arn_for_objects("*")],
                principals=[AnyPrincipal()],
                effect=Effect.ALLOW,
            )
        )

        # Requester-pays is applied with an SDK call from a custom resource,
        # both when the resource is created and when it is updated.
        add_request_pay = AwsSdkCall(
            action="putBucketRequestPayment",
            service="S3",
            region=self.region,
            parameters={
                "Bucket": bucket.bucket_name,
                "RequestPaymentConfiguration": {"Payer": "Requester"},
            },
            physical_resource_id=PhysicalResourceId.of(bucket.bucket_name),
        )

        aws_custom_resource = AwsCustomResource(
            self,
            "RequesterPaysCustomResource",
            policy=AwsCustomResourcePolicy.from_sdk_calls(
                resources=[bucket.bucket_arn]
            ),
            on_create=add_request_pay,
            on_update=add_request_pay,
        )

        # The SDK call needs the bucket to exist first.
        aws_custom_resource.node.add_dependency(bucket)

        CfnOutput(self, "BucketName", value=bucket.bucket_name)

        # The Docker build context is the repository root, two levels above
        # this file. Anchoring on __file__ instead of the working directory
        # keeps the path right no matter where the CDK CLI is invoked from.
        repo_root = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "..", "..")
        )

        api_lambda = Function(
            scope=self,
            id="lambda",
            runtime=runtime,
            handler="handler.handler",
            memory_size=config.memory,
            # None (the default) leaves the function on the account-level limit.
            reserved_concurrent_executions=config.max_concurrent,
            log_retention=RetentionDays.ONE_WEEK,
            timeout=Duration.seconds(config.timeout),
            code=Code.from_docker_build(
                path=repo_root,
                file="infrastructure/aws/lambda/Dockerfile",
                build_args={
                    # e.g. "python3.12" -> "3.12"
                    "PYTHON_VERSION": runtime.to_string().replace("python", ""),
                },
            ),
            environment={
                "STAC_FASTAPI_GEOPARQUET_HREF": f"s3://{bucket.bucket_name}/{config.geoparquet_key}",
                "HOME": "/tmp",  # for duckdb's home_directory
            },
        )

        bucket.grant_read(api_lambda)

        api = HttpApi(
            scope=self,
            id="api",
            default_integration=HttpLambdaIntegration(
                "api-integration",
                handler=api_lambda,
            ),
            default_domain_mapping=None,  # TODO: enable custom domain name
            create_default_stage=False,  # Important: disable default stage creation
        )

        # A falsy rate limit (None or 0) disables throttling; otherwise bursts
        # of up to twice the steady-state rate are allowed.
        throttle = (
            ThrottleSettings(
                rate_limit=config.rate_limit,
                burst_limit=config.rate_limit * 2,
            )
            if config.rate_limit
            else None
        )

        # The default stage is created explicitly (see create_default_stage
        # above) so that throttling can be set on it.
        stage = HttpStage(
            self,
            "api-stage",
            http_api=api,
            auto_deploy=True,
            stage_name="$default",
            throttle=throttle,
        )

        # Raise explicitly instead of asserting: asserts are stripped under -O.
        if not stage.url:
            raise RuntimeError("HTTP API stage did not expose a URL")
        CfnOutput(self, "ApiURL", value=stage.url)
152+
153+
154+
# Entry point run by the CDK CLI: build the app, load the settings, register
# the single stack under its configured name and synthesize the app.
app = App()
config = Config()
StacFastApiGeoparquetStack(
    scope=app,
    construct_id=config.stack_name,
    config=config,
)
app.synth()

infrastructure/aws/cdk.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"app": "python3 app.py"
3+
}

infrastructure/aws/config.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
"""STACK Configs."""
2+
3+
from typing import Annotated, Optional
4+
5+
from pydantic import field_validator
6+
from pydantic_settings import BaseSettings
7+
8+
9+
class Config(BaseSettings):
    """Deployment settings for the stac-fastapi-geoparquet stack.

    Values are loaded from environment variables carrying the ``STACK_``
    prefix and from a ``.env`` file.
    """

    # pydantic v2 style configuration; replaces the deprecated nested
    # ``class Config``, which also shadowed this class's own name.
    model_config = {"env_file": ".env", "env_prefix": "STACK_"}

    name: str = "stac-fastapi-geoparquet"
    stage: str = "dev"
    owner: str = "labs-375"  # used for the "Owner" tag
    project: str = "stac-fastapi-geoparquet"  # used for the "Project" tag
    release: str = "dev"

    bucket_name: str = "stac-fastapi-geoparquet"
    geoparquet_key: Annotated[
        Optional[str], "storage key for the geoparquet file within the S3 bucket"
    ] = None

    timeout: int = 30
    memory: int = 3009

    # The maximum of concurrent executions you want to reserve for the function.
    # Default: - No specific limit - account limit.
    max_concurrent: Optional[int] = None

    # rate limiting settings
    rate_limit: Annotated[
        Optional[int],
        "maximum average requests per second over an extended period of time",
    ] = 10

    @field_validator("geoparquet_key")
    @classmethod
    def validate_geoparquet_key(cls, v: Optional[str]) -> str:
        """Reject a missing geoparquet key.

        Raises:
            ValueError: If the value is ``None``.
        """
        if v is None:
            raise ValueError("geoparquet_key must be provided")
        return v

    @property
    def stack_name(self) -> str:
        """Generate consistent resource prefix."""
        return f"{self.name}-{self.stage}"

    @property
    def tags(self) -> dict[str, str]:
        """Generate consistent tags for resources."""
        return {
            "Project": self.project,
            "Owner": self.owner,
            "Stage": self.stage,
            "Name": self.name,
            "Release": self.release,
        }
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Version tag of the AWS Lambda Python base image; can be overridden with
# --build-arg PYTHON_VERSION=...
ARG PYTHON_VERSION=3.12

FROM public.ecr.aws/lambda/python:${PYTHON_VERSION}
# Copy the uv and uvx binaries from the uv image.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Install required utilities
RUN dnf install -y findutils binutils && \
    dnf clean all && \
    rm -rf /var/cache/dnf

WORKDIR /tmp
COPY pyproject.toml pyproject.toml
COPY README.md README.md
COPY src/stac_fastapi/ src/stac_fastapi/

# Install the package with its "lambda" extra into /asset. The requirement is
# quoted so the shell cannot treat "[lambda]" as a glob pattern.
RUN uv pip install --compile-bytecode ".[lambda]" --target /asset

# Reduce package size and remove useless files
WORKDIR /asset
# Copy each compiled file out of __pycache__ next to where its source was,
# dropping the ".cpython-NNN" tag, so the .py sources can be removed below.
# "read -r" and the quoting keep paths with spaces or backslashes intact.
RUN find . -type f -name '*.pyc' | while IFS= read -r f; do n=$(echo "$f" | sed 's/__pycache__\///' | sed 's/\.cpython-[0-9]*//'); cp "$f" "$n"; done
RUN find . -type d -a -name '__pycache__' -print0 | xargs -0 rm -rf
RUN find . -type f -a -name '*.py' -print0 | xargs -0 rm -f
RUN find . -type d -a -name 'tests' -print0 | xargs -0 rm -rf

# Strip debug symbols from compiled C/C++ code
RUN find . -type f -name '*.so*' -exec strip --strip-unneeded {} \;

# Copied after the cleanup above, so handler.py is kept as source.
COPY infrastructure/aws/lambda/handler.py /asset/handler.py
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
"""AWS Lambda handler."""
2+
3+
import logging
4+
5+
from mangum import Mangum
6+
7+
from stac_fastapi.geoparquet.main import app
8+
9+
# Only let ERROR (and above) records through from Mangum's loggers.
for _logger_name in ("mangum.lifespan", "mangum.http"):
    logging.getLogger(_logger_name).setLevel(logging.ERROR)

# Lambda entry point: the application wrapped by Mangum, lifespan enabled.
handler = Mangum(app, lifespan="on")

0 commit comments

Comments
 (0)