Skip to content
This repository was archived by the owner on Jul 21, 2025. It is now read-only.

Commit 38715be

Browse files
authored
deps: update (#77)
Some infra updates, heavily inspired by @alukach's changes to ASO's stack:

- Move CDK to top-level
- Split out stacks
- Add more constructs

Additionally, I've redone how extensions work for DuckDB to _hopefully_ not fetch anything on Lambda boot 🤞🏼. I mixed the two streams of work because I wasn't able to deploy a dev stack due to some bucket conflicts, which reminded me of @alukach's infra changes to ASO, which I wanted to copy to understand better. Review not required, just requested in case you're interested or have feedback on the approach. 🙇🏼

> [!NOTE]
> The DuckDB extensions got big enough that I had to switch to a Docker image for the Lambda.
1 parent 7c6b9aa commit 38715be

File tree

25 files changed

+1042
-623
lines changed

25 files changed

+1042
-623
lines changed

.env.local

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
1-
STAC_FASTAPI_GEOPARQUET_HREF = "data/naip.parquet"
1+
STACK_STAGE=test
2+
STACK_OWNER=labs-375
3+
STACK_GEOPARQUET_KEY=naip.parquet

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ on:
1010
types: [published]
1111

1212
env:
13-
duckdb-version: "1.2.0"
13+
duckdb-version: "1.2.2"
1414

1515
jobs:
1616
lint:
@@ -50,7 +50,7 @@ jobs:
5050
STACK_STAGE: ${{ vars.STAGE }}
5151
STACK_OWNER: labs-375
5252
STACK_RELEASE: ${{ github.event.release.tag_name }}
53-
STACK_BUCKET_NAME: stac-fastapi-geoparquet-devseed
53+
STACK_BUCKET_NAME: stac-fastapi-geoparquet-lab-375
5454
STACK_GEOPARQUET_KEY: naip.parquet
5555
STACK_RATE_LIMIT: 10
5656
environment:

.gitignore

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,7 @@ cython_debug/
174174
.vscode/
175175

176176
# AWS deployment
177-
infrastructure/aws/node_modules/
178-
infrastructure/aws/cdk.out/
179-
infrastructure/aws/.env
180-
infrastructure/aws/cdk-outputs.json
181-
infrastructure/aws/cdk.context.json
177+
node_modules/
178+
cdk.out/
179+
cdk-outputs.json
180+
cdk.context.json

README.md

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,6 @@ If you need to manually release, use the [Github releases interface](https://git
6363
We use [CalVer](https://calver.org/) with the following scheme: `vYYYY.MM.DD.n`, where `n` is the release count for the day.
6464
If you're releasing more than ten times in a day, stop, take a breath, and come back tomorrow.
6565

66-
## Core assumptions
67-
68-
- We want to be public-by-default (with appropriate throttling) with all of our services.
69-
We want to show this off to the world, not keep it secret.
70-
- We'd like to use [stac-rs](https://github.com/stac-utils/stac-rs) and its Python friend, [stacrs](https://github.com/gadomski/stacrs), as much as possible.
71-
This is partially a sop to @gadomski, but we think that the performance and reusability benefits of Rust will be part of what makes this project special.
72-
7366
## Project management
7467

7568
We love 🌳.

cdk.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"app": "python3 infrastructure/aws/app.py"
3+
}

docs/katas/0_full_scan.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@
155155
},
156156
{
157157
"cell_type": "code",
158-
"execution_count": 8,
158+
"execution_count": null,
159159
"metadata": {},
160160
"outputs": [
161161
{
@@ -167,7 +167,7 @@
167167
}
168168
],
169169
"source": [
170-
"from stacrs import DuckdbClient\n",
170+
"from rustac import DuckdbClient\n",
171171
"\n",
172172
"from labs_375 import NAIP_GEOPARQUET_URI\n",
173173
"\n",

infrastructure/aws/.env.local

Lines changed: 0 additions & 7 deletions
This file was deleted.

infrastructure/aws/app.py

Lines changed: 14 additions & 262 deletions
Original file line numberDiff line numberDiff line change
@@ -1,271 +1,23 @@
1-
"""AWS CDK application for the stac-fastapi-geoparquet Stack
1+
"""AWS CDK application for the stac-fastapi-geoparquet stack, with a pgstac
2+
database to compare."""
23

3-
Generates a Lambda function with an API Gateway trigger and an S3 bucket.
4-
5-
After deploying the stack you will need to make sure the geoparquet file
6-
specified in the config gets uploaded to the bucket associated with this stack!
7-
8-
Also includes a pgstac for side-by-side testing.
9-
"""
10-
11-
import os
12-
from typing import Any
13-
14-
from aws_cdk import (
15-
App,
16-
CfnOutput,
17-
Duration,
18-
RemovalPolicy,
19-
Stack,
20-
Tags,
21-
)
22-
from aws_cdk.aws_apigatewayv2 import HttpApi, HttpStage, ThrottleSettings
23-
from aws_cdk.aws_apigatewayv2_integrations import HttpLambdaIntegration
24-
from aws_cdk.aws_ec2 import (
25-
GatewayVpcEndpointAwsService,
26-
InstanceType,
27-
InterfaceVpcEndpointAwsService,
28-
Peer,
29-
Port,
30-
SubnetConfiguration,
31-
SubnetSelection,
32-
SubnetType,
33-
Vpc,
34-
)
35-
from aws_cdk.aws_iam import AnyPrincipal, Effect, PolicyStatement
36-
from aws_cdk.aws_lambda import Code, Function, Runtime
37-
from aws_cdk.aws_logs import RetentionDays
38-
from aws_cdk.aws_rds import DatabaseInstanceEngine, PostgresEngineVersion
39-
from aws_cdk.aws_s3 import BlockPublicAccess, Bucket
40-
from aws_cdk.custom_resources import (
41-
AwsCustomResource,
42-
AwsCustomResourcePolicy,
43-
AwsSdkCall,
44-
PhysicalResourceId,
45-
)
4+
from aws_cdk import App
465
from config import Config
47-
from constructs import Construct
48-
from eoapi_cdk import PgStacApiLambda, PgStacDatabase
49-
50-
51-
class VpcStack(Stack):
52-
def __init__(
53-
self, scope: Construct, config: Config, id: str, **kwargs: Any
54-
) -> None:
55-
super().__init__(scope, id=id, tags=config.tags, **kwargs)
56-
57-
self.vpc = Vpc(
58-
self,
59-
"vpc",
60-
subnet_configuration=[
61-
SubnetConfiguration(
62-
name="ingress", subnet_type=SubnetType.PUBLIC, cidr_mask=24
63-
),
64-
SubnetConfiguration(
65-
name="application",
66-
subnet_type=SubnetType.PRIVATE_WITH_EGRESS,
67-
cidr_mask=24,
68-
),
69-
SubnetConfiguration(
70-
name="rds",
71-
subnet_type=SubnetType.PRIVATE_ISOLATED,
72-
cidr_mask=24,
73-
),
74-
],
75-
nat_gateways=config.nat_gateway_count,
76-
)
77-
self.vpc.add_interface_endpoint(
78-
"SecretsManagerEndpoint",
79-
service=InterfaceVpcEndpointAwsService.SECRETS_MANAGER,
80-
)
81-
self.vpc.add_interface_endpoint(
82-
"CloudWatchEndpoint",
83-
service=InterfaceVpcEndpointAwsService.CLOUDWATCH_LOGS,
84-
)
85-
self.vpc.add_gateway_endpoint("S3", service=GatewayVpcEndpointAwsService.S3)
86-
self.export_value(
87-
self.vpc.select_subnets(subnet_type=SubnetType.PUBLIC).subnets[0].subnet_id
88-
)
89-
self.export_value(
90-
self.vpc.select_subnets(subnet_type=SubnetType.PUBLIC).subnets[1].subnet_id
91-
)
92-
93-
94-
class StacFastApiGeoparquetStack(Stack):
95-
def __init__(
96-
self,
97-
scope: Construct,
98-
construct_id: str,
99-
config: Config,
100-
runtime: Runtime = Runtime.PYTHON_3_12,
101-
**kwargs: Any,
102-
) -> None:
103-
super().__init__(scope, construct_id, **kwargs)
104-
105-
for key, value in config.tags.items():
106-
Tags.of(self).add(key, value)
107-
108-
bucket = Bucket(
109-
scope=self,
110-
id="bucket",
111-
bucket_name=config.bucket_name,
112-
versioned=True,
113-
removal_policy=RemovalPolicy.RETAIN
114-
if config.stage != "test"
115-
else RemovalPolicy.DESTROY,
116-
public_read_access=True,
117-
block_public_access=BlockPublicAccess(
118-
block_public_acls=False,
119-
block_public_policy=False,
120-
ignore_public_acls=False,
121-
restrict_public_buckets=False,
122-
),
123-
)
124-
125-
# make the bucket public, requester-pays
126-
bucket.add_to_resource_policy(
127-
PolicyStatement(
128-
actions=["s3:GetObject"],
129-
resources=[bucket.arn_for_objects("*")],
130-
principals=[AnyPrincipal()],
131-
effect=Effect.ALLOW,
132-
)
133-
)
134-
135-
add_request_pay = AwsSdkCall(
136-
action="putBucketRequestPayment",
137-
service="S3",
138-
region=self.region,
139-
parameters={
140-
"Bucket": bucket.bucket_name,
141-
"RequestPaymentConfiguration": {"Payer": "Requester"},
142-
},
143-
physical_resource_id=PhysicalResourceId.of(bucket.bucket_name),
144-
)
145-
146-
aws_custom_resource = AwsCustomResource(
147-
self,
148-
"RequesterPaysCustomResource",
149-
policy=AwsCustomResourcePolicy.from_sdk_calls(
150-
resources=[bucket.bucket_arn]
151-
),
152-
on_create=add_request_pay,
153-
on_update=add_request_pay,
154-
)
155-
156-
aws_custom_resource.node.add_dependency(bucket)
157-
158-
CfnOutput(self, "BucketName", value=bucket.bucket_name)
159-
160-
api_lambda = Function(
161-
scope=self,
162-
id="lambda",
163-
runtime=runtime,
164-
handler="handler.handler",
165-
memory_size=config.memory,
166-
log_retention=RetentionDays.ONE_WEEK,
167-
timeout=Duration.seconds(config.timeout),
168-
code=Code.from_docker_build(
169-
path=os.path.abspath("../.."),
170-
file="infrastructure/aws/lambda/Dockerfile",
171-
build_args={
172-
"PYTHON_VERSION": runtime.to_string().replace("python", ""),
173-
},
174-
),
175-
environment={
176-
"STAC_FASTAPI_GEOPARQUET_HREF": f"s3://{bucket.bucket_name}/{config.geoparquet_key}",
177-
# find pre-fetched extensions
178-
"STAC_FASTAPI_DUCKDB_EXTENSION_DIRECTORY": "/tmp/duckdb-extensions",
179-
"HOME": "/tmp", # for duckdb's home_directory
180-
},
181-
)
182-
183-
bucket.grant_read(api_lambda)
184-
185-
api = HttpApi(
186-
scope=self,
187-
id="api",
188-
default_integration=HttpLambdaIntegration(
189-
"api-integration",
190-
handler=api_lambda,
191-
),
192-
default_domain_mapping=None, # TODO: enable custom domain name
193-
create_default_stage=False, # Important: disable default stage creation
194-
)
195-
196-
stage = HttpStage(
197-
self,
198-
"api-stage",
199-
http_api=api,
200-
auto_deploy=True,
201-
stage_name="$default",
202-
throttle=ThrottleSettings(
203-
rate_limit=config.rate_limit,
204-
burst_limit=config.rate_limit * 2,
205-
)
206-
if config.rate_limit
207-
else None,
208-
)
209-
210-
assert stage.url
211-
CfnOutput(self, "ApiURL", value=stage.url)
212-
213-
214-
class StacFastApiPgstacStack(Stack):
215-
def __init__(
216-
self,
217-
scope: Construct,
218-
vpc: Vpc,
219-
id: str,
220-
config: Config,
221-
**kwargs: Any,
222-
) -> None:
223-
super().__init__(
224-
scope,
225-
id=id,
226-
tags=config.tags,
227-
**kwargs,
228-
)
229-
pgstac_db = PgStacDatabase(
230-
self,
231-
"pgstac-db",
232-
vpc=vpc,
233-
engine=DatabaseInstanceEngine.postgres(
234-
version=PostgresEngineVersion.VER_16
235-
),
236-
vpc_subnets=SubnetSelection(subnet_type=(SubnetType.PUBLIC)),
237-
allocated_storage=config.pgstac_db_allocated_storage,
238-
instance_type=InstanceType(config.pgstac_db_instance_type),
239-
removal_policy=RemovalPolicy.DESTROY,
240-
)
241-
# allow connections from any ipv4 to pgbouncer instance security group
242-
assert pgstac_db.security_group
243-
pgstac_db.security_group.add_ingress_rule(Peer.any_ipv4(), Port.tcp(5432))
244-
pgstac_api = PgStacApiLambda(
245-
self,
246-
"stac-api",
247-
api_env={
248-
"NAME": "stac-fastapi-pgstac",
249-
"description": f"{config.stage} STAC API",
250-
},
251-
db=pgstac_db.connection_target,
252-
db_secret=pgstac_db.pgstac_secret,
253-
stac_api_domain_name=None,
254-
)
255-
256-
assert pgstac_api.url
257-
CfnOutput(self, "ApiURL", value=pgstac_api.url)
258-
6+
from stacks.app import AppStack
7+
from stacks.infra import InfraStack
2598

2609
app = App()
26110
config = Config()
262-
vpc_stack = VpcStack(scope=app, config=config, id=f"vpc-{config.name}")
263-
StacFastApiPgstacStack(
264-
scope=app, vpc=vpc_stack.vpc, config=config, id=f"{config.name}-pgstac"
11+
infra_stack = InfraStack(
12+
scope=app,
13+
id=config.stack_name("infra"),
14+
config=config,
26515
)
266-
StacFastApiGeoparquetStack(
267-
app,
268-
config.stack_name,
16+
AppStack(
17+
scope=app,
18+
id=config.stack_name("app"),
19+
pgstac_db=infra_stack.pgstac_db,
20+
bucket=infra_stack.bucket,
26921
config=config,
27022
)
27123
app.synth()

infrastructure/aws/cdk.json

Lines changed: 0 additions & 3 deletions
This file was deleted.

0 commit comments

Comments
 (0)