diff --git a/.dockerignore b/.dockerignore
index cc90e7d..9c10098 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,5 +1,6 @@
-# Ignore cdk folder
-cdk.out
-.history
-.tox
 .git
+infrastructure/aws/cdk.out
+.venv
+.mypy_cache
+.pytest_cache
+.ruff_cache
diff --git a/CHANGELOG.md b/CHANGELOG.md
index aa310b6..74c01d9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## unreleased
+
+* Convert Lambda to a containerized function
+
 ## 0.3.1
 
 * Upgrade to `titiler>=0.21,<0.22`
diff --git a/infrastructure/aws/cdk/app.py b/infrastructure/aws/cdk/app.py
index eec6fb3..38af43d 100644
--- a/infrastructure/aws/cdk/app.py
+++ b/infrastructure/aws/cdk/app.py
@@ -14,6 +14,7 @@ from aws_cdk import aws_sns as sns
 from aws_cdk import aws_sns_subscriptions as subscriptions
 from aws_cdk.aws_apigatewayv2_integrations import HttpLambdaIntegration
+from aws_cdk.aws_ecr_assets import Platform
 from config import AppSettings, StackSettings
 from constructs import Construct
 
@@ -121,19 +122,14 @@ def __init__(
             role_arn=app_settings.reader_role_arn,
         )
 
-        lambda_function = aws_lambda.Function(
+        lambda_function = aws_lambda.DockerImageFunction(
             self,
             f"{id}-lambda",
-            runtime=runtime,
-            code=aws_lambda.Code.from_docker_build(
-                path=os.path.abspath(context_dir),
+            code=aws_lambda.DockerImageCode.from_image_asset(
+                directory=os.path.abspath(context_dir),
                 file="infrastructure/aws/lambda/Dockerfile",
-                platform="linux/amd64",
-                build_args={
-                    "PYTHON_VERSION": runtime.to_string().replace("python", ""),
-                },
+                platform=Platform.LINUX_AMD64,
             ),
-            handler="handler.handler",
             memory_size=memory,
             reserved_concurrent_executions=concurrent,
             timeout=Duration.seconds(timeout),
diff --git a/infrastructure/aws/lambda/Dockerfile b/infrastructure/aws/lambda/Dockerfile
index 86c99ce..e53f88e 100644
--- a/infrastructure/aws/lambda/Dockerfile
+++ b/infrastructure/aws/lambda/Dockerfile
@@ -1,37 +1,79 @@
 ARG PYTHON_VERSION=3.12
 
-FROM --platform=linux/amd64 public.ecr.aws/lambda/python:${PYTHON_VERSION}
+# Build stage - includes all build tools and dependencies
+FROM public.ecr.aws/lambda/python:${PYTHON_VERSION} AS builder
+
+# Copy uv for faster dependency management
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
 
-WORKDIR /tmp
+# Install system dependencies needed for compilation
+RUN dnf install -y gcc-c++ && dnf clean all
 
-# Install system dependencies to compile (numexpr)
-RUN dnf install -y gcc-c++
+# Set working directory for build
+WORKDIR /build
 
-COPY uv.lock .python-version pyproject.toml LICENSE README.md ./
+# Copy dependency files first for better caching
+COPY README.md uv.lock .python-version pyproject.toml ./
 COPY src/titiler/ ./src/titiler/
 
+# Install dependencies to temporary directory with Lambda-specific optimizations
 RUN uv export --locked --no-editable --no-dev --extra lambda --format requirements.txt -o requirements.txt && \
-    uv pip install --compile-bytecode --no-binary pydantic --target /asset -r requirements.txt
+    uv pip install \
+        --compile-bytecode \
+        --no-binary pydantic \
+        --target /deps \
+        --no-cache-dir \
+        --disable-pip-version-check \
+        -r requirements.txt
+
+# Aggressive cleanup to minimize size and optimize for Lambda container
+WORKDIR /deps
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+RUN <<EOF
+# Remove caches and tests that are not needed at runtime
+find . -type d \( -name '__pycache__' -o -name 'tests' \) -exec rm -rf {} + 2>/dev/null || true
+# Create a manifest file for debugging
+du -sh . > /tmp/package_size.txt
+EOF
+
+# Final runtime stage - minimal Lambda image optimized for container runtime
+FROM public.ecr.aws/lambda/python:${PYTHON_VERSION}
 
-# copy libexpat.so.1 into /asset which is included in LD_LIBRARY_PATH
-RUN cp /usr/lib64/libexpat.so.1 /asset/
+# Set Lambda-specific environment variables for optimal performance
+ENV PYTHONPATH=${LAMBDA_RUNTIME_DIR} \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    AWS_LWA_ENABLE_COMPRESSION=true
 
-# Reduce package size and remove useless files
-RUN cd /asset && find . -type f -name '*.pyc' | while read f; do n=$(echo $f | sed 's/__pycache__\///' | sed 's/.cpython-[0-9]*//'); cp $f $n; done;
-RUN cd /asset && find . -type d -a -name '__pycache__' -print0 | xargs -0 rm -rf
-RUN cd /asset && find . -type f -a -name '*.py' -print0 | xargs -0 rm -f
-RUN find /asset -type d -a -name 'tests' -print0 | xargs -0 rm -rf
-RUN rm -rdf /asset/numpy/doc/ /asset/bin /asset/geos_license /asset/Misc
-RUN rm -rdf /asset/boto3*
-RUN rm -rdf /asset/botocore*
+# Copy only the cleaned dependencies from builder stage
+# Copy required system library
+COPY --from=builder /deps /usr/lib64/libexpat.so.1 ${LAMBDA_RUNTIME_DIR}/
 
-# Strip debug symbols from compiled C/C++ code (except for numpy.libs!)
-RUN cd /asset && \
-    find . -type f -name '*.so*' \
-    -not -path "./numpy.libs/*" \
-    -exec strip --strip-unneeded {} \;
+# Copy application handler
+COPY infrastructure/aws/lambda/handler.py ${LAMBDA_RUNTIME_DIR}/
 
-COPY infrastructure/aws/lambda/handler.py /asset/handler.py
+# Ensure handler is executable and optimize permissions
+RUN <<EOF
+chmod 755 ${LAMBDA_RUNTIME_DIR}/handler.py
+EOF
diff --git a/infrastructure/aws/lambda/handler.py b/infrastructure/aws/lambda/handler.py
--- a/infrastructure/aws/lambda/handler.py
+++ b/infrastructure/aws/lambda/handler.py
+def lambda_handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
+    """Lambda handler with container-specific optimizations."""
+    response = handler(event, context)
+
+    return response
+
+
+handler.lambda_handler = lambda_handler
diff --git a/infrastructure/aws/package-lock.json b/infrastructure/aws/package-lock.json
index ad52264..89a2f4b 100644
--- a/infrastructure/aws/package-lock.json
+++ b/infrastructure/aws/package-lock.json
@@ -9,35 +9,37 @@
       "version": "0.1.0",
       "license": "MIT",
       "dependencies": {
-        "cdk": "2.76.0-alpha.0"
+        "cdk": "^2.177.0"
       }
     },
     "node_modules/aws-cdk": {
-      "version": "2.76.0",
-      "resolved": "https://registry.npmjs.org/aws-cdk/-/aws-cdk-2.76.0.tgz",
-      "integrity": "sha512-y6VHtqUpYenn6mGIBFbcGGXIoXfKA3o0eGL/eeD/gUJ9TcPrgMLQM1NxSMb5JVsOk5BPPXzGmvB0gBu40utGqg==",
+      "version": "2.1029.3",
+      "resolved": "https://registry.npmjs.org/aws-cdk/-/aws-cdk-2.1029.3.tgz",
+      "integrity": "sha512-otRJP5a4r07S+SLKs/WvJH+0auZHkaRMnv1vtD4fpp1figV8Vr9MKdB4QPNjfKdLGyK9f95OEHwVlIW9xpjPBg==",
+      "license": "Apache-2.0",
       "bin": {
         "cdk": "bin/cdk"
       },
       "engines": {
-        "node": ">= 14.15.0"
+        "node": ">= 18.0.0"
       },
       "optionalDependencies": {
         "fsevents": "2.3.2"
       }
     },
     "node_modules/cdk": {
-      "version": "2.76.0-alpha.0",
-      "resolved": "https://registry.npmjs.org/cdk/-/cdk-2.76.0-alpha.0.tgz",
-      "integrity": "sha512-HNfX5c7MU18LxthZRcapqEhG0IFgQeNOhtsTR1QiL/7dhy2TjvK26dYcJ67KIHfzMfm5EUjvOXdP1SPdW+eOOA==",
+      "version": "2.1029.3",
+      "resolved": "https://registry.npmjs.org/cdk/-/cdk-2.1029.3.tgz",
+      "integrity": "sha512-VZMLvllt1H4e1BIeitw6ZCSoAg7TEGQisN/0WsOAjsUoEk0Bwkpl0yPBu4JzY9MX/x1AdYPqGYkSO04LvBcLnQ==",
+      "license": "Apache-2.0",
       "dependencies": {
-        "aws-cdk": "2.76.0"
+        "aws-cdk": "2.1029.3"
       },
       "bin": {
         "cdk": "bin/cdk"
       },
       "engines": {
-        "node": ">= 8.10.0"
+        "node": ">= 18.0.0"
       }
     },
     "node_modules/fsevents": {
@@ -45,6 +47,7 @@
       "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
       "integrity":
"sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", "hasInstallScript": true, + "license": "MIT", "optional": true, "os": [ "darwin" @@ -56,19 +59,19 @@ }, "dependencies": { "aws-cdk": { - "version": "2.76.0", - "resolved": "https://registry.npmjs.org/aws-cdk/-/aws-cdk-2.76.0.tgz", - "integrity": "sha512-y6VHtqUpYenn6mGIBFbcGGXIoXfKA3o0eGL/eeD/gUJ9TcPrgMLQM1NxSMb5JVsOk5BPPXzGmvB0gBu40utGqg==", + "version": "2.1029.3", + "resolved": "https://registry.npmjs.org/aws-cdk/-/aws-cdk-2.1029.3.tgz", + "integrity": "sha512-otRJP5a4r07S+SLKs/WvJH+0auZHkaRMnv1vtD4fpp1figV8Vr9MKdB4QPNjfKdLGyK9f95OEHwVlIW9xpjPBg==", "requires": { "fsevents": "2.3.2" } }, "cdk": { - "version": "2.76.0-alpha.0", - "resolved": "https://registry.npmjs.org/cdk/-/cdk-2.76.0-alpha.0.tgz", - "integrity": "sha512-HNfX5c7MU18LxthZRcapqEhG0IFgQeNOhtsTR1QiL/7dhy2TjvK26dYcJ67KIHfzMfm5EUjvOXdP1SPdW+eOOA==", + "version": "2.1029.3", + "resolved": "https://registry.npmjs.org/cdk/-/cdk-2.1029.3.tgz", + "integrity": "sha512-VZMLvllt1H4e1BIeitw6ZCSoAg7TEGQisN/0WsOAjsUoEk0Bwkpl0yPBu4JzY9MX/x1AdYPqGYkSO04LvBcLnQ==", "requires": { - "aws-cdk": "2.76.0" + "aws-cdk": "2.1029.3" } }, "fsevents": { diff --git a/scripts/benchmark.py b/scripts/benchmark.py new file mode 100644 index 0000000..9dcf056 --- /dev/null +++ b/scripts/benchmark.py @@ -0,0 +1,402 @@ +#!/usr/bin/env python3 +""" +Benchmark script for titiler-multidim Lambda performance testing. + +This script tests Lambda performance by: +1. Warming up with a tilejson request to get dataset bounds +2. Generating tile coordinates that intersect the dataset bounds using morecantile +3. Measuring tile loading performance at the specified zoom level +4. Providing comprehensive statistics + +Usage Examples: + # Basic usage with default parameters (zoom 4, hardcoded dataset) + uv run benchmark.py --api-url https://your-lambda-url.amazonaws.com + + # Specify custom zoom level + uv run benchmark.py --api-url https://your-lambda-url.amazonaws.com --zoom 6 + + # Use dataset parameters from JSON file + uv run benchmark.py --api-url https://your-lambda-url.amazonaws.com --dataset-json dataset.json --zoom 5 + + # Use dataset parameters from STDIN + echo '{"url": "s3://bucket/data.zarr", "variable": "temp"}' | uv run benchmark.py --api-url https://your-lambda-url.amazonaws.com --dataset-stdin + + # Export results to CSV + uv run benchmark.py --api-url https://your-lambda-url.amazonaws.com --zoom 4 --export-csv + + # Combine multiple options + uv run benchmark.py --api-url https://your-lambda-url.amazonaws.com --dataset-json my-dataset.json --zoom 7 --max-concurrent 30 --export-csv + +Dataset JSON format: + { + "url": "s3://bucket/path/to/dataset.zarr", + "variable": "temperature", + "sel": "time=2023-01-01T00:00:00.000000000", + "rescale": "250,350", + "colormap_name": "viridis" + } +""" + +import argparse +import asyncio +import csv +import json +import os +import statistics +import sys +import time +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple + +import httpx +import morecantile + +# Test parameters +DATASET_PARAMS = { + "url": "s3://mur-sst/zarr-v1", + "variable": "analysed_sst", + "sel": "time=2018-03-02T09:00:00.000000000", + "rescale": "250,350", + "colormap_name": "viridis", +} + +# Default zoom level (can be overridden by command line argument) +DEFAULT_ZOOM_LEVEL = 4 + + +def load_dataset_params( + json_file: Optional[str] = None, use_stdin: bool = False +) -> Dict: + """Load dataset parameters from JSON 
file, STDIN, or use defaults.""" + if use_stdin: + try: + return json.load(sys.stdin) + except json.JSONDecodeError as e: + print(f"Error parsing JSON from STDIN: {e}") + sys.exit(1) + elif json_file: + try: + with open(json_file, "r") as f: + return json.load(f) + except FileNotFoundError: + print(f"Error: Dataset JSON file '{json_file}' not found") + sys.exit(1) + except json.JSONDecodeError as e: + print(f"Error parsing JSON from file '{json_file}': {e}") + sys.exit(1) + else: + return DATASET_PARAMS + + +def get_tiles_for_bounds(bounds: List[float], zoom: int) -> List[Tuple[int, int]]: + """Generate tile coordinates for the given bounds and zoom level using morecantile.""" + west, south, east, north = bounds + + tms = morecantile.tms.get("WebMercatorQuad") + + # Generate tiles that intersect with the bounds + tiles = list(tms.tiles(west, south, east, north, [zoom])) + + # Return as (x, y) coordinate tuples + return [(tile.x, tile.y) for tile in tiles] + + +@dataclass +class BenchmarkResult: + """Container for benchmark results.""" + + zoom_level: int = 0 + warmup_time: float = 0.0 + warmup_success: bool = False + tile_times: List[float] = field(default_factory=list) + tile_failures: List[Tuple[int, int]] = field(default_factory=list) + tile_coords: List[Tuple[int, int]] = field(default_factory=list) + total_runtime: float = 0.0 + start_time: float = 0.0 + + +async def fetch_tilejson( + client: httpx.AsyncClient, api_url: str, dataset_params: Dict +) -> Tuple[float, bool, Optional[Dict]]: + """Fetch tilejson to warm up the Lambda and get bounds information.""" + url = f"{api_url}/WebMercatorQuad/tilejson.json" + + start_time = time.time() + try: + response = await client.get(url, params=dataset_params, timeout=60.0) + elapsed = time.time() - start_time + + if response.status_code == 200: + return elapsed, True, response.json() + else: + print(f"Tilejson request failed with status {response.status_code}") + return elapsed, False, None + + except Exception as e: + elapsed = time.time() - start_time + print(f"Tilejson request failed: {e}") + return elapsed, False, None + + +async def fetch_tile( + client: httpx.AsyncClient, + api_url: str, + x: int, + y: int, + zoom: int, + dataset_params: Dict, + semaphore: asyncio.Semaphore, +) -> Tuple[int, int, float, bool]: + """Fetch a single tile and return timing information.""" + async with semaphore: + url = f"{api_url}/tiles/WebMercatorQuad/{zoom}/{x}/{y}.png" + + start_time = time.time() + try: + response = await client.get(url, params=dataset_params, timeout=30.0) + elapsed = time.time() - start_time + success = response.status_code == 200 + return x, y, elapsed, success + + except Exception as e: + elapsed = time.time() - start_time + print(f"Tile {x},{y} failed: {e}") + return x, y, elapsed, False + + +async def benchmark_tiles( + client: httpx.AsyncClient, + api_url: str, + zoom: int, + dataset_params: Dict, + max_concurrent: int = 20, +) -> BenchmarkResult: + """Run the complete benchmark test.""" + result = BenchmarkResult() + result.zoom_level = zoom + result.start_time = time.time() + + # Step 1: Warmup with tilejson request and get bounds + print("πŸš€ Warming up Lambda with tilejson request...") + warmup_time, warmup_success, tilejson_data = await fetch_tilejson( + client, api_url, dataset_params + ) + result.warmup_time = warmup_time + result.warmup_success = warmup_success + + if warmup_success: + print(f"βœ… Warmup successful in {warmup_time:.2f}s") + + # Display dataset information from tilejson + if tilejson_data: + 
print("πŸ—ΊοΈ Dataset info:") + if "bounds" in tilejson_data: + print(f" Bounds: {tilejson_data['bounds']}") + if "center" in tilejson_data: + print(f" Center: {tilejson_data['center']}") + if "minzoom" in tilejson_data and "maxzoom" in tilejson_data: + print( + f" Zoom range: {tilejson_data['minzoom']} - {tilejson_data['maxzoom']}" + ) + print("πŸ“‹ Full TileJSON response:") + print(f" {json.dumps(tilejson_data, indent=2)}") + print() + else: + print(f"❌ Warmup failed after {warmup_time:.2f}s") + return result + + # Step 2: Extract bounds and generate tile coordinates + if not tilejson_data or "bounds" not in tilejson_data: + print("❌ No bounds found in tilejson response, falling back to world bounds") + bounds = [-180.0, -90.0, 180.0, 90.0] # World bounds + else: + bounds = tilejson_data["bounds"] + + print(f"πŸ“ Using bounds: {bounds}") + print(f"πŸ“ Generating tile coordinates for zoom level {zoom}...") + + tile_coords = get_tiles_for_bounds(bounds, zoom) + result.tile_coords = tile_coords # Store for CSV export + + print(f"πŸ“ Found {len(tile_coords)} tiles intersecting dataset bounds") + + # Step 3: Fetch all tiles concurrently + print(f"🌍 Fetching zoom {zoom} tiles (max {max_concurrent} concurrent)...") + semaphore = asyncio.Semaphore(max_concurrent) + + tasks = [ + fetch_tile(client, api_url, x, y, zoom, dataset_params, semaphore) + for x, y in tile_coords + ] + + # Show progress as tiles complete + completed = 0 + progress_interval = max(1, len(tile_coords) // 10) if len(tile_coords) >= 10 else 1 + + for task in asyncio.as_completed(tasks): + x, y, elapsed, success = await task + completed += 1 + + if success: + result.tile_times.append(elapsed) + else: + result.tile_failures.append((x, y)) + + # Show progress + if completed % progress_interval == 0 or completed == len(tile_coords): + progress = (completed / len(tile_coords)) * 100 + print(f" Progress: {progress:.0f}% ({completed}/{len(tile_coords)} tiles)") + + result.total_runtime = time.time() - result.start_time + return result + + +def print_summary(result: BenchmarkResult): + """Print comprehensive benchmark statistics.""" + print("\n" + "=" * 60) + print("🏁 BENCHMARK SUMMARY") + print("=" * 60) + + # Warmup stats + print("Warmup Request:") + print(f" Status: {'βœ… Success' if result.warmup_success else '❌ Failed'}") + print(f" Time: {result.warmup_time:.2f}s") + print() + + # Overall stats + print(f"Total Runtime: {result.total_runtime:.2f}s") + print() + + # Tile request stats + total_tiles = len(result.tile_times) + len(result.tile_failures) + success_count = len(result.tile_times) + failure_count = len(result.tile_failures) + success_rate = (success_count / total_tiles * 100) if total_tiles > 0 else 0 + + print("Tile Request Summary:") + print(f" Zoom level: {result.zoom_level}") + print(f" Total tiles: {total_tiles}") + print(f" Successful: {success_count} ({success_rate:.1f}%)") + print(f" Failed: {failure_count} ({100 - success_rate:.1f}%)") + print() + + if result.tile_times: + # Response time statistics + avg_time = statistics.mean(result.tile_times) + min_time = min(result.tile_times) + max_time = max(result.tile_times) + median_time = statistics.median(result.tile_times) + + # Calculate percentiles + sorted_times = sorted(result.tile_times) + p95_idx = int(0.95 * len(sorted_times)) + p95_time = sorted_times[p95_idx] + + print("Response Time Analysis:") + print(f" Average: {avg_time:.3f}s") + print(f" Minimum: {min_time:.3f}s") + print(f" Maximum: {max_time:.3f}s") + print(f" Median: {median_time:.3f}s") + 
print(f" 95th percentile: {p95_time:.3f}s") + print() + + # Throughput metrics + tile_loading_time = result.total_runtime - result.warmup_time + throughput = success_count / tile_loading_time if tile_loading_time > 0 else 0 + + print("Throughput Metrics:") + print(f" Tiles per second: {throughput:.1f}") + print(f" Tile loading time: {tile_loading_time:.2f}s") + + if result.tile_failures: + print( + f"\nFailed Tiles: {result.tile_failures[:10]}{'...' if len(result.tile_failures) > 10 else ''}" + ) + + +def export_csv(result: BenchmarkResult, filename: str = "benchmark_results.csv"): + """Export detailed results to CSV.""" + if not result.tile_coords: + print("❌ No tile coordinates available for CSV export") + return + + with open(filename, "w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["tile_x", "tile_y", "response_time_s", "success"]) + + # Create mapping of failed tiles + failure_coords = set(result.tile_failures) + + # Keep track of successful tiles in order (they align with tile_times) + tile_idx = 0 + + for x, y in result.tile_coords: + if (x, y) in failure_coords: + writer.writerow([x, y, "N/A", False]) + elif tile_idx < len(result.tile_times): + writer.writerow([x, y, f"{result.tile_times[tile_idx]:.3f}", True]) + tile_idx += 1 + else: + # This shouldn't happen, but just in case + writer.writerow([x, y, "N/A", "Unknown"]) + + print(f"πŸ“Š Detailed results exported to {filename}") + + +async def main(): + """Main benchmark execution.""" + parser = argparse.ArgumentParser( + description="Benchmark titiler-multidim Lambda performance" + ) + parser.add_argument("--api-url", required=True, help="Lambda API URL") + parser.add_argument( + "--max-concurrent", type=int, default=20, help="Maximum concurrent requests" + ) + parser.add_argument( + "--export-csv", action="store_true", help="Export results to CSV" + ) + parser.add_argument( + "--zoom", type=int, default=4, help="Zoom level for tile requests (default: 4)" + ) + parser.add_argument( + "--dataset-json", help="JSON file path containing dataset parameters" + ) + parser.add_argument( + "--dataset-stdin", + action="store_true", + help="Read dataset parameters from STDIN as JSON", + ) + + args = parser.parse_args() + + # Load dataset parameters + dataset_params = load_dataset_params(args.dataset_json, args.dataset_stdin) + + # Override with environment variable if set + api_url = os.environ.get("API_URL", args.api_url) + + print(f"🎯 Benchmarking Lambda at: {api_url}") + print("πŸ“Š Dataset parameters:") + for key, value in dataset_params.items(): + print(f" {key}: {value}") + print(f"πŸ” Zoom level: {args.zoom}") + print(f"⚑ Max concurrent requests: {args.max_concurrent}") + print() + + # Configure httpx client with appropriate timeouts + timeout = httpx.Timeout(60.0, connect=10.0) + limits = httpx.Limits(max_connections=args.max_concurrent * 2) + + async with httpx.AsyncClient(timeout=timeout, limits=limits) as client: + result = await benchmark_tiles( + client, api_url, args.zoom, dataset_params, args.max_concurrent + ) + + print_summary(result) + + if args.export_csv: + export_csv(result) + + +if __name__ == "__main__": + asyncio.run(main())