diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..80bee474 --- /dev/null +++ b/.env.example @@ -0,0 +1,6 @@ +ADMIN_EMAIL= +SUBSCRIBE_EMAILS= +GITHUB_PERSONAL_ACCESS_TOKEN= +FEATURE_SERVICE_URL=https://feature.b12x.org +DOCKER_USERNAME= +DOCKER_PASSWORD= diff --git a/Makefile b/Makefile index f90970e9..d2a0418d 100644 --- a/Makefile +++ b/Makefile @@ -21,10 +21,9 @@ export PURGE_LOGS ?= false # Conditionally required variable defaults export CREATE_VPC ?= true -export USE_PRIVATE_SUBNET ?= true export SKIP_CHECK_DEPENDENCIES ?= false export DEPLOY_NAT_GATEWAY ?= true -export DEPLOY_BASTION_SERVER ?= true +export DEPLOY_BASTION_SERVER ?= false export DEPLOY_VPC_ENDPOINTS ?= true export VPC_ID ?= export PUBLIC_SUBNET_ID ?= @@ -39,7 +38,7 @@ export DATABASE_VOLUME_SIZE ?= 64 export DATA_BUCKET_NAME ?= ${STAGE}-${APP_NAME}-${AWS_ACCOUNT}-${AWS_REGION} export ECR_BASE_URI := ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com export BUILD_REPOSITORY_NAME ?= ${STAGE}-${APP_NAME}-build-service -export EC2_KEY_PAIR_NAME := $${STAGE}-$${APP_NAME}-$${AWS_REGION}-neo4j-key +export EC2_KEY_PAIR_NAME := ${STAGE}-${APP_NAME}-${AWS_REGION}-neo4j-key export INSTANCE_ID = $(shell aws ssm get-parameters \ --names "/${APP_NAME}/${STAGE}/${AWS_REGION}/Neo4jDatabaseInstanceId" \ --output json \ @@ -67,7 +66,7 @@ REQUIRED_VARS := STAGE APP_NAME AWS_ACCOUNT AWS_REGION AWS_PROFILE SUBSCRIBE_EMA GITHUB_REPOSITORY_OWNER GITHUB_REPOSITORY_NAME GITHUB_PERSONAL_ACCESS_TOKEN \ ADMIN_EMAIL NEO4J_PASSWORD GDS_VERSION -BOOLEAN_VARS := CREATE_VPC USE_PRIVATE_SUBNET DEPLOY_NAT_GATEWAY DEPLOY_BASTION_SERVER DEPLOY_VPC_ENDPOINTS SKIP_CHECK_DEPENDENCIES +BOOLEAN_VARS := CREATE_VPC DEPLOY_NAT_GATEWAY DEPLOY_BASTION_SERVER DEPLOY_VPC_ENDPOINTS SKIP_CHECK_DEPENDENCIES # stdout colors # blue: runtime message, no action required @@ -222,13 +221,12 @@ env.validate.subdomain: [[ $$res = "" ]] && echo "\033[0;31mERROR: No Route53 domain found for $$fqdn\033[0m" && exit 1 || true 
env.validate.use-private-subnet.vars: -ifeq ($(USE_PRIVATE_SUBNET),true) ifeq ($(DEPLOY_NAT_GATEWAY),) - $(call red, "\`DEPLOY_NAT_GATEWAY\` must be set when \`USE_PRIVATE_SUBNET\` is \`true\`") + $(call red, "\`DEPLOY_NAT_GATEWAY\` must be set.") @exit 1 endif ifeq ($(DEPLOY_BASTION_SERVER),) - $(call red, "\`DEPLOY_BASTION_SERVER\` must be set when \`USE_PRIVATE_SUBNET\` is \`true\`") + $(call red, "\`DEPLOY_BASTION_SERVER\` must be set.") @exit 1 else ifeq ($(DEPLOY_BASTION_SERVER),true) ifeq ($(ADMIN_IP),) @@ -237,87 +235,28 @@ ifeq ($(ADMIN_IP),) endif endif ifeq ($(DEPLOY_VPC_ENDPOINTS),) - $(call red, "\`DEPLOY_VPC_ENDPOINTS\` must be set when \`USE_PRIVATE_SUBNET\` is \`true\`") + $(call red, "\`DEPLOY_VPC_ENDPOINTS\` must be set.") @exit 1 endif ifeq ($(CREATE_VPC),false) ifeq ($(PUBLIC_SUBNET_ID),) - $(call red, "\`PUBLIC_SUBNET_ID\` must be set as an environment variable when \`USE_PRIVATE_SUBNET\` is \`true\`") + $(call red, "\`PUBLIC_SUBNET_ID\` must be set as an environment variable.") @exit 1 else $(call green, "Found PUBLIC_SUBNET_ID: ${PUBLIC_SUBNET_ID}") endif ifeq ($(PRIVATE_SUBNET_ID),) - $(call red, "\`PRIVATE_SUBNET_ID\` must be set as an environment variable when \`USE_PRIVATE_SUBNET\` is \`true\`") + $(call red, "\`PRIVATE_SUBNET_ID\` must be set as an environment variable.") @exit 1 else $(call green, "Found PRIVATE_SUBNET_ID: ${PRIVATE_SUBNET_ID}") endif else ifeq ($(CREATE_VPC),true) ifneq ($(DEPLOY_NAT_GATEWAY),true) - $(call red, "\`DEPLOY_NAT_GATEWAY\` must be set to \`true\` when \`CREATE_VPC\` is \`true\` and \`USE_PRIVATE_SUBNET\` is \`true\`") + $(call red, "\`DEPLOY_NAT_GATEWAY\` must be set to \`true\` when \`CREATE_VPC\` is \`true\`.") @exit 1 endif endif -else ifeq ($(USE_PRIVATE_SUBNET),false) - $(call blue, "**** This deployment uses a public subnet for Neo4j ****") -ifneq ($(DEPLOY_NAT_GATEWAY),) - $(call red, "\`DEPLOY_NAT_GATEWAY\` must not be set when \`USE_PRIVATE_SUBNET\` is \`false\`") - @exit 1 -endif -ifneq 
($(DEPLOY_BASTION_SERVER),) - $(call red, "\`DEPLOY_BASTION_SERVER\` must not be set when \`USE_PRIVATE_SUBNET\` is \`false\`") - @exit 1 -endif -ifeq ($(HOST_DOMAIN),) - $(call red, "\`HOST_DOMAIN\` must be set as an environment variable when \`USE_PRIVATE_SUBNET\` is \`false\`") - @exit 1 -else - $(call green, "Found HOST_DOMAIN: ${HOST_DOMAIN}") -endif -ifeq ($(SUBDOMAIN),) - $(call red, "\`SUBDOMAIN\` must be set as an environment variable when \`USE_PRIVATE_SUBNET\` is \`false\`") - @exit 1 -else - $(call green, "Found SUBDOMAIN: ${SUBDOMAIN}") -endif -ifeq ($(HOSTED_ZONE_ID),) - $(call red, "\`HOSTED_ZONE_ID\` must be set as an environment variable when \`USE_PRIVATE_SUBNET\` is \`false\`") - @exit 1 -else - $(call green, "Found HOSTED_ZONE_ID: ${HOSTED_ZONE_ID}") -endif - $(call blue, Validating Route53 configuration...) - $(MAKE) env.validate.subdomain fqdn="${SUBDOMAIN}.${HOST_DOMAIN}." - $(call green, Found configuration for ${SUBDOMAIN}.${HOST_DOMAIN}) -ifeq ($(CREATE_VPC),false) -ifeq ($(VPC_ID),) - $(call red, "\`VPC_ID\` must be set as an environment variable when \`CREATE_VPC\` is \`false\`") - @exit 1 -endif -ifeq ($(PUBLIC_SUBNET_ID),) - $(call red, "\`PUBLIC_SUBNET_ID\` must be set as an environment variable when \`CREATE_VPC\` is \`false\`") - @exit 1 -endif -ifneq ($(PRIVATE_SUBNET_ID),) - $(call red, "\`PRIVATE_SUBNET_ID\` must not be set when \`CREATE_VPC\` is \`false\`") - @exit 1 -endif -else ifeq ($(CREATE_VPC),true) -ifneq ($(VPC_ID),) - $(call red, "\`VPC_ID\` must not be set as an environment variable when \`CREATE_VPC\` is \`true\`") - @exit 1 -endif -ifneq ($(PUBLIC_SUBNET_ID),) - $(call red, "\`PUBLIC_SUBNET_ID\` must not be set as an environment variable when \`CREATE_VPC\` is \`true\`") - @exit 1 -endif -ifneq ($(PRIVATE_SUBNET_ID),) - $(call red, "\`PRIVATE_SUBNET_ID\` must not be set when \`CREATE_VPC\` is \`false\`") - @exit 1 -endif -endif -endif env.validate.create-neo4j-users: @if [ -n "${CREATE_NEO4J_USERS}" ]; then \ @@ 
-421,18 +360,10 @@ database.service.deploy: $(MAKE) -C ${APP_NAME}/database/ service.deploy database.connect: -ifeq ($(USE_PRIVATE_SUBNET),true) $(MAKE) infrastructure.access-services.bastion-server.connect -else - $(MAKE) -C ${APP_NAME}/database/ service.connect -endif database.ui.connect: -ifeq ($(USE_PRIVATE_SUBNET),true) $(MAKE) -C ${APP_NAME}/infrastructure/access-services/bastion-server/ service.ui.connect -else ifeq ($(USE_PRIVATE_SUBNET),false) - $(MAKE) database.get.endpoint -endif pipeline.deploy: $(MAKE) -C ${APP_NAME}/pipeline/ deploy @@ -549,11 +480,7 @@ database.status: jq --arg iid "${INSTANCE_ID}" '.Reservations[].Instances[] | (.InstanceId == $$iid) | {InstanceId, InstanceType, "Status": .State.Name, StateTransitionReason, ImageId}' database.get.endpoint: -ifeq ($(USE_PRIVATE_SUBNET),true) @echo "http://localhost:7474/browser/" -else ifeq ($(USE_PRIVATE_SUBNET),false) - @echo "https://${SUBDOMAIN}.${HOST_DOMAIN}:7473/browser/" -endif database.get.credentials: @secret_string=$$(aws secretsmanager get-secret-value --secret-id /${APP_NAME}/${STAGE}/${AWS_REGION}/Neo4jCredentials | jq -r '.SecretString') && \ @@ -567,13 +494,6 @@ database.get.private-ip: | jq -r '.Parameters[0].Value') && \ echo "$${private_ip}" -database.get.public-ip: - @public_ip=$$(aws ssm get-parameters \ - --names "/${APP_NAME}/${STAGE}/${AWS_REGION}/Neo4jPublicIp" \ - --output json \ - | jq -r '.Parameters[0].Value') && \ - echo "$${public_ip}" - database.get.instance-id: @echo "${INSTANCE_ID}" @@ -660,36 +580,24 @@ define HELP_MESSAGE CREATE_VPC: "${CREATE_VPC}" Description: (boolean) Create a new VPC or use an existing one - USE_PRIVATE_SUBNET: "${USE_PRIVATE_SUBNET}" - Description: (boolean) Use a private subnet for Neo4j - DEPLOY_VPC_ENDPOINTS: "${DEPLOY_VPC_ENDPOINTS}" Description: (boolean) Deploy VPC endpoints for S3 and DynamoDB DEPLOY_NAT_GATEWAY: "${DEPLOY_NAT_GATEWAY}" - Description: (boolean) Deploy a NAT Gateway or use an existing one, required when 
USE_PRIVATE_SUBNET is true + Description: (boolean) Deploy a NAT Gateway or use an existing one DEPLOY_BASTION_SERVER: "${DEPLOY_BASTION_SERVER}" - Description: (boolean) Deploy a Bastion Server or use an existing one, required when USE_PRIVATE_SUBNET is true + Description: (boolean) Deploy a Bastion Server or use an existing one ADMIN_IP: "${ADMIN_IP}" Description: (string) IP address to allow SSH access to the Bastion Server, required when DEPLOY_BASTION_SERVER is true ADMIN_EMAIL: "${ADMIN_EMAIL}" - Description: (string) Admin email address for Neo4j server SSL certificate management, required when USE_PRIVATE_SUBNET is false + Description: (string) Admin email address for Neo4j server SSL certificate management SUBSCRIBE_EMAILS: "${SUBSCRIBE_EMAILS}" Description: (string) Comma separated list of email addresses to subscribe to CloudWatch notifications - HOST_DOMAIN: "${HOST_DOMAIN}" - Description: (string) Domain name for the Neo4j server, required when USE_PRIVATE_SUBNET is false - - SUBDOMAIN: "${SUBDOMAIN}" - Description: (string) Subdomain name for the Neo4j server, required when USE_PRIVATE_SUBNET is false - - HOSTED_ZONE_ID: "${HOSTED_ZONE_ID}" - Description: (string) Route53 hosted zone ID, required when USE_PRIVATE_SUBNET is false - VPC_ID: "${VPC_ID}" Description: (string) ID of an existing VPC, required when CREATE_VPC is false @@ -698,7 +606,6 @@ define HELP_MESSAGE PRIVATE_SUBNET_ID: "${PRIVATE_SUBNET_ID}" Description: (string) ID of an existing private subnet, required when CREATE_VPC is false - and USE_PRIVATE_SUBNET is true APOC_VERSION: "${APOC_VERSION}" Description: (string) Version of APOC to install diff --git a/README.md b/README.md index 98e8cfa6..9388814b 100755 --- a/README.md +++ b/README.md @@ -17,8 +17,9 @@ Graph database representing IPD-IMGT/HLA sequence data as GFE. 
+ [Data Pipeline](#data-pipeline) * [Prerequisites](#prerequisites) + [Libraries](#libraries) - + [AWS Resources](#aws-resources) * [Quick Start](#quick-start) + + [*(Option 1)* Deployment with VPC](#--option-1---deployment-with-vpc) + + [*(Option 2)* Deployment using existing VPC, NAT Gateway and VPC Endpoints](#--option-2---deployment-using-existing-vpc--nat-gateway-and-vpc-endpoints) * [Application Environment](#application-environment) + [AWS Credentials](#aws-credentials) + [Deployment Configurations](#deployment-configurations) @@ -48,6 +49,7 @@ Graph database representing IPD-IMGT/HLA sequence data as GFE. + [Debugging Batch Jobs](#debugging-batch-jobs) - [Running the Build job using Python](#running-the-build-job-using-python) - [Running the Build job using Docker](#running-the-build-job-using-docker) + * [Documentation](#documentation) + [Editing and Building the Documentation](#editing-and-building-the-documentation) * [Troubleshooting](#troubleshooting) * [Authors](#authors) @@ -73,8 +75,7 @@ Graph database representing IPD-IMGT/HLA sequence data as GFE.    │   ├── amazon-cloudwatch-agent    │   │   └── amazon-cloudwatch-agent.json # Sends EC2 logs to CloudWatch Logs for monitoring    │   ├── neo4j -    │   │   ├── cypher # Cypher scripts for initialization and loading -    │   │   └── neo4j.template # Neo4j server configuration file +    │   │   └── cypher # Cypher scripts for initialization and loading    │   ├── scripts # Shell scripts for automation, loading, backup & restore │   └── template.yaml # Base Infrastructure layer @@ -84,10 +85,12 @@ Graph database representing IPD-IMGT/HLA sequence data as GFE.    
│   │   ├── bastion-server # bastion server for private deployments    │   │   │   ├── Makefile    │   │   │   └── template.yaml +    │   │   ├── vpc-endpoints # VPC endpoints for private deployments +    │   │   │   ├── Makefile +    │   │   │   └── template.yaml    │   │   └── nat-gateway # NAT Gateway for private deployments    │   │   ├── Makefile    │   │   └── template.yaml -    │   ├── change-batch.json    │   └── template.yaml # Docker Build layer    ├── local @@ -111,7 +114,6 @@ Graph database representing IPD-IMGT/HLA sequence data as GFE. ## Description `gfe-db` is an implementation of the paper [A Gene Feature Enumeration Approach For Describing HLA Allele Polymorphism](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4674356/) to represent IPD-IMGT/HLA sequence data as GFE nodes and relationships using a Neo4j graph database. This application deploys and configures AWS resources for the GFE database and an automated data pipeline for updates. -

gfe-db schema @@ -126,7 +128,7 @@ Graph database representing IPD-IMGT/HLA sequence data as GFE. `gfe-db` architecture is organized by 3 layers each with its own Makefile: 1) Base Infrastructure 2) Database -3) Data pipeline +3) Data Pipeline This allows the database and pipeline layers to be decoupled from each other and deployed or destroyed independently without affecting the other. Common configuration parameters are shared between resources using environment variables, JSON files, AWS SSM Parameter Store and Secrets Manager. All deployment and administration tasks are intended to be done using the root `Makefile`. @@ -136,7 +138,7 @@ Additionally, `gfe-db` can be deployed with its own VPC or use an existing one, The base infrastructure layer deploys networking resources, an S3 bucket and shared configuration values using Parameter Store and Secrets Manager for all services to use. For private deployments this layer manages VPC endpoints, NAT Gateway and a bastion server. For public deployments it manages Elastic IPs and DNS routing for Neo4j by updating the existing A record of the specified Route53 domain and hosted zone so that SSL can be used to connect to Neo4j browser. #### Access Services -Optional resources for private deployments (`USE_PRIVATE_SUBNET=true`) include a NAT Gateway and bastion server. The NAT Gateway provides internet access to the private subnet for initializing Neo4j. The bastion server allows secure access to the Neo4j server and Neo4j Browser. These resources are deployed using CloudFormation and managed using Makefile targets. It is possible to remove them after deployment to save costs and re-deploy them later if needed. +Optional resources include a NAT Gateway, VPC Endpoints and bastion server. 
The NAT Gateway provides internet access to the private subnet for initializing Neo4j, VPC endpoints provide connectivity to AWS services for resources inside private subnets and the bastion server allows secure access to the Neo4j server and Neo4j Browser. These resources are deployed using CloudFormation and managed using Makefile targets. It is possible to remove them after deployment to save costs and re-deploy them later if needed, although this will prevent the loading pipeline from running. ### Database The database layer deploys Neo4j to an EC2 instance running the Amazon Linux 2 base image in a public or private subnet. During database deployment the SSL certificate is created (public deployments), Neo4j is installed and Cypher queries are run to create users as well as constraints and indexes to help speed up loading and ensure data integrity. Neo4j is ready to be accessed through a browser once the instance has booted sucessfully. For private deployments, the bastion server must be used to connect to Neo4j Browser using SSH tunneling. Cypher Shell can be accessed by connecting to the instance using SSH. @@ -160,29 +162,15 @@ Please refer to the respective documentation for specific installation instructi * jq * Python 3.10+ -**Note**: If using Rancher Desktop, set the `DOCKER_HOST` variable to use the correct file. [Ref](https://github.com/aws/aws-sam-cli/issues/3715#issuecomment-1962126068) +**Note:** If using Rancher Desktop, set the `DOCKER_HOST` variable to use the correct file. [Ref](https://github.com/aws/aws-sam-cli/issues/3715#issuecomment-1962126068) ``` export DOCKER_HOST="unix://$HOME/.rd/docker.sock" ``` -### AWS Resources -The following resources are required to deploy the application depending on the chosen configuration. 
-* Public deployments with VPC - * Route53 domain - * Hosted zone - * A record -* Public deployments using existing VPC - * VPC - * Public Subnet -* Private deployments using existing VPC - * VPC - * Public Subnet - * Private Subnet - ## Quick Start -### *(Option 1)* Private Subnet Deployment with VPC -Follow these steps to deploy gfe-db to a new VPC and private subnet. +### *(Option 1)* Deployment with VPC +Follow these steps to deploy gfe-db with a new VPC, NAT Gateway and VPC Endpoints. - Configure [AWS Credentials](#aws-credentials). - Define the environment variables in `.env.dev`. ```bash @@ -196,8 +184,9 @@ DOCKER_PASSWORD= ``` - For the stage `dev`, run `STAGE=dev make deploy` to deploy the architecture. - After deployment is complete run `STAGE=dev make database.load.run releases="3560"` to load the database with the IMGT/HLA release version 3560. +- Use `.env.example` as a template for the `.env.` file. -### *(Option 2)* Private deployment using existing VPC, NAT Gateway and VPC Endpoints +### *(Option 2)* Deployment using existing VPC, NAT Gateway and VPC Endpoints Follow these steps to deploy gfe-db to an existing private subnet and VPC. - Configure [AWS Credentials](#aws-credentials). - Create and configure the following resources before deploying the stacks: @@ -267,9 +256,7 @@ For more information visit the documentation page: [Configuration and credential file settings](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) ### Deployment Configurations -It is possible to deploy gfe-db within it's own VPC, or to connect it to an external VPC by specifying `CREATE_VPC=true/false`. Public or private deployments are specified using `USE_PRIVATE_SUBNET=true/false`. If deploying to an external VPC, you must specify the VPC ID and public subnet ID using `VPC_ID` and `PUBLIC_SUBNET_ID`. If deploying to a new VPC, you must specify the hosted zone ID, domain and subdomain using `HOSTED_ZONE_ID`, `HOST_DOMAIN` and `SUBDOMAIN`. 
- -Private deployments require a NAT Gateway which can be deployed along with the stack or specified using `EXTERNAL_NAT_GATEWAY_ID`. VPC endpoints are required for access to AWS services. These can also be deployed along with the stack or specified using `CREATE_SSM_VPC_ENDPOINT`, `SSM_VPC_ENDPOINT_ID`, `CREATE_SECRETSMANAGER_VPC_ENDPOINT`, `SECRETSMANAGER_VPC_ENDPOINT_ID`, `CREATE_S3_VPC_ENDPOINT` and `S3_VPC_ENDPOINT_ID`. If deploying a bastion server, you must specify the admin IP address using `ADMIN_IP`. +It is possible to deploy gfe-db within its own VPC, or to connect it to an external VPC by specifying `CREATE_VPC=true` or `CREATE_VPC=false`. If deploying to an external VPC, you must specify the VPC ID, public and private subnet IDs using `VPC_ID`, `PUBLIC_SUBNET_ID` and `PRIVATE_SUBNET_ID`. All deployments use a private subnet for Neo4j and require a NAT Gateway and VPC endpoints to be configured for the subnet which are deployed by default. If deploying a bastion server, you must specify the admin IP address using `ADMIN_IP`. #### Shell Variables These variables must be defined before running Make. The best way to set these variables is with a `.env.` file following this structure. For optional resources such as VPCs, subnets, VPC endpoints and NAT Gateways, an external resource ID is required if it is not deployed as part of the stack. @@ -286,9 +273,8 @@ These variables must be defined before running Make. 
The best way to set these v | DEPLOY_NAT_GATEWAY | bool | trure/false | Conditional | Optionally deploy a NAT Gateway and associated networking resources | | DEPLOY_VPC_ENDPOINTS | bool | trure/false | Conditional | Optionally deploy VPC endpoints required for services | | DEPLOY_BASTION_SERVER | bool | trure/false | Conditional | Optionally deploy a bastion server | -| USE_PRIVATE_SUBNET | bool | true/false | Yes | Use private subnet if true | | PUBLIC_SUBNET_ID | string | subnet-xxxxxxxx | Conditional | Required if CREATE_VPC=false | -| PRIVATE_SUBNET_ID | string | subnet-xxxxxxxx | Conditional | Required if CREATE_VPC=false and USE_PRIVATE_SUBNET=true | +| PRIVATE_SUBNET_ID | string | subnet-xxxxxxxx | Conditional | Required if CREATE_VPC=false | | ADMIN_EMAIL | string | admin@example.com | Yes | Administrator's email | | SUBSCRIBE_EMAILS | string | notify@example.com | Yes | Emails for subscription | | GITHUB_REPOSITORY_OWNER | string | ANHIG | Yes | Owner of the GitHub repository | @@ -299,15 +285,12 @@ These variables must be defined before running Make. 
The best way to set these v | GDS_VERSION | string | 2.5.6 | Yes | Version of GDS | | GITHUB_PERSONAL_ACCESS_TOKEN | string | ghp_xxxxxxxxxxxxxx | Yes | GitHub personal access token | | FEATURE_SERVICE_URL | string | https://api.example.com | Yes | URL of the Feature service | -| HOST_DOMAIN | string | example.com | Conditional | Required if USE_PRIVATE_SUBNET=false | -| SUBDOMAIN | string | sub.example.com | Conditional | Required if USE_PRIVATE_SUBNET=false | -| HOSTED_ZONE_ID | string | ZXXXXXXXXXXXXX | Conditional | Required if USE_PRIVATE_SUBNET=false | | VPC_ID | string | vpc-xxxxxxxx | Conditional | Required if CREATE_VPC=false | | ADMIN_IP | string | 192.168.1.1/32 | Conditional | Required if DEPLOY_BASTION_SERVER=true | | DOCKER_USERNAME | string | username | Yes | Required to build the Docker image from gfe-db | | DOCKER_PASSWORD | string | password | Yes | Required to build the Docker image from gfe-db | -*Note:* "Conditional" in the "Required" column indicates that the requirement of the variable depends on specific configurations or conditions. +**Note:** "Conditional" in the "Required" column indicates that the requirement of the variable depends on specific configurations or conditions. ***Important**:* *Always use a `.env` file or AWS SSM Parameter Store or Secrets Manager for sensitive variables like credentials and API keys. Never hard-code them, including when developing. AWS will quarantine an account if any credentials get accidentally exposed and this will cause problems. Make sure to update `.gitignore` to avoid pushing sensitive data to public repositories.* @@ -364,7 +347,7 @@ STAGE= make pipeline.service.deploy # Deploy or update only the Docker image for the build job STAGE= make pipeline.jobs.deploy ``` -*Note:* Because common parameters are passed from the root Makefile to nested Makefiles you can only call targets from the project's root. 
If a deployed stack has not been changed, the deployment script will continue until it reaches a stack with changes and deploy the changes. +**Note:** Because common parameters are passed from the root Makefile to nested Makefiles you can only call targets from the project's root. If a deployed stack has not been changed, the deployment script will continue until it reaches a stack with changes and deploy the changes. ### Makefile Command Reference To see a list of possible commands using Make, run `make` on the command line. You can also refer to the `Makefile Usage` section in the [Sphinx documentation](#documentation). @@ -424,10 +407,7 @@ Bash scripts are used for automating Neo4j configuration, loading and backup. Th gfe-db/database/scripts ├── Makefile # Orchestrates tasks on the database instance ├── init -│   ├── create_cert.sh # Create an SSL certificate -│   ├── create_users.sh # Create users and passwords in Neo4j -│   ├── eip_assoc_waiter.sh # Waits for the instance to associate with an Elastic IP -│   └── renew_cert.sh # Renews the SSL certificate for public deployments +│   └── create_users.sh # Create users and passwords in Neo4j ├── load_db.sh # Loads data into Neo4j ├── send_heartbeat.sh # Sends task heartbeat to Step Functions API during loading └── start_task.sh # Coordinates database loading with the Step Functions API @@ -446,6 +426,7 @@ Cypher scripts manage node constraints & indexes and load the data. 
These are fo gfe-db/database/neo4j/ └── cypher # Cypher scripts    ├── create_constraints.cyp # Creates constraints and indexes +    ├── create_user.cyp # Creates Neo4j users    ├── drop_constraints.cyp # Drops constraints and indexes    ├── init.cyp # Run intitialization queries    └── load.cyp # Load Neo4j from local files @@ -468,7 +449,6 @@ Base input parameters (excluding the `releases` value) are passed to the Step Fu "use_existing_build": false, // Optional, defaults to false "skip_load": false // Optional, defaults to false } - ``` | Variable | Example Value | Type | Description | | ------------------ | ------------- | ------ | ------------------------------------------------------------------------------ | @@ -490,6 +470,8 @@ STAGE= make database.load.run \ skip_load= ``` +**Note:** KIR data is not yet implemented. + ### IMGT/HLA Release Versions State The application's state tracks which releases have been processed and added to the database. This file tracks the releases which have already been processed. If the `gfe-db-invoke-pipeline` function detects a valid release branch in the source data repository that is not in the `releases` array, it will start the pipeline for this release. Once the update is finished, the processed release is appended to the array. ```json @@ -786,39 +768,6 @@ docker build --tag gfe-db-build-service --platform "linux/amd64" . docker run --env-file .env -v $(pwd)/data:/data gfe-db-build-service ``` - - ## Documentation It is not necessary to install Sphinx to view `gfe-db` documentation because it is already built and available in the `docs/` folder, but you will need it to edit them. To get the local `index.html` path run the command and navigate to the URL in a browser. 
diff --git a/gfe-db/database/Makefile b/gfe-db/database/Makefile index 003ea57a..43822c34 100644 --- a/gfe-db/database/Makefile +++ b/gfe-db/database/Makefile @@ -71,7 +71,6 @@ service.deploy: Stage="$${STAGE}" \ AppName="$${APP_NAME}" \ createVpc="$${CREATE_VPC}" \ - usePrivateSubnet="$${USE_PRIVATE_SUBNET}" \ DataBucketName="$${DATA_BUCKET_NAME}" \ Neo4jAmiId="$${NEO4J_AMI_ID}" \ Neo4jDatabaseName="$${NEO4J_DATABASE_NAME}" \ diff --git a/gfe-db/database/scripts/Makefile b/gfe-db/database/scripts/Makefile index d0fe3ebe..70155b1c 100644 --- a/gfe-db/database/scripts/Makefile +++ b/gfe-db/database/scripts/Makefile @@ -1,6 +1,5 @@ # This Makefile only orchestrates process that are run on the EC2 database instance, it is deployed and called on the server # Must be run as root -# TODO make sure commands are idempotent SERVICE := database # Application specific environment variables @@ -31,22 +30,12 @@ env.validate.vars: $(if $(value $(var)),,$(error $(var) is not set. Please add $(var) to the environment variables.))) @echo "$$(date -u +'%Y-%m-%d %H:%M:%S.%3N') - Found environment variables" 2>&1 -bootstrap: env.validate.vars -ifeq ($(USE_PRIVATE_SUBNET),false) - $(MAKE) eip.waiter - $(MAKE) ssl.create-cert -endif - -neo4j: bootstrap +neo4j: env.validate.vars $(MAKE) neo4j.plugins.install-gds $(MAKE) neo4j.restart $(MAKE) neo4j.init $(MAKE) copy-logs -# eip -eip.waiter: - @bash ${EC2_USER_HOME}/init/eip_assoc_waiter.sh - neo4j.credentials.show: @echo "Username: ${NEO4J_USERNAME}" @echo "Password: ${NEO4J_PASSWORD}" @@ -102,14 +91,7 @@ neo4j.init.download-scripts: neo4j.query.init: # public: neo4j+s://${SUBDOMAIN}.${HOST_DOMAIN}:7687; private: bolt://:7687 @echo "Executing initialization queries" -ifeq ($(USE_PRIVATE_SUBNET),true) $(MAKE) neo4j.query.init.no-ssl -else ifeq ($(USE_PRIVATE_SUBNET),false) - $(MAKE) neo4j.query.init.ssl -endif - -neo4j.query.init.ssl: - @cat ${NEO4J_HOME}/cypher/init.cyp | ${NEO4J_HOME}/bin/cypher-shell -u ${NEO4J_USERNAME} -p 
${NEO4J_PASSWORD} -a neo4j+s://${SUBDOMAIN}.${HOST_DOMAIN}:7687 neo4j.query.init.no-ssl: @cat ${NEO4J_HOME}/cypher/init.cyp | ${NEO4J_HOME}/bin/cypher-shell -u ${NEO4J_USERNAME} -p ${NEO4J_PASSWORD} -a bolt://127.0.0.1:7687 --encryption false @@ -119,38 +101,13 @@ neo4j.query.create-users: #=> users= @bash init/create_users.sh "$$users" neo4j.query: #=> query= -ifeq ($(USE_PRIVATE_SUBNET),true) @echo "$$query" | ${NEO4J_HOME}/bin/cypher-shell -u ${NEO4J_USERNAME} -p ${NEO4J_PASSWORD} -a bolt://127.0.0.1:7687 --encryption false -else ifeq ($(USE_PRIVATE_SUBNET),false) - @echo "$$query" | ${NEO4J_HOME}/bin/cypher-shell -u ${NEO4J_USERNAME} -p ${NEO4J_PASSWORD} -a neo4j+s://${SUBDOMAIN}.${HOST_DOMAIN}:7687 -endif neo4j.query.show-constraints: # public: neo4j+s://${SUBDOMAIN}.${HOST_DOMAIN}:7687; private: bolt://:7687 -ifeq ($(USE_PRIVATE_SUBNET),true) @echo "SHOW CONSTRAINTS;" | ${NEO4J_HOME}/bin/cypher-shell -u ${NEO4J_USERNAME} -p ${NEO4J_PASSWORD} -a bolt://127.0.0.1:7687 --encryption false -else ifeq ($(USE_PRIVATE_SUBNET),false) - @echo "SHOW CONSTRAINTS;" | ${NEO4J_HOME}/bin/cypher-shell -u ${NEO4J_USERNAME} -p ${NEO4J_PASSWORD} -a neo4j+s://${SUBDOMAIN}.${HOST_DOMAIN}:7687 -endif neo4j.waiter: -ifeq ($(USE_PRIVATE_SUBNET),true) $(MAKE) neo4j.waiter.no-ssl -else ifeq ($(USE_PRIVATE_SUBNET),false) - $(MAKE) neo4j.waiter.ssl -endif - -neo4j.waiter.ssl: - @timeout=120 && \ - counter=0 && \ - echo "Waiting for response from Neo4j at https://${SUBDOMAIN}.${HOST_DOMAIN}:7473..." && \ - until $$(curl --output /dev/null --silent --head --fail https://${SUBDOMAIN}.${HOST_DOMAIN}:7473) ; do \ - printf '.' ; \ - sleep 1 ; \ - counter=$$((counter + 1)) ; \ - [ $$counter -eq $$timeout ] && break || true ; \ - done && \ - printf "%s\n" " " && \ - [ $$counter -eq $$timeout ] && echo "Operation timed out!" 
|| echo "Neo4j is ready" neo4j.waiter.no-ssl: @timeout=120 && \ @@ -207,12 +164,6 @@ neo4j.restore: #from_path=s3://.zip # @mkdir -p ${REPORTS_DIR} # @${NEO4J_HOME}/bin/neo4j-admin check-consistency --database=neo4j --verbose=true --report-dir=reports > reports/consistency_check.txt -ssl.create-cert: - @bash init/create_cert.sh "${SUBDOMAIN}.${HOST_DOMAIN}" ${ADMIN_EMAIL} - -ssl.renew-cert: - @bash init/renew_cert.sh "${SUBDOMAIN}.${HOST_DOMAIN}" - copy-logs: @mkdir -p ${LOGS_DIR}/var/logs $(MAKE) logs.bootstrap diff --git a/gfe-db/database/scripts/init/create_cert.sh b/gfe-db/database/scripts/init/create_cert.sh deleted file mode 100644 index 3ab6ee86..00000000 --- a/gfe-db/database/scripts/init/create_cert.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -x - -# Part of user data, to be run on the database instance on initialization, or later for renewal - -echo "Provisioning SSL certificate..." -# export NEO4J_HOME=/var/lib/neo4j - -# Passed from command line -DOMAIN=$1 -ADMIN_EMAIL=$2 - -certbot certonly -n \ - -d $DOMAIN \ - --standalone \ - -m $ADMIN_EMAIL \ - --agree-tos \ - --redirect - -chgrp -R neo4j /etc/letsencrypt/* -chmod -R g+rx /etc/letsencrypt/* -mkdir -p $NEO4J_HOME/certificates/{bolt,cluster,https}/trusted - -for certsource in bolt cluster https; do - ln -sf "/etc/letsencrypt/live/$DOMAIN/fullchain.pem" "$NEO4J_HOME/certificates/$certsource/neo4j.cert" - ln -sf "/etc/letsencrypt/live/$DOMAIN/privkey.pem" "$NEO4J_HOME/certificates/$certsource/neo4j.key" - ln -sf "/etc/letsencrypt/live/$DOMAIN/fullchain.pem" "$NEO4J_HOME/certificates/$certsource/trusted/neo4j.cert" -done - -chgrp -R neo4j $NEO4J_HOME/certificates/* -chmod -R g+rx $NEO4J_HOME/certificates/* - -exit 0 diff --git a/gfe-db/database/scripts/init/eip_assoc_waiter.sh b/gfe-db/database/scripts/init/eip_assoc_waiter.sh deleted file mode 100644 index bb68f59b..00000000 --- a/gfe-db/database/scripts/init/eip_assoc_waiter.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -x - -# DIR=$(pwd) -# 
PARENT_DIR="$(dirname "$DIR")" -ROOT_DIR=/home/ec2-user -PUBLIC_IPV4_ROUTE=http://169.254.169.254/latest/meta-data/public-ipv4 - -# Load BOOTSTRAP_PUBLIC_IPV4 set in user data -# source $ROOT_DIR/env.sh moved to .env, script is called from Makefile which includes them -# echo "Bootstrapped public IP is $BOOTSTRAP_PUBLIC_IPV4" -PUBLIC_IPV4=$(curl -sS $PUBLIC_IPV4_ROUTE) -echo "Current public IPv4 is $PUBLIC_IPV4" - -export NEO4J_ENDPOINT=$(aws ssm get-parameters \ - --region $AWS_REGION \ - --names "/$APP_NAME/$STAGE/$AWS_REGION/Neo4jDatabaseEndpoint" \ - | jq -r '.Parameters[0].Value') - -export ALLOCATION_ID=$(aws ssm get-parameters \ - --region $AWS_REGION \ - --names "/$APP_NAME/$STAGE/$AWS_REGION/Neo4jDatabaseEndpointAllocationId" \ - | jq -r '.Parameters[0].Value') - -echo "Target Elastic IP is $NEO4J_ENDPOINT" -echo "Creating Elastic IP association" -res=$(aws ec2 associate-address \ - --allocation-id $ALLOCATION_ID \ - --region $AWS_REGION \ - --instance-id $INSTANCE_ID) -echo $res | jq -r - -# Set timeout -TIMEOUT=${1:-60} -counter=0 -until [ "$PUBLIC_IPV4" = "$NEO4J_ENDPOINT" ]; do - PUBLIC_IPV4=$(curl -sS $PUBLIC_IPV4_ROUTE) - printf '.' - sleep 1 - counter=$((counter + 1)) - - if [ $counter -eq $TIMEOUT ]; then - echo "Task timed out" - exit 1 - break - fi -done -printf "%s\n" -echo "Validating association..." -echo "Instance is associated with Elastic IP at $PUBLIC_IPV4" -exit 0 \ No newline at end of file diff --git a/gfe-db/database/scripts/init/renew_cert.sh b/gfe-db/database/scripts/init/renew_cert.sh deleted file mode 100644 index b5a4dd40..00000000 --- a/gfe-db/database/scripts/init/renew_cert.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -x - -# Part of user data, to be run on the database instance on initialization, or later for renewal - -echo "Renewing SSL certificate..." 
-export NEO4J_HOME=/var/lib/neo4j - -# Passed from command line -DOMAIN=$1 -# ADMIN_EMAIL=$2 - -# certbot certonly -n \ -# -d $DOMAIN \ -# --standalone \ -# -m $ADMIN_EMAIL \ -# --agree-tos \ -# --redirect - -certbot renew - -chgrp -R neo4j /etc/letsencrypt/* -chmod -R g+rx /etc/letsencrypt/* -mkdir -p $NEO4J_HOME/certificates/{bolt,cluster,https}/trusted - -for certsource in bolt cluster https; do - ln -sf "/etc/letsencrypt/live/$DOMAIN/fullchain.pem" "$NEO4J_HOME/certificates/$certsource/neo4j.cert" - ln -sf "/etc/letsencrypt/live/$DOMAIN/privkey.pem" "$NEO4J_HOME/certificates/$certsource/neo4j.key" - ln -sf "/etc/letsencrypt/live/$DOMAIN/fullchain.pem" "$NEO4J_HOME/certificates/$certsource/trusted/neo4j.cert" -done - -chgrp -R neo4j $NEO4J_HOME/certificates/* -chmod -R g+rx $NEO4J_HOME/certificates/* - -exit 0 diff --git a/gfe-db/database/scripts/load_db.sh b/gfe-db/database/scripts/load_db.sh index 4b79b291..110c08db 100644 --- a/gfe-db/database/scripts/load_db.sh +++ b/gfe-db/database/scripts/load_db.sh @@ -75,31 +75,15 @@ echo "****** End Cypher ******" # Run Cypher load query echo "$(date -u +'%Y-%m-%d %H:%M:%S.%3N') - Loading data for release $RELEASE into Neo4j..." -if [[ "$USE_PRIVATE_SUBNET" = true ]]; then - - # # With SSL/TLS policy disabled for private instance - cat $NEO4J_CYPHER_PATH/tmp/$RELEASE/load.$RELEASE.cyp | \ - $NEO4J_HOME/bin/cypher-shell \ - --address bolt://127.0.0.1:7687 \ - --encryption false \ - --username $NEO4J_USERNAME \ - --password $NEO4J_PASSWORD \ - --format verbose - LOAD_EXIT_STATUS=$? - -else - - # With SSL/TLS policy enabled - cat $NEO4J_CYPHER_PATH/tmp/$RELEASE/load.$RELEASE.cyp | \ - $NEO4J_HOME/bin/cypher-shell \ - --address neo4j://$SUBDOMAIN.$HOST_DOMAIN:7687 \ - --encryption true \ - --username $NEO4J_USERNAME \ - --password $NEO4J_PASSWORD \ - --format verbose - LOAD_EXIT_STATUS=$? 
- -fi +# With SSL/TLS policy disabled for private instance +cat $NEO4J_CYPHER_PATH/tmp/$RELEASE/load.$RELEASE.cyp | \ + $NEO4J_HOME/bin/cypher-shell \ + --address bolt://127.0.0.1:7687 \ + --encryption false \ + --username $NEO4J_USERNAME \ + --password $NEO4J_PASSWORD \ + --format verbose +LOAD_EXIT_STATUS=$? if [[ $LOAD_EXIT_STATUS -eq 0 ]]; then echo "$(date -u +'%Y-%m-%d %H:%M:%S.%3N') - Load complete" diff --git a/gfe-db/database/template.yaml b/gfe-db/database/template.yaml index a3f18866..4594f41d 100644 --- a/gfe-db/database/template.yaml +++ b/gfe-db/database/template.yaml @@ -7,16 +7,6 @@ Parameters: AppName: Type: String Description: Application name - # createVpc: - # Type: String - # AllowedValues: - # - 'true' - # - 'false' - usePrivateSubnet: - Type: String - AllowedValues: - - 'true' - - 'false' DataBucketName: Type: String Neo4jAmiId: @@ -45,13 +35,7 @@ Parameters: Type: String GDSVersion: Type: String -Conditions: - UsePrivateSubnet: !Equals - - !Ref usePrivateSubnet - - 'true' - UsePublicSubnet: !Equals - - !Ref usePrivateSubnet - - 'false' + Resources: Neo4jCredentialsSecret: Type: AWS::SecretsManager::Secret @@ -61,6 +45,7 @@ Resources: Name: !Sub /${AppName}/${Stage}/${AWS::Region}/Neo4jCredentials Description: Neo4j credentials for gfedb datase; Specified in the deployment environment. 
SecretString: !Sub '{"NEO4J_USERNAME":"neo4j","NEO4J_PASSWORD":"${Neo4jPassword}"}' + Neo4jCredentialsSecretArnParameter: Type: AWS::SSM::Parameter Properties: @@ -68,11 +53,12 @@ Resources: Name: !Sub /${AppName}/${Stage}/${AWS::Region}/Neo4jCredentialsSecretArn Description: ARN for Neo4jCredentialsSecret Value: !Ref Neo4jCredentialsSecret + Neo4jDatabaseInstance: Type: AWS::EC2::Instance CreationPolicy: ResourceSignal: - Timeout: PT5M + Timeout: PT10M Properties: KeyName: !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/EC2KeyPairName}}' DisableApiTermination: false @@ -164,18 +150,8 @@ Resources: /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json [ $(amazon-cloudwatch-agent-ctl -a status | jq -r '.status') = running ] && echo "INFO: CloudWatch Logs Agent is running" || \ (msg="ERROR: CloudWatch Logs Agent did not initialize correctly" && echo $msg && cfn_signal 1 "$msg" && exit 1) - # Set NEO4J_URI - export USE_PRIVATE_SUBNET=${usePrivateSubnet} - if [ "$USE_PRIVATE_SUBNET" = true ]; then - export NEO4J_URI=bolt://127.0.0.1:7687 - export NEO4J_ENCRYPTION=false - elif [ "$USE_PRIVATE_SUBNET" = false ]; then - export NEO4J_URI=neo4j+s://${Subdomain}.${HostDomain}:7687 - export NEO4J_ENCRYPTION=default - else - echo "ERROR: USE_PRIVATE_SUBNET must be set to 'true' or 'false'" - exit 1 - fi + export NEO4J_URI=bolt://127.0.0.1:7687 + export NEO4J_ENCRYPTION=false # Permanently set application environment variables (these should never change for the duration of the deployment) echo "INFO: Setting application environment variables" echo "INSTANCE_ID=$INSTANCE_ID" > $EC2_USER_HOME/env.sh @@ -183,7 +159,6 @@ Resources: echo "NEO4J_HOME=$NEO4J_HOME" >> $EC2_USER_HOME/env.sh echo "STAGE=${Stage}" >> $EC2_USER_HOME/env.sh echo "APP_NAME=${AppName}" >> $EC2_USER_HOME/env.sh - echo "USE_PRIVATE_SUBNET=$USE_PRIVATE_SUBNET" >> $EC2_USER_HOME/env.sh echo 
"AWS_REGION=us-east-1" >> $EC2_USER_HOME/env.sh echo "DATA_BUCKET_NAME=${DataBucketName}" >> $EC2_USER_HOME/env.sh echo "HOST_DOMAIN=${HostDomain}" >> $EC2_USER_HOME/env.sh @@ -203,14 +178,7 @@ Resources: echo "INFO: Validating environment variables" exit_code=0 msg="The following variables failed to be set: " - if [ $USE_PRIVATE_SUBNET = "true" ]; then - vars="EC2_USER_HOME STAGE APP_NAME USE_PRIVATE_SUBNET AWS_REGION DATA_BUCKET_NAME PRIVATE_IP ADMIN_EMAIL APOC_VERSION GDS_VERSION NEO4J_URI NEO4J_ENCRYPTION" - elif [ $USE_PRIVATE_SUBNET = "false" ]; then - vars="EC2_USER_HOME STAGE APP_NAME USE_PRIVATE_SUBNET AWS_REGION DATA_BUCKET_NAME HOST_DOMAIN SUBDOMAIN ADMIN_EMAIL APOC_VERSION GDS_VERSION NEO4J_URI NEO4J_ENCRYPTION" - else - echo "ERROR: USE_PRIVATE_SUBNET must be set to 'true' or 'false'" - exit 1 - fi + vars="EC2_USER_HOME STAGE APP_NAME AWS_REGION DATA_BUCKET_NAME PRIVATE_IP ADMIN_EMAIL APOC_VERSION GDS_VERSION NEO4J_URI NEO4J_ENCRYPTION" for var in $vars; do if [ -z "${!var}" ]; then echo "$var is not set" @@ -220,49 +188,10 @@ Resources: echo "$var is set" fi done - if [ -z "$USE_PRIVATE_SUBNET" ]; then - msg="ERROR: USE_PRIVATE_SUBNET must be set" - cfn_signal 1 "$msg" - exit 1 - else - echo "Found USE_PRIVATE_SUBNET=$USE_PRIVATE_SUBNET" - fi install_apoc_plugin() { echo "INFO: Installing APOC" mv $NEO4J_HOME/labs/apoc-*-core.jar $NEO4J_HOME/plugins } - # todo test public configuration - build_neo4j_public_config() { - if [ -z "$SUBDOMAIN" ] || [ -z "$HOST_DOMAIN" ]; then - echo "ERROR: SUBDOMAIN and HOST_DOMAIN must be set" - exit 1 - fi - sed -i "s/#initial.dbms.default_database=neo4j/initial.dbms.default_database=${!NEO4J_DATABASE_NAME}/g" $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#server.default_listen_address=0.0.0.0/server.default_listen_address=0.0.0.0/g' $NEO4J_HOME/conf/neo4j.conf - sed -i "s/#server.default_advertised_address=localhost/server.default_advertised_address=$SUBDOMAIN.$HOST_DOMAIN/g" $NEO4J_HOME/conf/neo4j.conf - sed -i 
's/#server.bolt.tls_level=DISABLED/server.bolt.tls_level=REQUIRED/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#server.bolt.listen_address=:7687/server.bolt.listen_address=:7687/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#server.bolt.advertised_address=:7687/server.bolt.advertised_address=:7687/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/server.http.enabled=true/#server.http.enabled=true/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/server.https.enabled=false/server.https.enabled=true/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#server.https.listen_address=:7473/server.https.listen_address=:7473/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#server.https.advertised_address=:7473/server.https.advertised_address=:7473/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#dbms.ssl.policy.bolt.enabled=true/dbms.ssl.policy.bolt.enabled=true/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#dbms.ssl.policy.bolt.base_directory=certificates\/bolt/dbms.ssl.policy.bolt.base_directory=\/opt\/neo4j\/certificates\/bolt/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#dbms.ssl.policy.bolt.private_key=private.key/dbms.ssl.policy.bolt.private_key=\/opt\/neo4j\/certificates\/bolt\/neo4j.key/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#dbms.ssl.policy.bolt.public_certificate=public.crt/dbms.ssl.policy.bolt.public_certificate=\/opt\/neo4j\/certificates\/bolt\/neo4j.cert/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#dbms.ssl.policy.bolt.client_auth=NONE/dbms.ssl.policy.bolt.client_auth=NONE/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#dbms.ssl.policy.https.enabled=true/dbms.ssl.policy.https.enabled=true/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#dbms.ssl.policy.https.base_directory=certificates\/https/dbms.ssl.policy.https.base_directory=\/opt\/neo4j\/certificates\/https/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#dbms.ssl.policy.https.private_key=private.key/dbms.ssl.policy.https.private_key=\/opt\/neo4j\/certificates\/https\/neo4j.key/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 
's/#dbms.ssl.policy.https.public_certificate=public.crt/dbms.ssl.policy.https.public_certificate=\/opt\/neo4j\/certificates\/https\/neo4j.cert/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#dbms.ssl.policy.https.client_auth=NONE/dbms.ssl.policy.https.client_auth=NONE/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#dbms.logs.http.enabled=true/dbms.logs.http.enabled=true/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#dbms.security.allow_csv_import_from_file_urls=true/dbms.security.allow_csv_import_from_file_urls=true/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#dbms.security.procedures.unrestricted=my.extensions.example,my.procedures.*/dbms.security.procedures.unrestricted=apoc.*,gds.*/g' $NEO4J_HOME/conf/neo4j.conf - sed -i 's/#dbms.security.procedures.allowlist=apoc.coll.*,apoc.load.*,gds.*/dbms.security.procedures.allowlist=apoc.*,gds.*/g' $NEO4J_HOME/conf/neo4j.conf - $NEO4J_HOME/bin/neo4j-admin server memory-recommendation | grep -v ^# >> $NEO4J_HOME/conf/neo4j.conf - } build_neo4j_private_config() { sed -i "s/#initial.dbms.default_database=neo4j/initial.dbms.default_database=${!NEO4J_DATABASE_NAME}/g" $NEO4J_HOME/conf/neo4j.conf sed -i 's/#server.default_listen_address=0.0.0.0/server.default_listen_address=0.0.0.0/g' $NEO4J_HOME/conf/neo4j.conf @@ -315,16 +244,8 @@ Resources: echo "INFO: Neo4j systemd service file has been created at $SERVICE_FILE" install_apoc_plugin cp $NEO4J_HOME/conf/neo4j.conf $NEO4J_HOME/conf/neo4j.conf.bak - if [ "$USE_PRIVATE_SUBNET" = "false" ]; then - echo "INFO: Building public subnet configuration for Neo4j" - build_neo4j_public_config; - elif [ "$USE_PRIVATE_SUBNET" = "true" ]; then - echo "INFO: Building private subnet configuration for Neo4j" - build_neo4j_private_config; - else - echo "ERROR: USE_PRIVATE_SUBNET must be set to 'true' or 'false'" - exit 1 - fi + echo "INFO: Building private subnet configuration for Neo4j" + build_neo4j_private_config; systemctl daemon-reload systemctl enable neo4j systemctl start neo4j @@ -354,6 +275,7 @@ 
Resources: Tags: - Key: Name Value: !Sub ${AppName}-${Stage} + Neo4jDatabaseInstanceRole: Type: AWS::IAM::Role Properties: @@ -420,19 +342,13 @@ Resources: - ssm:PutParameter Resource: - !Sub arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/* + Neo4jDatabaseInstanceProfile: Type: AWS::IAM::InstanceProfile Properties: Roles: - !Ref Neo4jDatabaseInstanceRole - - # # TODO move to database init script - # UsePublicSubnetConditionNeo4jDatabaseElasticIpAssociation: - # Type: AWS::EC2::EIPAssociation - # Condition: UsePublicSubnet - # Properties: - # InstanceId: !Ref Neo4jDatabaseInstance - # AllocationId: !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseEndpointAllocationId}}' + Neo4jDatabaseInstanceIdParameter: Type: AWS::SSM::Parameter Properties: @@ -440,24 +356,15 @@ Resources: Name: !Sub /${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseInstanceId Description: Instance ID for gfe-db Neo4j database instance Value: !Ref Neo4jDatabaseInstance + Neo4jUriParameter: Type: AWS::SSM::Parameter Properties: Type: String Name: !Sub /${AppName}/${Stage}/${AWS::Region}/Neo4jUri Description: Connection URI for Neo4j - Value: !If - - UsePrivateSubnet - - !Sub bolt://${Neo4jDatabaseInstance.PrivateIp}:7687 - - !Sub neo4j+s://${Subdomain}.${HostDomain}:7687 - UsePublicSubnetConditionNeo4jBrowserUrlParameter: - Type: AWS::SSM::Parameter - Condition: UsePublicSubnet - Properties: - Type: String - Name: !Sub /${AppName}/${Stage}/${AWS::Region}/Neo4jBrowserUrl - Description: Connection URI for Neo4j - Value: !Sub https://${Subdomain}.${HostDomain}:7473/browser/ + Value: !Sub bolt://${Neo4jDatabaseInstance.PrivateIp}:7687 + Neo4jPrivateIpParameter: Type: AWS::SSM::Parameter Properties: @@ -465,14 +372,7 @@ Resources: Name: !Sub /${AppName}/${Stage}/${AWS::Region}/Neo4jPrivateIp Description: Private IP for Neo4j Value: !Sub ${Neo4jDatabaseInstance.PrivateIp} - Neo4jPublicIpParameter: - Type: AWS::SSM::Parameter - 
Condition: UsePublicSubnet - Properties: - Type: String - Name: !Sub /${AppName}/${Stage}/${AWS::Region}/Neo4jPublicIp - Description: Public IP for Neo4j - Value: !Sub ${Neo4jDatabaseInstance.PublicIp} + Neo4jBackupDocument: Type: AWS::SSM::Document Properties: @@ -500,6 +400,7 @@ Resources: - directory=$(pwd) - export PATH=$PATH:$directory - cd {{ workingDirectory }} && {{ commandLine }} + Neo4jBackupDocumentNameParameter: Type: AWS::SSM::Parameter Properties: @@ -507,6 +408,7 @@ Resources: Name: !Sub /${AppName}/${Stage}/${AWS::Region}/Neo4jBackupDocumentName Description: Name of SSM document for backing up Neo4j Value: !Ref Neo4jBackupDocument + CurrentBackupS3PathParameter: Type: AWS::SSM::Parameter Properties: @@ -514,6 +416,7 @@ Resources: Name: !Sub /${AppName}/${Stage}/${AWS::Region}/CurrentBackupS3Path Description: !Sub 'S3 path to the most recent backup for ${Stage}-${AppName}' Value: 'null' + Neo4jBackupMaintenanceWindow: Type: AWS::SSM::MaintenanceWindow Properties: @@ -524,6 +427,7 @@ Resources: Name: BackupNeo4jMaintenanceWindow Schedule: cron(0 0 ? 
* * *) ScheduleTimezone: US/Central + Neo4jBackupMaintenanceWindowIdParameter: Type: AWS::SSM::Parameter Properties: @@ -531,6 +435,7 @@ Resources: Name: !Sub /${AppName}/${Stage}/${AWS::Region}/Neo4jBackupMaintenanceWindowId Description: ID for Neo4jBackupMaintenanceWindow Value: !Ref Neo4jBackupMaintenanceWindow + Neo4jBackupMaintenanceWindowTask: Type: AWS::SSM::MaintenanceWindowTask Properties: @@ -546,6 +451,7 @@ Resources: - Key: WindowTargetIds Values: - !Ref Neo4jBackupMaintenanceWindowTarget + Neo4jBackupMaintenanceWindowTarget: Type: AWS::SSM::MaintenanceWindowTarget Properties: @@ -555,6 +461,7 @@ Resources: Values: - !Ref Neo4jDatabaseInstance WindowId: !Ref Neo4jBackupMaintenanceWindow + DatabaseSyncScriptsDocument: Type: AWS::SSM::Document Properties: @@ -609,6 +516,7 @@ Resources: - "" workingDirectory: "{{ workingDirectory }}" timeoutSeconds: "{{ executionTimeout }}" + DatabaseSyncScriptsDocumentNameParameter: Type: AWS::SSM::Parameter Properties: diff --git a/gfe-db/infrastructure/Makefile b/gfe-db/infrastructure/Makefile index 5cc45773..4678f15f 100644 --- a/gfe-db/infrastructure/Makefile +++ b/gfe-db/infrastructure/Makefile @@ -38,32 +38,9 @@ deploy: @echo "\033[0;34m$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Deploying ${SERVICE} services\033[0m" 2>&1 | tee -a $${CFN_LOG_PATH} $(MAKE) service.key-pair.create $(MAKE) service.deploy -ifeq ($(USE_PRIVATE_SUBNET),true) $(MAKE) service.access-services.deploy -else ifeq ($(USE_PRIVATE_SUBNET),false) - $(MAKE) service.deploy.update-dns -endif $(MAKE) service.monitoring.create-subscriptions -service.deploy.update-dns: - @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Updating DNS records" 2>&1 | tee -a $${CFN_LOG_PATH} - @config_path=${INFRA_DIR}/change-batch.json && \ - elastic_ip=$$(aws ssm get-parameters \ - --names "/$${APP_NAME}/$${STAGE}/$${AWS_REGION}/Neo4jDatabaseEndpoint" \ - --with-decryption \ - --query "Parameters[0].Value" \ - --output text) && \ - sed -e "s//${SUBDOMAIN}.${HOST_DOMAIN}./g" -e 
"s//$$elastic_ip/g" $$config_path > $$config_path.tmp && \ - echo "Updating DNS records with:" && \ - cat $$config_path.tmp | jq -r && \ - res=$$(aws route53 change-resource-record-sets --hosted-zone-id $${HOSTED_ZONE_ID} --change-batch file://$$config_path.tmp) && \ - echo && \ - echo "Waiting for update to complete:" && \ - echo $$res | jq -r && \ - aws route53 wait resource-record-sets-changed --id $$(echo $$res | jq -r '.ChangeInfo.Id') && \ - echo && \ - echo "Successfully updated DNS records" - service.key-pair.create: ##=> Checks if the key pair already exists and creates it if it does not @echo "\033[0;34m$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Creating EC2 key pair\033[0m" 2>&1 | tee -a $${CFN_LOG_PATH} @key_pair="$$(aws ec2 describe-key-pairs --key-name ${EC2_KEY_PAIR_NAME} | jq '.KeyPairs[0].KeyName' || true)" && \ @@ -90,7 +67,6 @@ service.deploy: Stage="$${STAGE}" \ AppName="$${APP_NAME}" \ createVpc="$${CREATE_VPC}" \ - usePrivateSubnet="$${USE_PRIVATE_SUBNET}" \ deployNatGateway="$${DEPLOY_NAT_GATEWAY}" \ AdminEmail="$${ADMIN_EMAIL}" \ DataBucketName="$$DATA_BUCKET_NAME" \ @@ -163,7 +139,6 @@ ifeq ($(data),true) @echo "$$(gdate -u +'%Y-%m-%d %H:%M:%S.%3N') - Deleting data in $${DATA_BUCKET_NAME}" 2>&1 | tee -a $${CFN_LOG_PATH} @aws s3 rm --recursive s3://$${DATA_BUCKET_NAME}/ endif -ifeq ($(USE_PRIVATE_SUBNET),true) ifeq ($(DEPLOY_BASTION_SERVER),true) $(MAKE) service.access-services.bastion-server.delete endif @@ -173,7 +148,7 @@ endif ifeq ($(DEPLOY_VPC_ENDPOINTS),true) $(MAKE) service.access-services.vpc-endpoints.delete endif -endif +# endif $(MAKE) service.delete $(MAKE) service.key-pair.delete $(MAKE) service.parameters.delete diff --git a/gfe-db/infrastructure/change-batch.json b/gfe-db/infrastructure/change-batch.json deleted file mode 100644 index 3ea3589d..00000000 --- a/gfe-db/infrastructure/change-batch.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "Changes": [ - { - "Action": "UPSERT", - "ResourceRecordSet": { - "Name": "", - "Type": "A", - 
"TTL": 300, - "ResourceRecords": [ - { - "Value": "" - } - ] - } - } - ] -} \ No newline at end of file diff --git a/gfe-db/infrastructure/template.yaml b/gfe-db/infrastructure/template.yaml index 52201dae..a0800ee1 100644 --- a/gfe-db/infrastructure/template.yaml +++ b/gfe-db/infrastructure/template.yaml @@ -12,17 +12,6 @@ Parameters: AllowedValues: - "true" - "false" - usePrivateSubnet: - Type: String - AllowedValues: - - "true" - - "false" - deployNatGateway: - Type: String - AllowedValues: - - "" - - "true" - - "false" AdminEmail: Type: String DataBucketName: @@ -39,19 +28,9 @@ Parameters: GitHubPersonalAccessToken: Type: String NoEcho: true - + Conditions: - # Important: All stacks (infra, database, pipeline) should use the same conditions - CreateVpc: !Equals [!Ref createVpc, 'true'] - # CreateVpcPublic: !And [!Equals [!Ref createVpc, 'true'], !Equals [!Ref usePrivateSubnet, 'false']] - CreateVpcPrivate: !And [!Equals [!Ref createVpc, 'true'], !Equals [!Ref usePrivateSubnet, 'true']] - # UseExternalVpc: !Equals [!Ref createVpc, 'false'] - # UseExternalVpcPublic: !And [!Equals [!Ref createVpc, 'false'], !Equals [!Ref usePrivateSubnet, 'false']] - # UseExternalVpcPrivate: !And [!Equals [!Ref createVpc, 'false'], !Equals [!Ref usePrivateSubnet, 'true']] - UsePrivateSubnet: !Equals [!Ref usePrivateSubnet, 'true'] - # UsePrivateSubnetDeployNatGateway: !And [!Equals [!Ref usePrivateSubnet, 'true'], !Equals [!Ref deployNatGateway, 'true']] - UsePublicSubnet: !Equals [!Ref usePrivateSubnet, 'false'] - # UseExternalNatGateway: !Equals [!Ref deployNatGateway, 'false'] + CreateVpc: !Equals [!Ref createVpc, "true"] Mappings: # AvailabilityZoneMap defines availability zones where an m5d.xlarge instance is available (used for Neo4j server). 
@@ -72,12 +51,11 @@ Mappings: AvailabilityZone: eu-west-3a Resources: - StageParameter: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/Stage' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/Stage" Description: "Stage of production" Value: !Ref Stage @@ -85,7 +63,7 @@ Resources: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/AppName' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/AppName" Description: "Name of application" Value: !Ref AppName @@ -93,7 +71,7 @@ Resources: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/CreateVpc' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/CreateVpc" Description: "True if stack creates a VPC, false if stack uses an existing VPC" Value: !Ref createVpc @@ -106,63 +84,43 @@ Resources: EnableDnsSupport: true Tags: - Key: Name - Value: !Sub '${Stage}-${AppName}-${AWS::Region}-vpc' - + Value: !Sub "${Stage}-${AppName}-${AWS::Region}-vpc" + CreateVpcConditionPublicSubnet: Type: AWS::EC2::Subnet Condition: CreateVpc Properties: CidrBlock: 10.0.0.0/24 VpcId: !Ref CreateVpcConditionVpc - AvailabilityZone: !FindInMap [AvailabilityZoneMap, !Ref AWS::Region, AvailabilityZone] + AvailabilityZone: + !FindInMap [AvailabilityZoneMap, !Ref AWS::Region, AvailabilityZone] MapPublicIpOnLaunch: true - - CreateVpcPrivateConditionPrivateSubnet: + + CreateVpcConditionPrivateSubnet: Type: AWS::EC2::Subnet - Condition: CreateVpcPrivate + Condition: CreateVpc Properties: CidrBlock: 10.0.1.0/24 VpcId: !Ref CreateVpcConditionVpc AvailabilityZone: !GetAtt CreateVpcConditionPublicSubnet.AvailabilityZone - + CreateVpcConditionInternetGateway: Type: AWS::EC2::InternetGateway Condition: CreateVpc - + CreateVpcConditionPublicRouteTable: Type: AWS::EC2::RouteTable Condition: CreateVpc Properties: VpcId: !Ref CreateVpcConditionVpc - - # # Required for NAT Gateway - # 
UsePrivateSubnetDeployNatGatewayConditionPrivateRouteTable: - # Type: AWS::EC2::RouteTable - # Condition: UsePrivateSubnetDeployNatGateway - # Properties: - # VpcId: !If - # - CreateVpc - # - !Ref CreateVpcConditionVpc - # - !Ref VpcId - - # # TODO Move to NAT Gateway stack - # # Required for NAT Gateway - # UsePrivateSubnetDeployNatGatewayConditionPrivateRouteTableParameter: - # Type: AWS::SSM::Parameter - # Condition: UsePrivateSubnetDeployNatGateway - # Properties: - # Type: String - # Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/PrivateRouteTableId' - # Description: !Sub "ID of the private route table for ${AppName}" - # Value: !Ref UsePrivateSubnetDeployNatGatewayConditionPrivateRouteTable - + CreateVpcConditionVpcGatewayAttachment: Type: AWS::EC2::VPCGatewayAttachment Condition: CreateVpc Properties: VpcId: !Ref CreateVpcConditionVpc InternetGatewayId: !Ref CreateVpcConditionInternetGateway - + CreateVpcConditionPublicRoute: Type: AWS::EC2::Route Condition: CreateVpc @@ -172,7 +130,7 @@ Resources: RouteTableId: !Ref CreateVpcConditionPublicRouteTable DestinationCidrBlock: 0.0.0.0/0 GatewayId: !Ref CreateVpcConditionInternetGateway - + CreateVpcConditionPublicSubnetRouteTableAssociation: Type: AWS::EC2::SubnetRouteTableAssociation Condition: CreateVpc @@ -180,68 +138,47 @@ Resources: RouteTableId: !Ref CreateVpcConditionPublicRouteTable SubnetId: !Ref CreateVpcConditionPublicSubnet - # # Required for NAT Gateway - # UsePrivateSubnetDeployNatGatewayConditionPrivateSubnetRouteTableAssociation: - # Type: AWS::EC2::SubnetRouteTableAssociation - # Condition: UsePrivateSubnetDeployNatGateway - # Properties: - # RouteTableId: !Ref UsePrivateSubnetDeployNatGatewayConditionPrivateRouteTable - # SubnetId: !If - # - CreateVpcPrivate - # - !Ref CreateVpcPrivateConditionPrivateSubnet - # - !Ref PrivateSubnetId - VpcIdParameter: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/VpcId' + Name: !Sub 
"/${AppName}/${Stage}/${AWS::Region}/VpcId" Description: !Sub "Name of VPC network for ${AppName}" - Value: !If + Value: !If - CreateVpc - !Ref CreateVpcConditionVpc - !Ref VpcId - + PublicSubnetIdParameter: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/PublicSubnetId' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/PublicSubnetId" Description: !Sub "Public Subnet for the ${AppName} Neo4j server" Value: !If - CreateVpc - !Ref CreateVpcConditionPublicSubnet - !Ref PublicSubnetId - - UsePrivateSubnetConditionPrivateSubnetIdParameter: + + PrivateSubnetIdParameter: Type: AWS::SSM::Parameter - Condition: UsePrivateSubnet Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/PrivateSubnetId' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/PrivateSubnetId" Description: !Sub "Private Subnet for the ${AppName} Neo4j server" Value: !If - - CreateVpcPrivate - - !Ref CreateVpcPrivateConditionPrivateSubnet + - CreateVpc + - !Ref CreateVpcConditionPrivateSubnet - !Ref PrivateSubnetId - + Neo4jSubnetIdParameter: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/Neo4jSubnetId' - Description: !Sub "Public Subnet for the ${AppName} Neo4j server" - Value: !If - - CreateVpc - - !If - - UsePrivateSubnet - - !Ref CreateVpcPrivateConditionPrivateSubnet - - !Ref CreateVpcConditionPublicSubnet - - !If - - UsePrivateSubnet - - !Ref PrivateSubnetId - - !Ref PublicSubnetId - + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/Neo4jSubnetId" + Description: !Sub "Subnet for the ${AppName} Neo4j server" + Value: !GetAtt PrivateSubnetIdParameter.Value + DataBucket: Type: AWS::S3::Bucket Properties: @@ -251,35 +188,35 @@ Resources: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/DataBucketName' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/DataBucketName" Description: "Name of gfe-db data bucket" Value: !Ref 
DataBucket - + DataBucketArnParameter: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/DataBucketArn' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/DataBucketArn" Description: "ARN of gfe-db data bucket" Value: !GetAtt DataBucket.Arn - + DataBucketRegionalDomainNameParameter: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/DataBucketRegionalDomainName' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/DataBucketRegionalDomainName" Description: "S3 Bucket Regional Domain name for application bucket" Value: !GetAtt DataBucket.RegionalDomainName BuildServerSecurityGroup: Type: AWS::EC2::SecurityGroup Properties: - GroupName: !Sub '${Stage}-${AppName}-build-server-sg' - GroupDescription: !Sub 'Security group for the ${Stage}-${AppName} build server' + GroupName: !Sub "${Stage}-${AppName}-build-server-sg" + GroupDescription: !Sub "Security group for the ${Stage}-${AppName} build server" VpcId: !If - CreateVpc - !Ref CreateVpcConditionVpc - - !Ref VpcId + - !Ref VpcId # SecurityGroupIngress: # Uncomment to allow SSH access to build instance # - CidrIp: 0.0.0.0/0 # FromPort: 22 @@ -287,20 +224,20 @@ Resources: # ToPort: 22 Tags: - Key: Name - Value: !Sub '${Stage}-${AppName}-build-server-sg' - + Value: !Sub "${Stage}-${AppName}-build-server-sg" + BuildServerSecurityGroupIdParameter: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/BuildServerSecurityGroupId' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/BuildServerSecurityGroupId" Description: "ID of the security group for the build server" Value: !Ref BuildServerSecurityGroup Neo4jDatabaseSecurityGroup: Type: AWS::EC2::SecurityGroup Properties: - GroupName: !Sub '${Stage}-${AppName}-neo4j-sg' + GroupName: !Sub "${Stage}-${AppName}-neo4j-sg" GroupDescription: Security group for the GFE database VpcId: !If - CreateVpc @@ -339,13 +276,13 @@ Resources: 
Description: Update IP to use MyIP Tags: - Key: Name - Value: !Sub '${Stage}-${AppName}-neo4j-sg' + Value: !Sub "${Stage}-${AppName}-neo4j-sg" Neo4jDatabaseSecurityGroupIdParameter: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseSecurityGroupId' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseSecurityGroupId" Description: "Name of the Neo4jDatabaseSecurityGroup" Value: !Ref Neo4jDatabaseSecurityGroup @@ -358,33 +295,6 @@ Resources: Description: GitHub Personal Access Token for repository access SecretString: !Sub '{"personal_access_token":"${GitHubPersonalAccessToken}"}' - UsePublicSubnetConditionNeo4jDatabaseElasticIp: - Type: AWS::EC2::EIP - Condition: UsePublicSubnet - Properties: - Domain: vpc - Tags: - - Key: Name - Value: !Sub '${Stage}-${AppName}-neo4j' - - UsePublicSubnetConditionNeo4jDatabaseEndpointParameter: - Type: AWS::SSM::Parameter - Condition: UsePublicSubnet - Properties: - Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseEndpoint' - Description: !Sub 'Endpoint for ${AppName} Neo4j server' - Value: !Ref UsePublicSubnetConditionNeo4jDatabaseElasticIp - - UsePublicSubnetConditionNeo4jDatabaseEndpointAllocationIdParameter: - Type: AWS::SSM::Parameter - Condition: UsePublicSubnet - Properties: - Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseEndpointAllocationId' - Description: !Sub 'AllocationId for ${AppName} Neo4j server static IP' - Value: !GetAtt UsePublicSubnetConditionNeo4jDatabaseElasticIp.AllocationId - DataPipelineErrorsTopicPolicy: Type: AWS::SNS::TopicPolicy Properties: @@ -414,8 +324,8 @@ Resources: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/DataPipelineErrorsTopicArn' - Description: !Sub 'ARN for ${AppName} Data Pipeline Errors SNS topic' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/DataPipelineErrorsTopicArn" + Description: !Sub "ARN for 
${AppName} Data Pipeline Errors SNS topic" Value: !Ref DataPipelineErrorsTopic DataPipelineExecutionTopicPolicy: @@ -442,11 +352,11 @@ Resources: Subscription: - Endpoint: !Ref AdminEmail Protocol: email - + DataPipelineExecutionTopicArnParameter: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/DataPipelineExecutionTopicArn' - Description: !Sub 'ARN for ${AppName} Data Pipeline Errors SNS topic' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/DataPipelineExecutionTopicArn" + Description: !Sub "ARN for ${AppName} Data Pipeline Errors SNS topic" Value: !Ref DataPipelineExecutionTopic diff --git a/gfe-db/pipeline/Makefile b/gfe-db/pipeline/Makefile index 17572b7e..f395d66b 100644 --- a/gfe-db/pipeline/Makefile +++ b/gfe-db/pipeline/Makefile @@ -32,7 +32,6 @@ service.deploy: Stage="$${STAGE}" \ AppName="$${APP_NAME}" \ createVpc="$${CREATE_VPC}" \ - usePrivateSubnet="$${USE_PRIVATE_SUBNET}" \ GitHubRepositoryOwner="${GITHUB_REPOSITORY_OWNER}" \ GitHubRepositoryName="${GITHUB_REPOSITORY_NAME}" \ InvokePipelineFunctionName="$${STAGE}"-"$${APP_NAME}"-"$$(cat functions/environment.json | jq -r '.Functions.InvokePipeline.FunctionConfiguration.FunctionName')" \ diff --git a/gfe-db/pipeline/functions/execute_validation_queries/app.py b/gfe-db/pipeline/functions/execute_validation_queries/app.py index 7b3ce7ad..f12a7fca 100644 --- a/gfe-db/pipeline/functions/execute_validation_queries/app.py +++ b/gfe-db/pipeline/functions/execute_validation_queries/app.py @@ -1,7 +1,5 @@ """ This function executes validation queries against the Neo4j database and returns the results. -If USE_PRIVATE_SUBNET is true, this function will run inside a VPC and private subnet. -If USE_PRIVATE_SUBNET is false, this function will run outside a VPC and in a public subnet. 
""" import os import logging diff --git a/gfe-db/pipeline/template.yaml b/gfe-db/pipeline/template.yaml index 8ca723b6..e63031a5 100644 --- a/gfe-db/pipeline/template.yaml +++ b/gfe-db/pipeline/template.yaml @@ -1,7 +1,6 @@ AWSTemplateFormatVersion: "2010-09-09" Transform: AWS::Serverless-2016-10-31 Description: Deploys IAM, ECR repositories, AWS Batch Jobs and a State Machine for the gfe-db update pipeline - Parameters: Stage: Type: String @@ -9,17 +8,7 @@ Parameters: AppName: Type: String Description: Application name - # createVpc: - # Type: String - # AllowedValues: - # - "true" - # - "false" - usePrivateSubnet: - Type: String - AllowedValues: - - "true" - - "false" - GitHubRepositoryOwner: + GitHubRepositoryOwner: Type: String GitHubRepositoryName: Type: String @@ -78,42 +67,9 @@ Parameters: Type: String FeatureServiceUrl: Type: String - Ec2KeyPairName: - Type: String - -Conditions: - # Important: All stacks (infra, database, pipeline) should use the same conditions - # CreateVpc: !Equals [!Ref createVpc, 'true'] - # UseExternalVpc: !Equals [!Ref createVpc, 'false'] - # UseExternalVpcPublic: !And [!Equals [!Ref createVpc, 'false'], !Equals [!Ref usePrivateSubnet, 'false']] - # UseExternalVpcPrivate: !And [!Equals [!Ref createVpc, 'false'], !Equals [!Ref usePrivateSubnet, 'true']] - UsePrivateSubnet: !Equals [!Ref usePrivateSubnet, 'true'] - # UsePublicSubnet: !Equals [!Ref usePrivateSubnet, 'false'] - + # Ec2KeyPairName: + # Type: String Resources: - # BuildServerSG: - # Type: AWS::EC2::SecurityGroup - # Properties: - # GroupName: !Sub '${Stage}-${AppName}-build-server-sg' - # GroupDescription: !Sub 'Security group for the ${Stage}-${AppName} build server' - # VpcId: !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/VpcId}}' - # # SecurityGroupIngress: # Uncomment to allow SSH access to build instance - # # - CidrIp: 0.0.0.0/0 - # # FromPort: 22 - # # IpProtocol: tcp - # # ToPort: 22 - # Tags: - # - Key: Name - # Value: !Sub 
'${Stage}-${AppName}-build-server-sg' - - # BuildServerSecurityGroupIdParameter: - # Type: AWS::SSM::Parameter - # Properties: - # Type: String - # Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/BuildServerSecurityGroupId' - # Description: "ID of the security group for the build server" - # Value: !Ref BuildServerSG - BatchServiceRole: Type: AWS::IAM::Role Properties: @@ -126,11 +82,13 @@ Resources: Action: sts:AssumeRole ManagedPolicyArns: - arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole + BatchWorkerInstanceProfile: Type: AWS::IAM::InstanceProfile Properties: Roles: - !Ref BatchWorkerInstanceRole + BatchWorkerInstanceRole: Type: AWS::IAM::Role Properties: @@ -147,8 +105,8 @@ Resources: - arn:aws:iam::aws:policy/AmazonS3FullAccess - arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy Policies: - - PolicyName: !Sub '${Stage}-${AppName}-ECS-CloudWatchLogs' - PolicyDocument: + - PolicyName: !Sub "${Stage}-${AppName}-ECS-CloudWatchLogs" + PolicyDocument: Version: "2012-10-17" Statement: - Action: @@ -158,12 +116,12 @@ Resources: - "logs:DescribeLogStreams" Effect: "Allow" Resource: "arn:aws:logs:*:*:*" - - PolicyName: !Sub '${Stage}-${AppName}-BuildServiceS3ReadWritePolicy' + - PolicyName: !Sub "${Stage}-${AppName}-BuildServiceS3ReadWritePolicy" PolicyDocument: Version: "2012-10-17" Statement: - Effect: "Allow" - Action: + Action: - "s3:GetObject" - "s3:ListBucket" - "s3:GetBucketLocation" @@ -174,14 +132,14 @@ Resources: - "s3:PutLifecycleConfiguration" - "s3:DeleteObject" Resource: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketArn}}' - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketArn}}/*' - - PolicyName: !Sub '${Stage}-${AppName}-SecretsPolicy' + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketArn}}" + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketArn}}/*" + - PolicyName: !Sub "${Stage}-${AppName}-SecretsPolicy" PolicyDocument: Version: "2012-10-17" Statement: - 
Effect: "Allow" - Action: + Action: - "ssm:GetParameters" - "ssm:GetParameter" - "secretsmanager:GetResourcePolicy" @@ -190,29 +148,16 @@ Resources: - "secretsmanager:ListSecretVersionIds" - "secretsmanager:ListSecrets" # TODO tighten permissions - Resource: - - !Sub 'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/*' - # - PolicyName: !Sub '${Stage}-${AppName}-SQSPolicy' - # PolicyDocument: - # Version: '2012-10-17' - # Statement: - # - Effect: Allow - # Action: - # - "sqs:GetQueueAttributes" - # - "sqs:SendMessageBatch" - # - "sqs:SendMessage" - # - "sqs:ReceiveMessage" - # - "sqs:DeleteMessage" - # Resource: - # - !GetAtt FailedAllelesQueue.Arn + Resource: + - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/*" BuildJobDefinition: Type: AWS::Batch::JobDefinition Properties: Type: container - JobDefinitionName: !Sub '${Stage}-${AppName}-BuildJobDefinition' + JobDefinitionName: !Sub "${Stage}-${AppName}-BuildJobDefinition" ContainerProperties: - Image: !Sub '${ECRBaseUri}/${BuildServiceRepositoryName}:latest' + Image: !Sub "${ECRBaseUri}/${BuildServiceRepositoryName}:latest" Vcpus: 8 # TODO: Memory param is deprecated, move to ResourceRequirements Memory: 8000 # Keep this around half the available RAM to avoid getting stuck in RUNNABLE status @@ -224,11 +169,9 @@ Resources: - run.sh Environment: - Name: GFE_BUCKET - Value: !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketName}}' + Value: !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketName}}" - Name: FEATURE_SERVICE_URL Value: !Ref FeatureServiceUrl - # - Name: FAILED_ALLELES_QUEUE - # Value: !Ref FailedAllelesQueue - Name: AWS_REGION Value: !Ref AWS::Region RetryStrategy: @@ -237,7 +180,7 @@ Resources: BuildJobQueue: Type: AWS::Batch::JobQueue Properties: - JobQueueName: !Sub '${Stage}-${AppName}-BuildJobQueue' + JobQueueName: !Sub 
"${Stage}-${AppName}-BuildJobQueue" Priority: 1 ComputeEnvironmentOrder: - Order: 1 @@ -251,36 +194,36 @@ Resources: Type: EC2 AllocationStrategy: BEST_FIT_PROGRESSIVE # # Testing only, comment before production deployment - Ec2KeyPair: !Ref Ec2KeyPairName + # Ec2KeyPair: !Ref Ec2KeyPairName MinvCpus: 0 DesiredvCpus: 0 MaxvCpus: 32 InstanceTypes: - c5d.2xlarge Subnets: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/PrivateSubnetId}}' + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/PrivateSubnetId}}" SecurityGroupIds: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/BuildServerSecurityGroupId}}' + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/BuildServerSecurityGroupId}}" InstanceRole: !Ref BatchWorkerInstanceProfile LaunchTemplate: LaunchTemplateId: !Ref BuildLaunchTemplate Tags: { "Name": "gfe-db-build-worker" } ServiceRole: !GetAtt BatchServiceRole.Arn - + BuildLaunchTemplate: Type: AWS::EC2::LaunchTemplate Properties: LaunchTemplateData: DisableApiTermination: false InstanceInitiatedShutdownBehavior: stop - ImageId: '{{resolve:ssm:/aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id}}' + ImageId: "{{resolve:ssm:/aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id}}" InstanceType: c5d.xlarge IamInstanceProfile: Name: !Ref BatchWorkerInstanceProfile Monitoring: Enabled: true SecurityGroupIds: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/BuildServerSecurityGroupId}}' + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/BuildServerSecurityGroupId}}" UserData: Fn::Base64: | MIME-Version: 1.0 @@ -304,10 +247,10 @@ Resources: --==MYBOUNDARY== TagSpecifications: - ResourceType: launch-template - Tags: + Tags: - Key: Name - Value: 'gfe-db-build-worker' - + Value: "gfe-db-build-worker" + BatchTaskExecutionRole: Type: AWS::IAM::Role Properties: @@ -320,8 +263,8 @@ Resources: Service: ecs-tasks.amazonaws.com Action: sts:AssumeRole Policies: - - PolicyName: !Sub 
'${Stage}-${AppName}-ecsTaskExecutionRolePolicy' - PolicyDocument: + - PolicyName: !Sub "${Stage}-${AppName}-ecsTaskExecutionRolePolicy" + PolicyDocument: Version: "2012-10-17" Statement: - Effect: Allow @@ -333,25 +276,25 @@ Resources: - "logs:CreateLogStream" - "logs:PutLogEvents" Resource: "*" - - PolicyName: !Sub '${Stage}-${AppName}-BatchTaskExecutionPolicy' + - PolicyName: !Sub "${Stage}-${AppName}-BatchTaskExecutionPolicy" PolicyDocument: Version: "2012-10-17" Statement: - Effect: "Allow" - Action: + Action: - "ssm:GetParameters" - Resource: - - !Sub 'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/*' + Resource: + - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/*" - Effect: "Allow" - Action: + Action: - "secretsmanager:GetResourcePolicy" - "secretsmanager:GetSecretValue" - "secretsmanager:DescribeSecret" - "secretsmanager:ListSecretVersionIds" - "secretsmanager:ListSecrets" - Resource: - - !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:/${AppName}/${Stage}/${AWS::Region}/*' - + Resource: + - !Sub "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:/${AppName}/${Stage}/${AWS::Region}/*" + BuildServiceRepository: Type: AWS::ECR::Repository Properties: @@ -363,7 +306,7 @@ Resources: Effect: "Allow" Principal: AWS: - - !Sub 'arn:aws:iam::${AWS::AccountId}:role/${BatchWorkerInstanceRole}' + - !Sub "arn:aws:iam::${AWS::AccountId}:role/${BatchWorkerInstanceRole}" Action: - "ecr:GetDownloadUrlForLayer" - "ecr:BatchGetImage" @@ -372,20 +315,20 @@ Resources: - "ecr:InitiateLayerUpload" - "ecr:UploadLayerPart" - "ecr:CompleteLayerUpload" - + BuildServiceRepositoryNameParameter: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/BuildServiceRepositoryName' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/BuildServiceRepositoryName" 
Description: "Name of gfe-db build service repository" Value: !Ref BuildServiceRepositoryName - + InvokePipelineFunction: Type: AWS::Serverless::Function Properties: FunctionName: !Ref InvokePipelineFunctionName - Description: !Sub '${AppName} update pipeline trigger: checks for new IMGT/HLA releases and starts the loading process' + Description: !Sub "${AppName} update pipeline trigger: checks for new IMGT/HLA releases and starts the loading process" CodeUri: functions/invoke_pipeline/ Handler: app.lambda_handler Runtime: python3.11 @@ -395,10 +338,10 @@ Resources: - x86_64 Environment: Variables: - GITHUB_PERSONAL_ACCESS_TOKEN: !Sub '{{resolve:secretsmanager:${AppName}-${Stage}-GitHubPersonalAccessToken:SecretString:personal_access_token:AWSCURRENT}}' + GITHUB_PERSONAL_ACCESS_TOKEN: !Sub "{{resolve:secretsmanager:${AppName}-${Stage}-GitHubPersonalAccessToken:SecretString:personal_access_token:AWSCURRENT}}" GITHUB_REPOSITORY_OWNER: !Ref GitHubRepositoryOwner GITHUB_REPOSITORY_NAME: !Ref GitHubRepositoryName - DATA_BUCKET_NAME: !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketName}}' + DATA_BUCKET_NAME: !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketName}}" UPDATE_PIPELINE_STATE_MACHINE_ARN: !GetAtt UpdatePipelineStateMachine.Arn PIPELINE_STATE_PATH: !Ref PipelineStatePath PIPELINE_PARAMS_PATH: !Ref PipelineParamsPath @@ -418,21 +361,21 @@ Resources: - "s3:PutObject" - "s3:ListBucket" Resource: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketArn}}' - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketArn}}/*' + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketArn}}" + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketArn}}/*" - Sid: "AllowSFNStartExecution" Effect: "Allow" Action: - "states:StartExecution" Resource: !GetAtt UpdatePipelineStateMachine.Arn - + InvokePipelineFunctionAlarm: Type: AWS::CloudWatch::Alarm Properties: - AlarmDescription: !Sub 
'Alarm for ${InvokePipelineFunctionName} function errors' + AlarmDescription: !Sub "Alarm for ${InvokePipelineFunctionName} function errors" ActionsEnabled: true AlarmActions: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataPipelineErrorsTopicArn}}' + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataPipelineErrorsTopicArn}}" MetricName: Errors Namespace: AWS/Lambda Statistic: Sum @@ -443,7 +386,7 @@ Resources: Dimensions: - Name: FunctionName Value: !Ref InvokePipelineFunctionName - + ExecuteValidationQueriesFunction: Type: AWS::Serverless::Function Properties: @@ -460,23 +403,20 @@ Resources: Variables: STAGE: !Ref Stage APP_NAME: !Ref AppName - VpcConfig: !If - - UsePrivateSubnet - - SubnetIds: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/PrivateSubnetId}}' - SecurityGroupIds: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseSecurityGroupId}}' - - !Ref AWS::NoValue - # If UsePrivateSubnet is true, attach AWSLambdaVPCAccessExecutionRole, otherwise attach AWSLambdaBasicExecutionRole + VpcConfig: + SubnetIds: + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/PrivateSubnetId}}" + SecurityGroupIds: + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseSecurityGroupId}}" Policies: - Version: "2012-10-17" Statement: - Effect: "Allow" - Action: + Action: - "ssm:GetParameters" - "ssm:GetParameter" - Resource: - - !Sub 'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/*' + Resource: + - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/*" - Effect: "Allow" Action: - "secretsmanager:GetSecretValue" @@ -484,26 +424,22 @@ Resources: - "secretsmanager:ListSecretVersionIds" - "secretsmanager:ListSecrets" Resource: - - !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:/${AppName}/${Stage}/${AWS::Region}/Neo4jCredentials*' + - !Sub 
"arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:/${AppName}/${Stage}/${AWS::Region}/Neo4jCredentials*" - Version: "2012-10-17" Statement: - Effect: "Allow" - Action: !If - - UsePrivateSubnet - - - "logs:CreateLogGroup" - - "logs:CreateLogStream" - - "logs:PutLogEvents" - - "ec2:CreateNetworkInterface" - - "ec2:DescribeNetworkInterfaces" - - "ec2:DeleteNetworkInterface" - - "ec2:AssignPrivateIpAddresses" - - "ec2:UnassignPrivateIpAddresses" - - - "logs:CreateLogGroup" - - "logs:CreateLogStream" - - "logs:PutLogEvents" - Resource: + Action: + - "logs:CreateLogGroup" + - "logs:CreateLogStream" + - "logs:PutLogEvents" + - "ec2:CreateNetworkInterface" + - "ec2:DescribeNetworkInterfaces" + - "ec2:DeleteNetworkInterface" + - "ec2:AssignPrivateIpAddresses" + - "ec2:UnassignPrivateIpAddresses" + Resource: - "*" - + InvokeBackupScriptFunction: Type: AWS::Serverless::Function Properties: @@ -525,25 +461,25 @@ Resources: - Version: "2012-10-17" Statement: - Effect: "Allow" - Action: + Action: - "ssm:SendCommand" - "ssm:GetDocument" Resource: - - !Sub 'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:document/${Stage}-${AppName}-database-Neo4jBackupDocument*' - - !Sub 'arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseInstanceId}}' - # - !Sub 'arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/*' + - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:document/${Stage}-${AppName}-database-Neo4jBackupDocument*" + - !Sub "arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseInstanceId}}" + # - !Sub 'arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/*' # development - Effect: "Allow" - Action: + Action: - "ssm:GetParameters" - "ssm:GetParameter" - Resource: - - !Sub 
'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/*' + Resource: + - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/*" - Effect: "Allow" - Action: + Action: - "ssm:GetCommandInvocation" - Resource: - - '*' - + Resource: + - "*" + InvokeLoadScriptFunction: Type: AWS::Serverless::Function Properties: @@ -558,8 +494,8 @@ Resources: - x86_64 Environment: Variables: - NEO4J_LOAD_QUERY_DOCUMENT_NAME_SSM_PARAM: !Sub '/${AppName}/${Stage}/${AWS::Region}/Neo4jLoadQueryDocumentName' - NEO4J_DATABASE_INSTANCE_ID_SSM_PARAM: !Sub '/${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseInstanceId' + NEO4J_LOAD_QUERY_DOCUMENT_NAME_SSM_PARAM: !Sub "/${AppName}/${Stage}/${AWS::Region}/Neo4jLoadQueryDocumentName" + NEO4J_DATABASE_INSTANCE_ID_SSM_PARAM: !Sub "/${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseInstanceId" LOAD_NEO4J_ACTIVITY: !Ref LoadNeo4jActivity APP_NAME: !Ref AppName Policies: @@ -567,27 +503,27 @@ Resources: - Version: "2012-10-17" Statement: - Effect: "Allow" - Action: + Action: - "ssm:SendCommand" - "ssm:GetDocument" Resource: - - !Sub 'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:document/${Neo4jLoadQueryDocument}' - - !Sub 'arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseInstanceId}}' - # - !Sub 'arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/*' + - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:document/${Neo4jLoadQueryDocument}" + - !Sub "arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/Neo4jDatabaseInstanceId}}" + # - !Sub 'arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/*' # development - Effect: "Allow" - Action: + Action: - "ssm:GetParameters" - "ssm:GetParameter" - Resource: - - !Sub 
'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/*' - + Resource: + - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/*" + InvokeLoadScriptFunctionAlarm: Type: AWS::CloudWatch::Alarm Properties: - AlarmDescription: !Sub 'Alarm for ${InvokeLoadScriptFunction} function errors' + AlarmDescription: !Sub "Alarm for ${InvokeLoadScriptFunction} function errors" ActionsEnabled: true AlarmActions: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataPipelineErrorsTopicArn}}' + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataPipelineErrorsTopicArn}}" MetricName: Errors Namespace: AWS/Lambda Statistic: Sum @@ -598,7 +534,7 @@ Resources: Dimensions: - Name: FunctionName Value: !Ref InvokeLoadScriptFunctionName - + ValidateBuildOutputFunction: Type: AWS::Serverless::Function Properties: @@ -620,31 +556,31 @@ Resources: - Version: "2012-10-17" Statement: - Effect: "Allow" - Action: + Action: - "ssm:GetParameters" - "ssm:GetParameter" - Resource: - - !Sub 'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/*' + Resource: + - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/*" - Version: "2012-10-17" Statement: - Effect: "Allow" - Action: + Action: - "s3:GetObject" - "s3:ListBucket" - "s3:ListObjects" - Resource: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketArn}}' - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketArn}}/*' - + Resource: + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketArn}}" + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketArn}}/*" + UpdatePipelineStateMachine: Type: AWS::Serverless::StateMachine Properties: DefinitionUri: statemachines/pipeline.asl.json DefinitionSubstitutions: AppName: !Ref AppName - DataBucketName: !Sub 
'{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketName}}' + DataBucketName: !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketName}}" BuildJobDefinition: !Ref BuildJobDefinition - BuildJobName: !Sub '${Stage}-${AppName}-build-job' + BuildJobName: !Sub "${Stage}-${AppName}-build-job" BuildJobQueue: !Ref BuildJobQueue ExecuteValidationQueriesFunctionArn: !GetAtt ExecuteValidationQueriesFunction.Arn ValidateBuildOutputFunctionArn: !GetAtt ValidateBuildOutputFunction.Arn @@ -661,7 +597,7 @@ Resources: - LambdaInvokePolicy: FunctionName: !Ref ValidateBuildOutputFunction - S3ReadPolicy: - BucketName: !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketName}}' + BucketName: !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketName}}" - Version: "2012-10-17" Statement: - Effect: "Allow" @@ -674,65 +610,66 @@ Resources: - "logs:PutResourcePolicy" - "logs:DescribeResourcePolicies" - "logs:DescribeLogGroups" - Resource: + Resource: - "*" - Version: "2012-10-17" Statement: - Effect: "Allow" - Action: + Action: - "batch:SubmitJob" - "batch:DescribeJobs" - "batch:TerminateJob" - Resource: - - !Sub 'arn:aws:batch:${AWS::Region}:${AWS::AccountId}:job-queue/${Stage}-${AppName}*' - - !Sub 'arn:aws:batch:${AWS::Region}:${AWS::AccountId}:job-definition/${Stage}-${AppName}-*' + Resource: + - !Sub "arn:aws:batch:${AWS::Region}:${AWS::AccountId}:job-queue/${Stage}-${AppName}*" + - !Sub "arn:aws:batch:${AWS::Region}:${AWS::AccountId}:job-definition/${Stage}-${AppName}-*" - Effect: "Allow" - Action: + Action: - "events:PutTargets" - "events:PutRule" - "events:DescribeRule" - Resource: - - !Sub 'arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/StepFunctionsGetEventsForBatchJobsRule' + Resource: + - !Sub "arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/StepFunctionsGetEventsForBatchJobsRule" Logging: - Destinations: - - CloudWatchLogsLogGroup: + Destinations: + - CloudWatchLogsLogGroup: LogGroupArn: !GetAtt 
UpdatePipelineLogGroup.Arn IncludeExecutionData: true Level: ALL - + LoadNeo4jActivity: Type: AWS::StepFunctions::Activity - Properties: + Properties: Name: !Sub "${AppName}-${Stage}-load-Neo4j" + UpdatePipelineLogGroup: Type: AWS::Logs::LogGroup UpdateReplacePolicy: Delete DeletionPolicy: Delete Properties: LogGroupName: !Sub "${Stage}-${AppName}-pipeline-execution-logs" - + UpdatePipelineArnParameter: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/UpdatePipelineArn' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/UpdatePipelineArn" Description: "ARN of gfe-db update pipeline state machine" Value: !GetAtt UpdatePipelineStateMachine.Arn - # CloudWatch Alarm for state machine execution in progress + UpdatePipelineStateMachineExecutionAlarm: Type: AWS::CloudWatch::Alarm Properties: - AlarmDescription: !Sub '${UpdatePipelineStateMachine} state machine execution in progress' + AlarmDescription: !Sub "${UpdatePipelineStateMachine} state machine execution in progress" ActionsEnabled: true AlarmActions: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataPipelineExecutionTopicArn}}' + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataPipelineExecutionTopicArn}}" OKActions: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataPipelineExecutionTopicArn}}' - Metrics: + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataPipelineExecutionTopicArn}}" + Metrics: - Id: m1 - MetricStat: - Metric: + MetricStat: + Metric: MetricName: ExecutionsStarted Namespace: AWS/States Dimensions: @@ -743,8 +680,8 @@ Resources: Unit: Count ReturnData: false - Id: m2 - MetricStat: - Metric: + MetricStat: + Metric: MetricName: ExecutionsSucceeded Namespace: AWS/States Dimensions: @@ -755,8 +692,8 @@ Resources: Unit: Count ReturnData: false - Id: m3 - MetricStat: - Metric: + MetricStat: + Metric: MetricName: ExecutionsFailed Namespace: AWS/States Dimensions: @@ -767,8 +704,8 @@ Resources: Unit: 
Count ReturnData: false - Id: m4 - MetricStat: - Metric: + MetricStat: + Metric: MetricName: ExecutionsAborted Namespace: AWS/States Dimensions: @@ -785,7 +722,7 @@ Resources: ComparisonOperator: GreaterThanThreshold Threshold: 0 EvaluationPeriods: 1 - + DisableBackupFunction: Type: AWS::Serverless::Function Properties: @@ -806,32 +743,32 @@ Resources: - Version: "2012-10-17" Statement: - Effect: "Allow" - Action: + Action: - "ssm:GetParameters" - "ssm:GetParameter" - Resource: - - !Sub 'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/*' + Resource: + - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:parameter/${AppName}/${Stage}/${AWS::Region}/*" - Version: "2012-10-17" Statement: - Effect: "Allow" - Action: + Action: - "ssm:UpdateMaintenanceWindow" - Resource: - - !Sub 'arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:maintenancewindow/{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/Neo4jBackupMaintenanceWindowId}}' + Resource: + - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:maintenancewindow/{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/Neo4jBackupMaintenanceWindowId}}" Events: PipelineExecutionTopic: Type: SNS Properties: - Topic: !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataPipelineExecutionTopicArn}}' - + Topic: !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataPipelineExecutionTopicArn}}" + # CloudWatch Alarm for failed pipeline executions UpdatePipelineStateMachineExecutionErrorsAlarm: Type: AWS::CloudWatch::Alarm Properties: - AlarmDescription: !Sub '${UpdatePipelineStateMachine} state machine errors' + AlarmDescription: !Sub "${UpdatePipelineStateMachine} state machine errors" ActionsEnabled: true AlarmActions: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataPipelineErrorsTopicArn}}' + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataPipelineErrorsTopicArn}}" MetricName: ExecutionsFailed 
Namespace: AWS/States Statistic: Sum @@ -842,15 +779,15 @@ Resources: Dimensions: - Name: StateMachineArn Value: !GetAtt UpdatePipelineStateMachine.Arn - + # CloudWatch Alarm for failed pipeline integrations (Batch jobs) UpdatePipelineStateMachineIntegrationAlarm: Type: AWS::CloudWatch::Alarm Properties: - AlarmDescription: !Sub '${UpdatePipelineStateMachine} state machine errors' + AlarmDescription: !Sub "${UpdatePipelineStateMachine} state machine errors" ActionsEnabled: true AlarmActions: - - !Sub '{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataPipelineErrorsTopicArn}}' + - !Sub "{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataPipelineErrorsTopicArn}}" MetricName: ServiceIntegrationsFailed Namespace: AWS/States Statistic: Sum @@ -860,19 +797,8 @@ Resources: ComparisonOperator: GreaterThanOrEqualToThreshold Dimensions: - Name: ServiceIntegrationResourceArn - Value: !Sub 'arn:aws:states:${AWS::Region}:${AWS::AccountId}:batch:submitJob.sync' - - # FailedAllelesQueue: - # Type: AWS::SQS::Queue - # Properties: - # VisibilityTimeout: 20 - # RedrivePolicy: - # deadLetterTargetArn: !GetAtt FailedAllelesDeadLetterQueue.Arn - # maxReceiveCount: 5 - # FailedAllelesDeadLetterQueue: - # Type: AWS::SQS::Queue - - Neo4jLoadQueryDocument: + Value: !Sub "arn:aws:states:${AWS::Region}:${AWS::AccountId}:batch:submitJob.sync" + Neo4jLoadQueryDocument: Type: AWS::SSM::Document Properties: DocumentType: "Command" @@ -890,11 +816,11 @@ Resources: type: "StringMap" description: !Sub "Downloads all files under the ${AppName} scripts prefix" default: - path: !Sub 'https://{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketName}}.s3.amazonaws.com/config/scripts/' + path: !Sub "https://{{resolve:ssm:/${AppName}/${Stage}/${AWS::Region}/DataBucketName}}.s3.amazonaws.com/config/scripts/" commandLine: type: "String" description: "These commands are invoked by a Lambda script which sets the correct parameters (Refer to documentation)." 
- default: 'make neo4j.start && bash start_task.sh' + default: "make neo4j.start && bash start_task.sh" workingDirectory: type: "String" description: "Working directory" @@ -912,22 +838,19 @@ Resources: destinationPath: "{{ workingDirectory }}" - action: "aws:runShellScript" name: "runShellScript" - inputs: + inputs: runCommand: - "" - "directory=$(pwd)" - "export PATH=$PATH:$directory" - - " {{ commandLine }} " + - " {{ commandLine }} " - "" workingDirectory: "{{ workingDirectory }}" timeoutSeconds: "{{ executionTimeout }}" - Neo4jLoadQueryDocumentNameParameter: Type: AWS::SSM::Parameter Properties: Type: String - Name: !Sub '/${AppName}/${Stage}/${AWS::Region}/Neo4jLoadQueryDocumentName' + Name: !Sub "/${AppName}/${Stage}/${AWS::Region}/Neo4jLoadQueryDocumentName" Description: "Name of SSM document for loading Neo4j" Value: !Ref Neo4jLoadQueryDocument - - \ No newline at end of file