|
# Toolchain pinning for this module.
terraform {
  # Terraform CLI 1.5 or newer is required.
  required_version = ">= 1.5"

  required_providers {
    # AWS provider from the HashiCorp registry namespace; "~> 5.0" accepts
    # any 5.x release but not 6.0.
    aws = {
      version = "~> 5.0"
      source  = "hashicorp/aws"
    }
  }
}
| 11 | + |
# Default AWS provider configuration; the region is taken from the
# aws_region input variable.
provider "aws" {
  region = var.aws_region
}
| 15 | + |
# ---------------------------------------------------------------------------
# Official NixOS AMI lookup
# The NixOS project publishes AMIs weekly from AWS account 427812963091.
# See: https://nixos.github.io/amis/
# ---------------------------------------------------------------------------
data "aws_ami" "nixos" {
  # Restrict the search to the NixOS publishing account and, among the
  # matches, pick the newest image.
  owners      = ["427812963091"]
  most_recent = true

  # x86_64 images only.
  filter {
    name   = "architecture"
    values = ["x86_64"]
  }

  # Image name must begin with "nixos/<channel>", where <channel> comes from
  # the nixos_channel input variable.
  filter {
    name   = "name"
    values = ["nixos/${var.nixos_channel}*"]
  }
}
| 35 | + |
locals {
  # Tags applied to every resource in this module: the caller-supplied tags
  # plus two fixed markers. merge() gives precedence to later arguments, so
  # the fixed Project/ManagedBy values win over same-named keys in var.tags.
  common_tags = merge(var.tags, {
    Project   = "hf-kernels-dev"
    ManagedBy = "terraform"
  })

  # First-boot shell script, base64-encoded as a whole.
  #
  # Payloads that may contain characters significant to the shell or to
  # Terraform templates (the Nix configuration with its ${ ... } expressions,
  # the Cachix credentials file) are embedded as base64 inside quoted heredocs
  # and decoded on the instance, so no escaping of their content is needed.
  #
  # NOTE(review): the script hard-codes /dev/nvme1n1 as the data volume. That
  # presumes the instance exposes EBS volumes as NVMe devices and has no other
  # NVMe disk that could take that name — confirm for the allowed instance
  # types.
  # NOTE(review): when set, the Cachix token becomes part of the instance user
  # data, which is typically readable from the instance metadata service —
  # confirm this exposure is acceptable or move the token to a secrets store.
  user_data = base64encode(join("", [
    "#!/bin/sh\n",
    "set -e\n",
    # Wait for the EBS data volume to be attached.
    # Terraform attaches it after instance creation, so it may not be present
    # immediately at boot. Poll for up to 5 minutes (30 x 10 s).
    "echo 'Waiting for data volume /dev/nvme1n1...'\n",
    "for i in $(seq 1 30); do\n",
    "  [ -b /dev/nvme1n1 ] && break\n",
    "  sleep 10\n",
    "done\n",
    # Format (first boot only) and mount the data volume, then create the
    # directories that NixOS will later bind-mount over /nix/store.
    "if [ -b /dev/nvme1n1 ]; then\n",
    "  if ! blkid /dev/nvme1n1 | grep -q ext4; then\n",
    "    mkfs.ext4 -L kernels-data /dev/nvme1n1\n",
    "  fi\n",
    "  mkdir -p /data\n",
    "  mount /dev/nvme1n1 /data\n",
    "  mkdir -p /data/nix-store /data/workspace\n",
    "else\n",
    # Still best-effort (the script carries on), but a timed-out wait is now
    # visible in the boot log instead of being skipped silently.
    "  echo 'WARNING: data volume /dev/nvme1n1 did not appear within 5 minutes; continuing without /data' >&2\n",
    "fi\n",
    # Decode and write the NixOS configuration.
    "base64 -d > /etc/nixos/configuration.nix << 'B64EOF'\n",
    filebase64("${path.module}/nixos-configuration.nix"),
    "\nB64EOF\n",
    # Write the Cachix auth token if one was provided. The file content is
    # rendered by Terraform and shipped as base64, so characters such as
    # % \ or ' in the token can neither be reinterpreted by printf nor break
    # the shell quoting.
    var.cachix_auth_token != "" ? join("", [
      "mkdir -p /root/.config/cachix\n",
      "base64 -d > /root/.config/cachix/cachix.dhall << 'B64EOF'\n",
      base64encode("{\n authToken = \"${var.cachix_auth_token}\";\n}\n"),
      "\nB64EOF\n",
      "chmod 600 /root/.config/cachix/cachix.dhall\n",
    ]) : "",
    # Apply the configuration (installs all packages including cachix).
    # The output goes to a log file rather than through a pipe: in a pipeline
    # the exit status is that of the last command (tail), which would hide a
    # failed rebuild from `set -e`. The last 20 lines are still echoed, and the
    # script stops with nixos-rebuild's own exit status on failure.
    "rebuild_status=0\n",
    "nixos-rebuild switch > /var/log/nixos-rebuild-first-boot.log 2>&1 || rebuild_status=$?\n",
    "tail -20 /var/log/nixos-rebuild-first-boot.log\n",
    "[ \"$rebuild_status\" -eq 0 ] || exit \"$rebuild_status\"\n",
    # Register the huggingface Cachix binary cache — mirrors cachix-action@v16.
    "cachix use huggingface\n",
  ]))
}
| 82 | + |
# ---------------------------------------------------------------------------
# EC2 instance running NixOS
# ---------------------------------------------------------------------------
resource "aws_instance" "kernels_dev" {
  # Image, size and SSH key.
  ami           = data.aws_ami.nixos.id
  instance_type = var.instance_type
  key_name      = var.key_pair_name

  # Network placement: caller-chosen subnet and security group, with a public
  # IP address requested.
  subnet_id                   = var.subnet_id
  vpc_security_group_ids      = [var.security_group_id]
  associate_public_ip_address = true

  # First-boot provisioning script (already base64-encoded in local.user_data).
  # The script embeds nixos-configuration.nix, so editing that file changes the
  # user data, and user_data_replace_on_change then replaces the instance.
  user_data_replace_on_change = true
  user_data                   = local.user_data

  # Only token-based (IMDSv2) metadata requests are accepted.
  metadata_options {
    http_tokens = "required"
  }

  # Encrypted gp3 root disk, removed together with the instance.
  root_block_device {
    encrypted             = true
    volume_type           = "gp3"
    volume_size           = var.root_volume_size_gb
    delete_on_termination = true
  }

  tags = merge(local.common_tags, { Name = var.instance_name })
}
| 114 | + |
# ---------------------------------------------------------------------------
# Extra EBS data volume (Nix store spillover, build artefacts, source trees)
# ---------------------------------------------------------------------------
resource "aws_ebs_volume" "data" {
  # Created in the instance's availability zone so it can be attached to it.
  availability_zone = aws_instance.kernels_dev.availability_zone
  encrypted         = true

  # Capacity and performance are all caller-configurable.
  # NOTE(review): iops/throughput are presumably only valid for some volume
  # types — confirm the variable defaults agree with data_volume_type.
  type       = var.data_volume_type
  size       = var.data_volume_size_gb
  iops       = var.data_volume_iops
  throughput = var.data_volume_throughput

  tags = merge(local.common_tags, { Name = "${var.instance_name}-data" })
}
| 128 | + |
# Attaches the data volume to the dev instance.
# NOTE(review): the attachment requests /dev/xvdf, while the first-boot script
# in local.user_data polls /dev/nvme1n1 — confirm the instance types in use
# expose the volume under that NVMe name.
resource "aws_volume_attachment" "data" {
  instance_id = aws_instance.kernels_dev.id
  volume_id   = aws_ebs_volume.data.id
  device_name = "/dev/xvdf"

  # Never force a detach.
  force_detach = false
}
0 commit comments