gitpod-io
diff --git a/‎.goreleaser.yaml
Lines changed: 2 additions & 0 deletions b/‎.goreleaser.yaml
Lines changed: 2 additions & 0 deletions
diff --git a/‎gitpod-network-check/.gitignore
Lines changed: 2 additions & 2 deletions b/‎gitpod-network-check/.gitignore
Lines changed: 2 additions & 2 deletions
diff --git a/‎gitpod-network-check/Makefile
Lines changed: 4 additions & 0 deletions b/‎gitpod-network-check/Makefile
Lines changed: 4 additions & 0 deletions
diff --git a/‎gitpod-network-check/README.md
Lines changed: 55 additions & 15 deletions b/‎gitpod-network-check/README.md
Lines changed: 55 additions & 15 deletions
diff --git a/‎gitpod-network-check/cmd/checks.go
Lines changed: 22 additions & 10 deletions b/‎gitpod-network-check/cmd/checks.go
Lines changed: 22 additions & 10 deletions
diff --git a/‎gitpod-network-check/cmd/cleanup.go
Lines changed: 8 additions & 4 deletions b/‎gitpod-network-check/cmd/cleanup.go
Lines changed: 8 additions & 4 deletions
diff --git a/‎gitpod-network-check/cmd/lambda_handler.go
Lines changed: 113 additions & 0 deletions b/‎gitpod-network-check/cmd/lambda_handler.go
Lines changed: 113 additions & 0 deletions
@@ -14,6 +14,8 @@ builds:
     ignore:
       - goos: windows
         goarch: arm64
+    ldflags:
+      - -s -w -extldflags=-static
     binary: gitpod-network-check
 
 archives:
 
@@ -1,2 +1,2 @@
-
-gitpod-network-check
+gitpod-network-check
+*.zip
@@ -0,0 +1,4 @@
+.PHONY: build
+
+build:
+	GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -a -ldflags="-s -w -extldflags=-static" -o gitpod-network-check main.go
@@ -44,17 +44,28 @@ A CLI to check if your network setup is suitable for the installation of Gitpod.
    pod-subnets: subnet-028d11dce93b8eefc, subnet-04ec8257d95c434b7,subnet-00a83550ce709f39c
    https-hosts: accounts.google.com, github.com
    instance-ami: # put your custom AMI ID here if you want to use one; otherwise the latest Ubuntu AMI from AWS is used
-   api-endpoint: # optional, put your API endpoint regional sub-domain here to test connectivity, like when the execute-api vpc endpoint is not in the same account as Gitpod 
+   api-endpoint: # optional, put your API endpoint regional sub-domain here to test connectivity, like when the execute-api vpc endpoint is not in the same account as Gitpod
+   # lambda-role-arn: arn:aws:iam::123456789012:role/MyExistingLambdaRole # Optional: Use existing IAM Role for Lambda mode
+   # lambda-sg-id: sg-0123456789abcdef0 # Optional: Use existing Security Group for Lambda mode
    ```
 
-   note: if using a custom AMI, please ensure the [SSM agent](https://docs.aws.amazon.com/systems-manager/latest/userguide/manually-install-ssm-agent-linux.html) and [curl](https://curl.se/) are both installed. We rely on SSM's [SendCommand](https://docs.aws.amazon.com/code-library/latest/ug/ssm_example_ssm_SendCommand_section.html) to test HTTPS connectivity.
+   **Note:** The `lambda-role-arn` and `lambda-sg-id` fields correspond to the `--lambda-role-arn` and `--lambda-sg-id` command-line flags, respectively. Setting them in the config file or via environment variables (e.g., `NTCHK_LAMBDA_ROLE_ARN`) achieves the same result.
+
+   **EC2 Mode Note:** If using a custom AMI (`instance-ami`), please ensure the [SSM agent](https://docs.aws.amazon.com/systems-manager/latest/userguide/manually-install-ssm-agent-linux.html) and [curl](https://curl.se/) are both installed. We rely on SSM's [SendCommand](https://docs.aws.amazon.com/code-library/latest/ug/ssm_example_ssm_SendCommand_section.html) to test HTTPS connectivity in EC2 mode.
 
 2. Run the network diagnosis
 
-   To start the diagnosis, the the command: `./gitpod-network-check diagnose`
+   The tool supports different modes for running the checks, specified by the `--mode` flag (`ec2`, `lambda`, `local`).
+
+   **Using EC2 Mode (Default):**
+
+   This mode launches temporary EC2 instances in your specified subnets to perform the network checks. This most closely simulates the environment where Gitpod components will run.
+
+   To start the diagnosis using EC2 mode: `./gitpod-network-check diagnose --mode ec2` (or simply `./gitpod-network-check diagnose` as EC2 is the default).
 
    ```console
-   ./gitpod-network-check diagnose
+   # Example output for EC2 mode
+   ./gitpod-network-check diagnose --mode ec2
    INFO[0000] ℹ️  Running with region `eu-central-1`, main subnet `[subnet-0ed211f14362b224f  subnet-041703e62a05d2024]`, pod subnet `[subnet-075c44edead3b062f  subnet-06eb311c6b92e0f29]`, hosts `[accounts.google.com  https://github.com]`, ami ``, and API endpoint `` 
    INFO[0000] ✅ Main Subnets are valid                     
    INFO[0000] ✅ Pod Subnets are valid                      
@@ -116,22 +127,51 @@ A CLI to check if your network setup is suitable for the installation of Gitpod.
    INFO[0306] ✅ Security group 'sg-00d4a66a7840ebd67' deleted 
    ```
 
+   **Using Lambda Mode:**
+
+   This mode uses AWS Lambda functions deployed into your specified subnets to perform the network checks. It avoids the need to launch full EC2 instances but has its own prerequisites.
+
+   *   **Prerequisites for Lambda Mode:**
+       *   **IAM Permissions:** The AWS credentials used to run `gitpod-network-check` need permissions to manage Lambda functions, IAM roles, security groups, and CloudWatch Logs. Specifically, it needs to perform actions like: `lambda:CreateFunction`, `lambda:GetFunction`, `lambda:DeleteFunction`, `lambda:InvokeFunction`, `iam:CreateRole`, `iam:GetRole`, `iam:DeleteRole`, `iam:AttachRolePolicy`, `iam:DetachRolePolicy`, `ec2:CreateSecurityGroup`, `ec2:DescribeSecurityGroups`, `ec2:DeleteSecurityGroup`, `ec2:AuthorizeSecurityGroupEgress`, `ec2:DescribeSubnets`, `logs:DeleteLogGroup`.
+       *   **Network Connectivity:** Lambda functions running within a VPC need a route to the internet or required AWS service endpoints. This typically requires a **NAT Gateway** in your VPC or **VPC Endpoints** for all necessary services (e.g., STS, CloudWatch Logs, ECR, S3, DynamoDB, and any target HTTPS hosts). Without proper outbound connectivity, the Lambda checks will fail.
+
+   *   **Running Lambda Mode:**
+       To start the diagnosis using Lambda mode:
+       ```bash
+       ./gitpod-network-check diagnose --mode lambda
+       ```
+
+   *   **Using Existing Resources (Lambda Mode):**
+       If you have pre-existing IAM roles or Security Groups you want the Lambda functions to use, you can specify them using flags. This will prevent the tool from creating or deleting these specific resources.
+       ```bash
+       ./gitpod-network-check diagnose --mode lambda \
+         --lambda-role-arn arn:aws:iam::123456789012:role/MyExistingLambdaRole \
+         --lambda-sg-id sg-0123456789abcdef0 
+       ```
+
+   *   **Example Output (Lambda Mode):**
+       The output will be similar to EC2 mode but will show Lambda function creation/invocation instead of EC2 instance management.
+
+   **Using Local Mode:**
+
+   This mode runs the checks directly from the machine where you execute the CLI. It's useful for basic outbound connectivity tests but **does not** accurately reflect the network environment within your AWS subnets.
+
+   To start the diagnosis using local mode: `./gitpod-network-check diagnose --mode local`
+
 3. Clean up after network diagnosis
 
-   Dianosis is designed to do clean-up before it finishes. However, if the process terminates unexpectedly, you may clean-up AWS resources it creates like so:
+   The `diagnose` command is designed to clean up the AWS resources it creates (EC2 instances, Lambda functions, IAM roles, Security Groups, CloudWatch Log groups) before it finishes. However, if the process terminates unexpectedly, you can manually trigger cleanup using the `clean` command. This command respects the `--mode` flag to clean up resources specific to that mode.
 
-   ```console
-   ./gitpod-network-check clean
-   INFO[0000] ✅ Main Subnets are valid
-   INFO[0000] ✅ Pod Subnets are valid
-   INFO[0000] ✅ Instances terminated
-   INFO[0000] Cleaning up: Waiting for 2 minutes so network interfaces are deleted
-   INFO[0121] ✅ Role 'GitpodNetworkCheck' deleted
-   INFO[0121] ✅ Instance profile deleted
-   INFO[0122] ✅ Security group 'sg-0a6119dcb6a564fc1' deleted
-   INFO[0122] ✅ Security group 'sg-07373362953212e54' deleted
+   ```bash
+   # Clean up resources potentially left by EC2 mode
+   ./gitpod-network-check clean --mode ec2 
+
+   # Clean up resources potentially left by Lambda mode
+   ./gitpod-network-check clean --mode lambda
    ```
 
+   **Note:** The `clean` command will *not* delete IAM roles or Security Groups if they were provided using the `--lambda-role-arn` or `--lambda-sg-id` flags during the `diagnose` run.
+
 ## FAQ
 
 If the EC2 instances are timing out, or you cannot connect to them with Session Manager, be sure to add the following policies.
 
@@ -12,20 +12,32 @@ import (
 	testrunner "github.com/gitpod-io/enterprise-deployment-toolkit/gitpod-network-check/pkg/runner"
 )
 
+// skipCleanup is bound to the --skip-cleanup flag of the diagnose command.
+// When true, the deferred cleanup in the command's RunE logs a notice and
+// returns without cleaning anything up.
+var skipCleanup bool
+
+// init binds the --skip-cleanup flag to checkCommand and registers
+// checkCommand as a sub-command of NetworkCheckCmd.
+func init() {
+	checkCommand.Flags().BoolVar(&skipCleanup, "skip-cleanup", false, "Skip the cleanup phase (default: false). Useful for debugging purposes.")
+	NetworkCheckCmd.AddCommand(checkCommand)
+}
+
 var checkCommand = &cobra.Command{ // nolint:gochecknoglobals
-	PersistentPreRunE: validateArguments,
-	Use:               "diagnose",
-	Short:             "Runs the network check diagnosis",
-	SilenceUsage:      false,
+	PreRunE:      validateArguments,
+	Use:          "diagnose",
+	Short:        "Runs the network check diagnosis",
+	SilenceUsage: false,
 	RunE: func(cmd *cobra.Command, args []string) error {
 		ctx := cmd.Context()
 
-		runner, err := testrunner.NewRunner(ctx, flags.Mode, &networkConfig)
+		runner, err := testrunner.NewRunner(ctx, Flags.Mode, &NetworkConfig)
 		if err != nil {
 			return fmt.Errorf("❌  failed to create test runner: %v", err)
 		}
 
 		defer (func() {
+			if skipCleanup {
+				log.Info("⚠️  Skipping cleanup, because --skip-cleanup flag is set.")
+				return
+			}
+
 			// Ensure runner was actually assigned before trying to clean up
 			if runner == nil {
 				log.Info("ℹ️  No runner initialized, skipping cleanup.")
@@ -45,12 +57,12 @@ var checkCommand = &cobra.Command{ // nolint:gochecknoglobals
 			return fmt.Errorf("❌  failed to prepare: %v", err)
 		}
 
-		for _, testset := range flags.SelectedTestsets {
+		for _, testset := range Flags.SelectedTestsets {
 			log.Infof("ℹ️  Running testset: %s", testset)
 
 			ts := checks.TestSets[checks.TestsetName(testset)]
-			serviceEndpoints, subnetType := ts(&networkConfig)
-			subnets := Filter(networkConfig.GetAllSubnets(), func(subnet checks.Subnet) bool {
+			serviceEndpoints, subnetType := ts(&NetworkConfig)
+			subnets := Filter(NetworkConfig.GetAllSubnets(), func(subnet checks.Subnet) bool { 
 				return subnet.Type == subnetType
 			})
 
@@ -73,8 +85,8 @@ var checkCommand = &cobra.Command{ // nolint:gochecknoglobals
 
 func validateArguments(cmd *cobra.Command, args []string) error {
 	// Validate testsets if specified
-	if len(flags.SelectedTestsets) > 0 {
-		for _, testset := range flags.SelectedTestsets {
+	if len(Flags.SelectedTestsets) > 0 {
+		for _, testset := range Flags.SelectedTestsets {
 			if _, exists := checks.TestSets[checks.TestsetName(testset)]; !exists {
 				return fmt.Errorf("Invalid testset: %s. Available testsets: %v",
 					testset,
 
@@ -10,14 +10,14 @@ import (
 )
 
 var cleanCommand = &cobra.Command{ // nolint:gochecknoglobals
-	Use:               "clean",
-	Short:             "Explicitly cleans up after the network check diagnosis",
-	SilenceUsage:      false,
+	Use:          "clean",
+	Short:        "Explicitly cleans up after the network check diagnosis",
+	SilenceUsage: false,
 	RunE: func(cmd *cobra.Command, args []string) error {
 		ctx := cmd.Context()
 
 		log.Infof("ℹ️ Running cleanup")
-		runner, err := runner.NewRunner(ctx, flags.Mode, &networkConfig)
+		runner, err := runner.LoadRunnerFromTags(ctx, Flags.Mode, &NetworkConfig)
 		if err != nil {
 			return fmt.Errorf("❌  failed to create test runner: %v", err)
 		}
@@ -31,3 +31,7 @@ var cleanCommand = &cobra.Command{ // nolint:gochecknoglobals
 		return nil
 	},
 }
+
+func init() {
+	NetworkCheckCmd.AddCommand(cleanCommand) // Register the clean command on the parent command
+}
@@ -0,0 +1,113 @@
+package cmd
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"time"
+
+	log "github.com/sirupsen/logrus"
+	"github.com/spf13/cobra"
+
+	"github.com/gitpod-io/enterprise-deployment-toolkit/gitpod-network-check/pkg/lambda_types"
+)
+
+// lambdaHandlerCmd is the hidden "lambda-handler" sub-command. It reads a
+// JSON-encoded lambda_types.CheckRequest from stdin, issues an HTTP GET against
+// every endpoint in the request, and writes a JSON-encoded
+// lambda_types.CheckResponse to stdout. All diagnostics go to stderr so that
+// stdout carries nothing but the JSON response.
+var lambdaHandlerCmd = &cobra.Command{
+	Use:    "lambda-handler",
+	Short:  "Internal command to execute network checks within AWS Lambda (reads JSON request from stdin, writes JSON response to stdout)",
+	Hidden: true, // Hide this command from user help output
+	// Input arrives via stdin, so flag parsing is disabled for this internal command.
+	DisableFlagParsing: true,
+	// Defining the hook here overrides the parent's, as this command does not
+	// care about the config or other flags. The only setup needed is routing
+	// logrus output to stderr to keep stdout clean for the JSON response.
+	PersistentPreRun: func(cmd *cobra.Command, args []string) {
+		log.SetOutput(os.Stderr)
+	},
+	RunE: func(cmd *cobra.Command, args []string) error {
+		// Lambda environment might not have sophisticated logging setup, print directly
+		fmt.Fprintln(os.Stderr, "Lambda Handler: Starting execution.")
+
+		// Read the whole request payload from stdin before decoding it.
+		payload, err := io.ReadAll(os.Stdin)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "Lambda Handler: Error reading stdin: %v\n", err)
+			return fmt.Errorf("error reading stdin: %w", err)
+		}
+
+		var request lambda_types.CheckRequest
+		if err := json.Unmarshal(payload, &request); err != nil {
+			fmt.Fprintf(os.Stderr, "Lambda Handler: Error unmarshalling request JSON: %v\n", err)
+			fmt.Fprintf(os.Stderr, "Lambda Handler: Received input: %s\n", string(payload))
+			return fmt.Errorf("error unmarshalling request: %w", err)
+		}
+
+		fmt.Fprintf(os.Stderr, "Lambda Handler: Received check request for %d endpoints.\n", len(request.Endpoints))
+
+		response := lambda_types.CheckResponse{
+			Results: make(map[string]lambda_types.CheckResult),
+		}
+
+		// One client is shared by all checks; the timeout bounds each request.
+		client := &http.Client{
+			Timeout: 10 * time.Second,
+		}
+
+		// A failing endpoint is recorded in the response and never aborts the
+		// run, so the caller always receives a result for every endpoint.
+		for name, target := range request.Endpoints {
+			fmt.Fprintf(os.Stderr, "Lambda Handler: Checking endpoint: %s (%s)\n", name, target)
+
+			req, err := http.NewRequestWithContext(cmd.Context(), http.MethodGet, target, nil)
+			if err != nil {
+				response.Results[name] = lambda_types.CheckResult{Success: false, Error: fmt.Sprintf("failed to create request: %v", err)}
+				fmt.Fprintf(os.Stderr, "  -> Failed (request creation): %v\n", err)
+				continue
+			}
+
+			resp, err := client.Do(req)
+			if err != nil {
+				response.Results[name] = lambda_types.CheckResult{Success: false, Error: fmt.Sprintf("HTTP request failed: %v", err)}
+				fmt.Fprintf(os.Stderr, "  -> Failed (HTTP request): %v\n", err)
+				continue
+			}
+			// Only the status code is inspected; the body is never read, so a
+			// Close error would not be actionable and is deliberately ignored.
+			resp.Body.Close()
+
+			// Anything outside the 2xx range counts as a failed check.
+			if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+				response.Results[name] = lambda_types.CheckResult{Success: false, Error: fmt.Sprintf("unexpected status code: %d", resp.StatusCode)}
+				fmt.Fprintf(os.Stderr, "  -> Failed (Status: %d)\n", resp.StatusCode)
+				continue
+			}
+
+			response.Results[name] = lambda_types.CheckResult{Success: true}
+			fmt.Fprintf(os.Stderr, "  -> Success (Status: %d)\n", resp.StatusCode)
+		}
+
+		// Marshal the response payload and emit it on stdout.
+		responseBytes, err := json.Marshal(response)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "Lambda Handler: Error marshalling response JSON: %v\n", err)
+			return fmt.Errorf("error marshalling response: %w", err)
+		}
+
+		// Write the bytes directly; no trailing newline is added.
+		if _, err := os.Stdout.Write(responseBytes); err != nil {
+			fmt.Fprintf(os.Stderr, "Lambda Handler: Error writing response to stdout: %v\n", err)
+			return fmt.Errorf("error writing response: %w", err)
+		}
+
+		fmt.Fprintln(os.Stderr, "Lambda Handler: Execution complete.")
+		return nil
+	},
+}
+
+// init registers the hidden lambda handler command on NetworkCheckCmd.
+func init() {
+	NetworkCheckCmd.AddCommand(lambdaHandlerCmd)
+}
-Original file line number
+Diff line change
@@ @@ -1,2 +1,2 @@ @@
+-
 -gitpod-network-check
 +gitpod-network-check
 +*.zip