elastic
diff --git a/‎.github/workflows/esql-validation.yml‎
Lines changed: 112 additions & 0 deletions b/‎.github/workflows/esql-validation.yml‎
Lines changed: 112 additions & 0 deletions
diff --git a/‎.github/workflows/pythonpackage.yml‎
Lines changed: 7 additions & 0 deletions b/‎.github/workflows/pythonpackage.yml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎CLI.md‎
Lines changed: 4 additions & 0 deletions b/‎CLI.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎detection_rules/cli_utils.py‎
Lines changed: 101 additions & 1 deletion b/‎detection_rules/cli_utils.py‎
Lines changed: 101 additions & 1 deletion
diff --git a/‎detection_rules/devtools.py‎
Lines changed: 79 additions & 2 deletions b/‎detection_rules/devtools.py‎
Lines changed: 79 additions & 2 deletions
@@ -0,0 +1,112 @@
+# Runs remote ES|QL rule validation when a pull request touches rule TOML files.
+name: ES|QL Validation
+on:
+  pull_request:
+    branches:
+      - "*"
+    paths:
+      - "rules/**/*.toml"
+jobs:
+  build-and-validate:
+    runs-on: ubuntu-latest
+
+    steps:
+      # Full history (fetch-depth: 0) so that later steps can diff against HEAD~1.
+      - name: Setup Detection Rules
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8  # v5
+        with:
+          fetch-depth: 0
+          path: detection-rules
+
+      # Decides whether the remaining steps run by exporting run_esql=true|false
+      # to the job environment. Validation is only requested when at least one
+      # added/modified rule file under rules/ declares type = "esql".
+      - name: Check if new or modified rule files are ESQL rules
+        id: check-esql
+        run: |
+          cd detection-rules
+
+          # This workflow currently only triggers on pull_request; the push
+          # branch is kept so validation always runs if a push trigger is added.
+          if [ "${{ github.event_name }}" = "push" ]; then
+            echo "Triggered by a push event. Setting run_esql=true."
+            echo "run_esql=true" >> "$GITHUB_ENV"
+            exit 0
+          fi
+
+          # Read the added/modified rule paths into an array (one path per line)
+          # so they are never word-split or glob-expanded by the shell.
+          mapfile -t MODIFIED_FILES < <(git diff --name-only --diff-filter=AM HEAD~1 | grep '^rules/.*\.toml$' || true)
+          if [ "${#MODIFIED_FILES[@]}" -eq 0 ]; then
+            echo "No modified or new .toml files found. Skipping workflow."
+            echo "run_esql=false" >> "$GITHUB_ENV"
+            exit 0
+          fi
+
+          if ! grep -q -- 'type = "esql"' "${MODIFIED_FILES[@]}"; then
+            echo "No 'type = \"esql\"' found in the modified .toml files. Skipping workflow."
+            echo "run_esql=false" >> "$GITHUB_ENV"
+            exit 0
+          fi
+
+          echo "run_esql=true" >> "$GITHUB_ENV"
+
+      # Local-stack fallback: the next two steps only run when neither cloud
+      # secret is configured and ES|QL validation was requested.
+      # NOTE(review): no `ref` is given for the third-party repository, so
+      # whatever its default branch currently holds is executed below; consider
+      # pinning a commit SHA.
+      - name: Check out repository
+        if: ${{ !env.DR_CLOUD_ID && !env.DR_API_KEY && env.run_esql == 'true' }}
+        env:
+          DR_CLOUD_ID: ${{ secrets.dr_cloud_id }}
+          DR_API_KEY: ${{ secrets.dr_api_key }}
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8  # v5
+        with:
+          repository: peasead/elastic-container
+          path: elastic-container
+
+      # Replaces the placeholder password in .env with a random one, masks it in
+      # the logs, exports it for later steps, then starts the stack.
+      - name: Build and run containers
+        if: ${{ !env.DR_CLOUD_ID && !env.DR_API_KEY && env.run_esql == 'true' }}
+        env:
+          DR_CLOUD_ID: ${{ secrets.dr_cloud_id }}
+          DR_API_KEY: ${{ secrets.dr_api_key }}
+        run: |
+          cd elastic-container
+          GENERATED_PASSWORD="$(openssl rand -base64 16)"
+          sed -i "s|changeme|$GENERATED_PASSWORD|" .env
+          echo "::add-mask::$GENERATED_PASSWORD"
+          echo "GENERATED_PASSWORD=$GENERATED_PASSWORD" >> "$GITHUB_ENV"
+          set -x
+          bash elastic-container.sh start
+
+      # Local-stack only: creates a short-lived API key with the generated
+      # elastic password and exports it (masked) as DR_API_KEY for later steps.
+      - name: Get API Key and setup auth
+        env:
+          DR_CLOUD_ID: ${{ secrets.dr_cloud_id }}
+          DR_API_KEY: ${{ secrets.dr_api_key }}
+          DR_ELASTICSEARCH_URL: "https://localhost:9200"
+          ES_USER: "elastic"
+          ES_PASSWORD: ${{ env.GENERATED_PASSWORD }}
+        if: ${{ !env.DR_CLOUD_ID && !env.DR_API_KEY && env.run_esql == 'true' }}
+        run: |
+          cd detection-rules
+          # --fail makes curl exit non-zero on HTTP errors instead of handing an
+          # error body to jq; -k is kept because the local stack's certificate
+          # is not trusted by the runner.
+          if ! response=$(curl -sS --fail -k -X POST -u "$ES_USER:$ES_PASSWORD" -H "Content-Type: application/json" -d '{
+              "name": "tmp-api-key",
+              "expiration": "1d"
+          }' "$DR_ELASTICSEARCH_URL/_security/api_key"); then
+            echo "Request to create a temporary Elasticsearch API key failed." >&2
+            exit 1
+          fi
+
+          # jq -e exits non-zero when .encoded is missing or null, so the literal
+          # string "null" is never masked or exported as the API key.
+          if ! DR_API_KEY=$(echo "$response" | jq -er '.encoded'); then
+            echo "Elasticsearch response did not contain an encoded API key." >&2
+            exit 1
+          fi
+          echo "::add-mask::$DR_API_KEY"
+          echo "DR_API_KEY=$DR_API_KEY" >> "$GITHUB_ENV"
+
+      - name: Set up Python 3.13
+        if: env.run_esql == 'true'
+        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c  # v6
+        with:
+          python-version: "3.13"
+
+      - name: Install dependencies
+        if: env.run_esql == 'true'
+        working-directory: detection-rules
+        run: |
+          python -m pip install --upgrade pip
+          pip cache purge
+          pip install .[dev]
+
+      # With a cloud ID secret the cloud deployment is targeted; without one the
+      # URLs point at the local stack on localhost and TLS errors are ignored.
+      # The API key falls back to the one exported to the job environment.
+      - name: Remote Test ESQL Rules
+        if: env.run_esql == 'true'
+        working-directory: detection-rules
+        env:
+          DR_CLOUD_ID: ${{ secrets.dr_cloud_id || '' }}
+          DR_KIBANA_URL: ${{ secrets.dr_cloud_id == '' && 'https://localhost:5601' || '' }}
+          DR_ELASTICSEARCH_URL: ${{ secrets.dr_cloud_id == '' && 'https://localhost:9200' || '' }}
+          DR_API_KEY: ${{ secrets.dr_api_key || env.DR_API_KEY }}
+          DR_IGNORE_SSL_ERRORS: ${{ secrets.dr_cloud_id == '' && 'true' || '' }}
+        run: python -m detection_rules dev test esql-remote-validation
@@ -37,6 +37,13 @@ jobs:
       env:
         # only run the test test_rule_change_has_updated_date on pull request events to main
         GITHUB_EVENT_NAME: "${{ github.event_name}}"
+        # only run remote validation if repo is set to do so otherwise defer to .github/workflows/esql-validation.yml
+        DR_REMOTE_ESQL_VALIDATION: "${{ vars.remote_esql_validation }}"
+        DR_CLOUD_ID: ${{ secrets.dr_cloud_id }}
+        DR_KIBANA_URL: ${{ secrets.dr_cloud_id }}
+        DR_ELASTICSEARCH_URL: ${{ secrets.dr_cloud_id }}
+        DR_API_KEY: ${{ secrets.dr_api_key }}
+        DR_IGNORE_SSL_ERRORS: ${{ secrets.dr_cloud_id }}
       run: |
         python -m detection_rules test
 
 
@@ -49,6 +49,10 @@ Using the environment variable `DR_BYPASS_TIMELINE_TEMPLATE_VALIDATION` will byp
 Using the environment variable `DR_CLI_MAX_WIDTH` will set a custom max width for the click CLI. 
 For instance, some users may want to increase the default value in cases where help messages are cut off. 
 
+Using the environment variable `DR_REMOTE_ESQL_VALIDATION` will enable remote ESQL validation for rules that use ESQL queries. This validation is performed whenever such a rule is loaded, including, for example, by the `view-rule` command. It requires the appropriate `kibana_url` or `cloud_id`, `api_key`, and `es_url` to be set in the config file or as environment variables.
+
+Using the environment variable `DR_SKIP_EMPTY_INDEX_CLEANUP` will disable the cleanup of remote testing indexes that are created as part of the remote ESQL validation. By default, these indexes are deleted after the validation is complete, or upon validation error.
+
 ## Importing rules into the repo
 
 You can import rules into the repo using the `create-rule` or `import-rules-to-repo` commands. Both of these commands will
 
@@ -7,7 +7,10 @@
 import datetime
 import functools
 import os
+import re
+import time
 import typing
+import uuid
 from collections.abc import Callable
 from pathlib import Path
 from typing import Any
@@ -27,6 +30,104 @@
 RULES_CONFIG = parse_rules_config()
 
 
+def schema_prompt(name: str, value: Any | None = None, is_required: bool = False, **options: Any) -> Any:  # noqa: PLR0911, PLR0912, PLR0915
+    """Interactively prompt for a field value and validate it against schema options.
+
+    Args:
+        name: Field name shown in the prompt. Names containing "date" default to
+            today's date and "rule_id" defaults to a freshly generated UUID4.
+        value: Optional pre-supplied answer; when truthy it is used instead of
+            prompting on the first pass.
+        is_required: When True, an empty or invalid answer triggers a re-prompt
+            instead of returning an empty result.
+        **options: Schema constraints: ``type``, ``pattern``, ``enum``,
+            ``minimum``, ``maximum``, ``min_items``, ``max_items`` and ``default``.
+
+    Returns:
+        The validated answer converted to the schema type (``int`` for
+        number/integer, ``bool`` for boolean, ``list`` for array), ``None`` for a
+        skipped or invalid optional field, or ``[]`` for an invalid optional array.
+    """
+    field_type = options.get("type")
+    pattern: str | None = options.get("pattern")
+    enum = options.get("enum", [])
+    minimum = int(options["minimum"]) if "minimum" in options else None
+    maximum = int(options["maximum"]) if "maximum" in options else None
+    min_items = int(options.get("min_items", 0))
+    max_items = int(options.get("max_items", 9999))
+    is_array = field_type in ("array", ["array"])
+    is_numeric = field_type in ("number", "integer")
+
+    default = options.get("default")
+    if default is not None and str(default).lower() in ("true", "false"):
+        default = str(default).lower()
+
+    if "date" in name:
+        default = time.strftime("%Y/%m/%d")
+
+    if name == "rule_id":
+        default = str(uuid.uuid4())
+
+    # A required scalar with a single allowed value needs no prompt at all.
+    if len(enum) == 1 and is_required and not is_array:
+        return enum[0]
+
+    def _check_type(_val: Any) -> bool:  # noqa: PLR0911
+        # isdecimal() only accepts characters that int() can parse.
+        if is_numeric and not str(_val).isdecimal():
+            print(f"Number expected but got: {_val}")
+            return False
+        # The whole answer has to match the pattern, not just a prefix of it.
+        if pattern and not re.fullmatch(pattern, str(_val)):
+            print(f"{_val} did not match pattern: {pattern}!")
+            return False
+        if enum and _val not in enum:
+            print("{} not in valid options: {}".format(_val, ", ".join(map(str, enum))))
+            return False
+        # Answers typed at the prompt arrive as strings, so bounds are checked on
+        # the parsed number; `is not None` keeps a bound of 0 effective.
+        if is_numeric or type(_val) is int:
+            number = int(_val)
+            if minimum is not None and number < minimum:
+                print(f"{_val!s} is less than the minimum: {minimum!s}")
+                return False
+            if maximum is not None and number > maximum:
+                print(f"{_val!s} is greater than the maximum: {maximum!s}")
+                return False
+        if type(_val) is str and field_type == "boolean" and _val.lower() not in ("true", "false"):
+            print(f"Boolean expected but got: {_val!s}")
+            return False
+        return True
+
+    def _convert_type(_val: Any) -> Any:
+        if field_type == "boolean" and type(_val) is not bool:
+            _val = _val.lower() == "true"
+        return int(_val) if is_numeric else _val
+
+    prompt = (
+        "{name}{default}{required}{multi}".format(
+            name=name,
+            default=f' [{default}] ("n/a" to leave blank) ' if default else "",
+            required=" (required) " if is_required else "",
+            multi=(" (multi, comma separated) " if is_array else ""),
+        ).strip()
+        + ": "
+    )
+
+    while True:
+        result = value or input(prompt) or default
+        if result == "n/a":
+            result = None
+
+        if not result:
+            if is_required:
+                # Drop the pre-supplied value so the next pass actually prompts.
+                value = None
+                continue
+            return None
+
+        if is_array:
+            result_list = result.split(",")
+            # Item-count bounds are inclusive; all() stops at the first bad item.
+            valid = min_items <= len(result_list) <= max_items and all(_check_type(item) for item in result_list)
+            if valid:
+                return [_convert_type(item) for item in result_list]
+            if is_required:
+                value = None
+                continue
+            return []
+
+        if _check_type(result):
+            return _convert_type(result)
+        if is_required:
+            value = None
+            continue
+        return None
+
+
 def single_collection(f: Callable[..., Any]) -> Callable[..., Any]:
     """Add arguments to get a RuleCollection by file, directory or a list of IDs"""
     from .misc import raise_client_error
@@ -145,7 +246,6 @@ def rule_prompt(  # noqa: PLR0912, PLR0913, PLR0915
     **kwargs: Any,
 ) -> TOMLRule | str:
     """Prompt loop to build a rule."""
-    from .misc import schema_prompt
 
     additional_required = additional_required or []
     creation_date = datetime.date.today().strftime("%Y/%m/%d")  # noqa: DTZ011
 
@@ -25,7 +25,8 @@
 import pytoml  # type: ignore[reportMissingTypeStubs]
 import requests.exceptions
 import yaml
-from elasticsearch import Elasticsearch
+from elasticsearch import BadRequestError, Elasticsearch
+from elasticsearch import ConnectionError as ESConnectionError
 from eql.table import Table  # type: ignore[reportMissingTypeStubs]
 from eql.utils import load_dump  # type: ignore[reportMissingTypeStubs, reportUnknownVariableType]
 from kibana.connector import Kibana  # type: ignore[reportMissingTypeStubs]
@@ -39,6 +40,9 @@
 from .docs import REPO_DOCS_DIR, IntegrationSecurityDocs, IntegrationSecurityDocsMDX
 from .ecs import download_endpoint_schemas, download_schemas
 from .endgame import EndgameSchemaManager
+from .esql_errors import (
+    ESQL_EXCEPTION_TYPES,
+)
 from .eswrap import CollectEvents, add_range_to_dsl
 from .ghwrap import GithubClient, update_gist
 from .integrations import (
@@ -50,7 +54,13 @@
     load_integrations_manifests,
 )
 from .main import root
-from .misc import PYTHON_LICENSE, add_client, raise_client_error
+from .misc import (
+    PYTHON_LICENSE,
+    add_client,
+    get_default_elasticsearch_client,
+    get_default_kibana_client,
+    raise_client_error,
+)
 from .packaging import CURRENT_RELEASE_PATH, PACKAGE_FILE, RELEASE_DIR, Package
 from .rule import (
     AnyRuleData,
@@ -63,6 +73,7 @@
     TOMLRuleContents,
 )
 from .rule_loader import RuleCollection, production_filter
+from .rule_validators import ESQLValidator
 from .schemas import definitions, get_stack_versions
 from .utils import check_version_lock_double_bumps, dict_hash, get_etc_path, get_path
 from .version_lock import VersionLockFile, loaded_version_lock
@@ -1403,6 +1414,72 @@ def rule_event_search(  # noqa: PLR0913
         raise_client_error("Rule is not a query rule!")
 
 
+@test_group.command("esql-remote-validation")
+@click.option(
+    "--verbosity",
+    type=click.IntRange(0, 1),
+    default=0,
+    help="Set verbosity level: 0 for minimal output, 1 for detailed output.",
+)
+def esql_remote_validation(
+    verbosity: int,
+) -> None:
+    """Remotely validate every production ES|QL rule using the default Kibana and Elasticsearch clients.
+
+    Each rule is validated with ``ESQLValidator.remote_validate_rule_contents``.
+    Validation errors are recorded immediately; connection errors are retried up
+    to three times. All failures are written to ``failed_rules.log`` and the
+    command exits with status 1 when at least one rule failed.
+    """
+
+    rule_collection: RuleCollection = RuleCollection.default().filter(production_filter)
+    esql_rules = [r for r in rule_collection if r.contents.data.type == "esql"]
+
+    click.echo(f"ESQL rules loaded: {len(esql_rules)}")
+
+    if not esql_rules:
+        return
+    # TODO(eric-forte-elastic): @add_client https://github.com/elastic/detection-rules/issues/5156  # noqa: FIX002
+    with get_default_kibana_client() as kibana_client, get_default_elasticsearch_client() as elastic_client:
+        if not kibana_client or not elastic_client:
+            raise_client_error("Skipping remote validation due to missing client")
+
+        fail_list: list[str] = []
+        # Kept in encounter order so the final report is deterministic.
+        failed_ids: list[str] = []
+        max_retries = 3
+        retry_delay_seconds = 30
+        for r in esql_rules:
+            rule_id = r.contents.data.rule_id
+            for attempt in range(1, max_retries + 1):
+                try:
+                    validator = ESQLValidator(r.contents.data.query)  # type: ignore[reportIncompatibleMethodOverride]
+                    _ = validator.remote_validate_rule_contents(kibana_client, elastic_client, r.contents, verbosity)
+                    break
+                except (ValueError, BadRequestError, *ESQL_EXCEPTION_TYPES) as e:  # type: ignore[reportUnknownMemberType]
+                    e_type = type(e)  # type: ignore[reportUnknownMemberType]
+                    if isinstance(e, ESQL_EXCEPTION_TYPES):
+                        click.echo(click.style(f"{rule_id} ", fg="red", bold=True), nl=False)
+                        _ = e.show()  # type: ignore[reportUnknownMemberType]
+                    else:
+                        click.echo(f"FAILURE: {e_type}: {e}")  # type: ignore[reportUnknownMemberType]
+                    fail_list.append(f"{rule_id}  FAILURE: {e_type}: {e}")  # type: ignore[reportUnknownMemberType]
+                    failed_ids.append(rule_id)
+                    # Validation errors are deterministic, so retrying is pointless.
+                    break
+                except ESConnectionError as e:
+                    click.echo(f"Connection error: {e}. Retrying {attempt}/{max_retries}...")
+                    if attempt < max_retries:
+                        time.sleep(retry_delay_seconds)
+                        continue
+                    # Out of retries: record the failure without a final sleep, and
+                    # prefix it with the rule id like every other log entry.
+                    click.echo(f"FAILURE: {e} after {max_retries} retries")
+                    fail_list.append(f"{rule_id}  FAILURE: {e} after {max_retries} retries")
+                    failed_ids.append(rule_id)
+
+        failed_count = len(failed_ids)
+        click.echo(f"Total rules: {len(esql_rules)}")
+        click.echo(f"Failed rules: {failed_count}")
+
+        _ = Path("failed_rules.log").write_text("\n".join(fail_list), encoding="utf-8")
+        click.echo("Failed rules written to failed_rules.log")
+        if failed_count > 0:
+            click.echo("Failed rule IDs:")
+            click.echo("\n".join(failed_ids))
+            ctx = click.get_current_context()
+            ctx.exit(1)
+
+
 @test_group.command("rule-survey")
 @click.argument("query", required=False)
 @click.option("--date-range", "-d", type=(str, str), default=("now-7d", "now"), help="Date range to scope search")