elastic
diff --git a/docs/reference/esql/esql-get-started.asciidoc b/docs/reference/esql/esql-get-started.asciidoc
Lines changed: 20 additions & 50 deletions
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java
Lines changed: 9 additions & 2 deletions
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips.csv
Lines changed: 6 additions & 0 deletions
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec
Lines changed: 42 additions & 0 deletions
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec
Lines changed: 27 additions & 0 deletions
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec
Lines changed: 65 additions & 1 deletion
@@ -39,7 +39,7 @@ This query returns up to 500 documents from the `sample_data` index:
 
 [source,esql]
 ----
-FROM sample_data
+include::{esql-specs}/docs.csv-spec[tag=gs-from]
 ----
 
 Each column corresponds to a field, and can be accessed by the name of that
@@ -52,7 +52,7 @@ previous one:
 
 [source,esql]
 ----
-from sample_data
+include::{esql-specs}/docs.csv-spec[tag=gs-from-lowercase]
 ----
 ====
 
@@ -73,8 +73,7 @@ that are returned, up to a maximum of 10,000 rows:
 
 [source,esql]
 ----
-FROM sample_data
-| LIMIT 3
+include::{esql-specs}/docs.csv-spec[tag=gs-limit]
 ----
 
 [TIP]
@@ -84,7 +83,7 @@ have to. The following query is identical to the previous one:
 
 [source,esql]
 ----
-FROM sample_data | LIMIT 3
+include::{esql-specs}/docs.csv-spec[tag=gs-limit-one-line]
 ----
 ====
 
@@ -100,8 +99,7 @@ sort rows on one or more columns:
 
 [source,esql]
 ----
-FROM sample_data
-| SORT @timestamp DESC
+include::{esql-specs}/docs.csv-spec[tag=gs-sort]
 ----
 
 [discrete]
@@ -113,16 +111,14 @@ events with a duration longer than 5ms:
 
 [source,esql]
 ----
-FROM sample_data
-| WHERE event.duration > 5000000
+include::{esql-specs}/where.csv-spec[tag=gs-where]
 ----
 
 `WHERE` supports several <<esql-operators,operators>>. For example, you can use <<esql-like-operator>> to run a wildcard query against the `message` column:
 
 [source,esql]
 ----
-FROM sample_data
-| WHERE message LIKE "Connected*"
+include::{esql-specs}/where-like.csv-spec[tag=gs-like]
 ----
 
 [discrete]
@@ -149,9 +145,7 @@ result set to 3 rows:
 
 [source,esql]
 ----
-FROM sample_data
-| SORT @timestamp DESC
-| LIMIT 3
+include::{esql-specs}/docs.csv-spec[tag=gs-chaining]
 ----
 
 NOTE: The order of processing commands is important. First limiting the result
@@ -169,8 +163,7 @@ other words: `event.duration` converted from nanoseconds to milliseconds.
 
 [source,esql]
 ----
-FROM sample_data
-| EVAL duration_ms = event.duration / 1000000.0
+include::{esql-specs}/eval.csv-spec[tag=gs-eval]
 ----
 
 `EVAL` supports several <<esql-functions,functions>>. For example, to round a
@@ -179,8 +172,7 @@ number to the closest number with the specified number of digits, use the
 
 [source,esql]
 ----
-FROM sample_data
-| EVAL duration_ms = ROUND(event.duration / 1000000.0, 1)
+include::{esql-specs}/eval.csv-spec[tag=gs-round]
 ----
 
 [discrete]
@@ -193,25 +185,22 @@ example, the median duration:
 
 [source,esql]
 ----
-FROM sample_data
-| STATS median_duration = MEDIAN(event.duration)
+include::{esql-specs}/stats.csv-spec[tag=gs-stats]
 ----
 
 You can calculate multiple stats with one command:
 
 [source,esql]
 ----
-FROM sample_data
-| STATS median_duration = MEDIAN(event.duration), max_duration = MAX(event.duration)
+include::{esql-specs}/stats.csv-spec[tag=gs-two-stats]
 ----
 
 Use `BY` to group calculated stats by one or more columns. For example, to
 calculate the median duration per client IP:
 
 [source,esql]
 ----
-FROM sample_data
-| STATS median_duration = MEDIAN(event.duration) BY client.ip
+include::{esql-specs}/stats.csv-spec[tag=gs-stats-by]
 ----
 
 [discrete]
@@ -227,30 +216,22 @@ For example, to create hourly buckets for the data on October 23rd:
 
 [source,esql]
 ----
-FROM sample_data
-| KEEP @timestamp
-| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
+include::{esql-specs}/date.csv-spec[tag=gs-auto_bucket]
 ----
 
 Combine `AUTO_BUCKET` with <<esql-stats-by>> to create a histogram. For example,
 to count the number of events per hour:
 
 [source,esql]
 ----
-FROM sample_data
-| KEEP @timestamp, event.duration
-| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
-| STATS COUNT(*) BY bucket
+include::{esql-specs}/date.csv-spec[tag=gs-auto_bucket-stats-by]
 ----
 
 Or the median duration per hour:
 
 [source,esql]
 ----
-FROM sample_data
-| KEEP @timestamp, event.duration
-| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
-| STATS median_duration = MEDIAN(event.duration) BY bucket
+include::{esql-specs}/date.csv-spec[tag=gs-auto_bucket-stats-by-median]
 ----
 
 [discrete]
@@ -273,10 +254,7 @@ command:
 
 [source,esql]
 ----
-FROM sample_data
-| KEEP @timestamp, client.ip, event.duration
-| EVAL client.ip = TO_STRING(client.ip)
-| ENRICH clientip_policy ON client.ip WITH env
+include::{esql-specs}/enrich.csv-spec[tag=gs-enrich]
 ----
 
 You can use the new `env` column that's added by the `ENRICH` command in
@@ -285,11 +263,7 @@ environment:
 
 [source,esql]
 ----
-FROM sample_data
-| KEEP @timestamp, client.ip, event.duration
-| EVAL client.ip = TO_STRING(client.ip)
-| ENRICH clientip_policy ON client.ip WITH env
-| STATS median_duration = MEDIAN(event.duration) BY env
+include::{esql-specs}/enrich.csv-spec[tag=gs-enrich-stats-by]
 ----
 
 For more about data enrichment with {esql}, refer to <<esql-enrich-data>>.
@@ -321,8 +295,7 @@ string, you can use the following `DISSECT` command:
 
 [source,esql]
 ----
-FROM sample_data
-| DISSECT message "Connected to %{server.ip}"
+include::{esql-specs}/dissect.csv-spec[tag=gs-dissect]
 ----
 
 This adds a `server.ip` column to those rows that have a `message` that matches
@@ -334,10 +307,7 @@ has accepted:
 
 [source,esql]
 ----
-FROM sample_data
-| WHERE STARTS_WITH(message, "Connected to")
-| DISSECT message "Connected to %{server.ip}"
-| STATS COUNT(*) BY server.ip
+include::{esql-specs}/dissect.csv-spec[tag=gs-dissect-stats-by]
 ----
 
 For more about data processing with {esql}, refer to
 
@@ -52,6 +52,8 @@ public class CsvTestsDataLoader {
     private static final TestsDataset APPS = new TestsDataset("apps", "mapping-apps.json", "apps.csv");
     private static final TestsDataset LANGUAGES = new TestsDataset("languages", "mapping-languages.json", "languages.csv");
     private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs", "mapping-ul_logs.json", "ul_logs.csv");
+    private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data", "mapping-sample_data.json", "sample_data.csv");
+    private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips", "mapping-clientips.json", "clientips.csv");
     private static final TestsDataset AIRPORTS = new TestsDataset("airports", "mapping-airports.json", "airports.csv");
     private static final TestsDataset AIRPORTS_WEB = new TestsDataset("airports_web", "mapping-airports_web.json", "airports_web.csv");
 
@@ -66,15 +68,20 @@ public class CsvTestsDataLoader {
         LANGUAGES,
         UL_LOGS.indexName,
         UL_LOGS,
+        SAMPLE_DATA.indexName,
+        SAMPLE_DATA,
+        CLIENT_IPS.indexName,
+        CLIENT_IPS,
         AIRPORTS.indexName,
         AIRPORTS,
         AIRPORTS_WEB.indexName,
         AIRPORTS_WEB
     );
 
-    private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enricy-policy-languages.json");
+    private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");
+    private static final EnrichConfig CLIENT_IPS_ENRICH = new EnrichConfig("clientip_policy", "enrich-policy-clientips.json");
 
-    public static final List<EnrichConfig> ENRICH_POLICIES = List.of(LANGUAGES_ENRICH);
+    public static final List<EnrichConfig> ENRICH_POLICIES = List.of(LANGUAGES_ENRICH, CLIENT_IPS_ENRICH);
 
     /**
      * <p>
 
@@ -0,0 +1,6 @@
+client_ip:keyword,env:keyword
+172.21.0.5,Development
+172.21.2.113,QA
+172.21.2.162,QA
+172.21.3.15,Production
+172.21.3.16,Production
@@ -725,3 +725,45 @@ birth_date:datetime
 1952-02-27T00:00:00.000Z
 1953-04-21T00:00:00.000Z
 ;
+
+docsGettingStartedAutoBucket
+// tag::gs-auto_bucket[]
+FROM sample_data
+| KEEP @timestamp
+| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
+// end::gs-auto_bucket[]
+| LIMIT 0
+;
+
+@timestamp:date | bucket:date
+;
+
+docsGettingStartedAutoBucketStatsBy
+// tag::gs-auto_bucket-stats-by[]
+FROM sample_data
+| KEEP @timestamp, event_duration
+| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
+| STATS COUNT(*) BY bucket
+// end::gs-auto_bucket-stats-by[]
+| SORT bucket
+;
+
+COUNT(*):long | bucket:date
+2              |2023-10-23T12:00:00.000Z
+5              |2023-10-23T13:00:00.000Z
+;
+
+docsGettingStartedAutoBucketStatsByMedian
+// tag::gs-auto_bucket-stats-by-median[]
+FROM sample_data
+| KEEP @timestamp, event_duration
+| EVAL bucket = AUTO_BUCKET (@timestamp, 24, "2023-10-23T00:00:00Z", "2023-10-23T23:59:59Z")
+| STATS median_duration = MEDIAN(event_duration) BY bucket
+// end::gs-auto_bucket-stats-by-median[]
+| SORT bucket
+;
+
+median_duration:double | bucket:date
+3107561.0              |2023-10-23T12:00:00.000Z
+1756467.0              |2023-10-23T13:00:00.000Z
+;
@@ -159,6 +159,33 @@ emp_no:integer | a:keyword            | b:keyword         | c:keyword
 10006          | [Principal, Senior]  | [Support, Team]   | [Engineer, Lead]
 ;
 
+docsGettingStartedDissect
+// tag::gs-dissect[]
+FROM sample_data
+| DISSECT message "Connected to %{server_ip}"
+// end::gs-dissect[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword | server_ip:keyword
+;
+
+docsGettingStartedDissectStatsBy
+// tag::gs-dissect-stats-by[]
+FROM sample_data
+| WHERE STARTS_WITH(message, "Connected to")
+| DISSECT message "Connected to %{server_ip}"
+| STATS COUNT(*) BY server_ip
+// end::gs-dissect-stats-by[]
+| SORT server_ip
+;
+
+COUNT(*):long  | server_ip:keyword
+1              |10.1.0.1       
+1              |10.1.0.2       
+1              |10.1.0.3      
+;
+
 emptyPattern#[skip:-8.11.99]
 ROW a="b c d"| DISSECT a "%{b} %{} %{d}";
 
 
@@ -650,4 +650,68 @@ FROM employees
 first_name:keyword  | last_name:keyword
 Alejandro      |McAlpine     
 // end::rlike-result[]
-;
+;
+
+docsGettingStartedFrom
+// tag::gs-from[]
+FROM sample_data
+// end::gs-from[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+;
+
+docsGettingStartedFromLowercase
+// tag::gs-from-lowercase[]
+from sample_data
+// end::gs-from-lowercase[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+;
+
+docsGettingStartedLimit
+// tag::gs-limit[]
+FROM sample_data
+| LIMIT 3
+// end::gs-limit[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+;
+
+docsGettingStartedLimitOneLine
+// tag::gs-limit-one-line[]
+FROM sample_data | LIMIT 3
+// end::gs-limit-one-line[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+;
+
+docsGettingStartedSort
+// tag::gs-sort[]
+FROM sample_data
+| SORT @timestamp DESC
+// end::gs-sort[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+;
+
+docsGettingStartedChaining
+// tag::gs-chaining[]
+FROM sample_data
+| SORT @timestamp DESC
+| LIMIT 3
+// end::gs-chaining[]
+| LIMIT 0
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+;