Skip to content

Commit 329e2c1

Browse files
authored
ESQL: Begin using .. for ranges (elastic#134460)
In our tests for loading enrich policies we were embedding JSON into the CSV. That was ugly, but it was fine for a while. However, we're going to have an actual syntax for range literals in ESQL soon, so this ports the tests to use that syntax:
```
-{"gte": "1900-01-01"\, "lt":"1910-01-01"}, 1900, Edwardian Era
+1900-01-01..1910-01-01                   , 1900, Edwardian Era
```
1 parent f8c6364 commit 329e2c1

File tree

4 files changed

+60
-41
lines changed

4 files changed

+60
-41
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
import java.util.Map;
4747
import java.util.Set;
4848
import java.util.concurrent.Semaphore;
49+
import java.util.regex.Matcher;
50+
import java.util.regex.Pattern;
4951
import java.util.stream.Collectors;
5052
import java.util.stream.Stream;
5153

@@ -637,6 +639,8 @@ public static String readTextFile(URL resource) throws IOException {
637639
}
638640
}
639641

642+
/**
 * One parsed entry of a CSV schema row, written in the file as {@code name:type}.
 *
 * @param name the column name, i.e. the trimmed text before the first {@code ':'}
 *             (or the whole trimmed entry when there is no {@code ':'})
 * @param type the trimmed text after the first {@code ':'}, or {@code null} when the
 *             header entry declares no type
 */
record ColumnHeader(String name, String type) {
}
643+
640644
@SuppressWarnings("unchecked")
641645
/**
642646
* Loads a classic csv file in an ES cluster using a RestClient.
@@ -654,12 +658,13 @@ public static String readTextFile(URL resource) throws IOException {
654658
*/
655659
private static void loadCsvData(RestClient client, String indexName, URL resource, boolean allowSubFields, Logger logger)
656660
throws IOException {
661+
657662
ArrayList<String> failures = new ArrayList<>();
658663
StringBuilder builder = new StringBuilder();
659664
try (BufferedReader reader = reader(resource)) {
660665
String line;
661666
int lineNumber = 1;
662-
String[] columns = null; // list of column names. If one column name contains dot, it is a subfield and its value will be null
667+
ColumnHeader[] columns = null; // Column info. If one column name contains dot, it is a subfield and its value will be null
663668
List<Integer> subFieldsIndices = new ArrayList<>(); // list containing the index of a subfield in "columns" String[]
664669

665670
while ((line = reader.readLine()) != null) {
@@ -669,15 +674,16 @@ private static void loadCsvData(RestClient client, String indexName, URL resourc
669674
String[] entries = multiValuesAwareCsvToStringArray(line, lineNumber);
670675
// the schema row
671676
if (columns == null) {
672-
columns = new String[entries.length];
677+
columns = new ColumnHeader[entries.length];
673678
for (int i = 0; i < entries.length; i++) {
674679
int split = entries[i].indexOf(':');
675680
if (split < 0) {
676-
columns[i] = entries[i].trim();
681+
columns[i] = new ColumnHeader(entries[i].trim(), null);
677682
} else {
678683
String name = entries[i].substring(0, split).trim();
684+
String type = entries[i].substring(split + 1).trim();
679685
if (allowSubFields || name.contains(".") == false) {
680-
columns[i] = name;
686+
columns[i] = new ColumnHeader(name, type);
681687
} else {// if it's a subfield, ignore it in the _bulk request
682688
columns[i] = null;
683689
subFieldsIndices.add(i);
@@ -707,7 +713,7 @@ private static void loadCsvData(RestClient client, String indexName, URL resourc
707713
// Value is null, skip
708714
continue;
709715
}
710-
if ("_id".equals(columns[i])) {
716+
if (columns[i] != null && "_id".equals(columns[i].name)) {
711717
// Value is an _id
712718
idField = entries[i];
713719
continue;
@@ -722,17 +728,17 @@ private static void loadCsvData(RestClient client, String indexName, URL resourc
722728
if (multiValues.length > 1) {
723729
StringBuilder rowStringValue = new StringBuilder("[");
724730
for (String s : multiValues) {
725-
rowStringValue.append(quoteIfNecessary(s)).append(",");
731+
rowStringValue.append(toJson(columns[i].type, s)).append(",");
726732
}
727733
// remove the last comma and put a closing bracket instead
728734
rowStringValue.replace(rowStringValue.length() - 1, rowStringValue.length(), "]");
729735
entries[i] = rowStringValue.toString();
730736
} else {
731-
entries[i] = quoteIfNecessary(entries[i]);
737+
entries[i] = toJson(columns[i].type, entries[i]);
732738
}
733739
// replace any escaped commas with single comma
734740
entries[i] = entries[i].replace(ESCAPED_COMMA_SEQUENCE, ",");
735-
row.append("\"").append(columns[i]).append("\":").append(entries[i]);
741+
row.append("\"").append(columns[i].name).append("\":").append(entries[i]);
736742
} catch (Exception e) {
737743
throw new IllegalArgumentException(
738744
format(
@@ -770,10 +776,23 @@ private static void loadCsvData(RestClient client, String indexName, URL resourc
770776
}
771777
}
772778

773-
private static String quoteIfNecessary(String value) {
774-
boolean isQuoted = (value.startsWith("\"") && value.endsWith("\"")) || (value.startsWith("{") && value.endsWith("}"));
775-
boolean isNumeric = value.matches(NUMERIC_REGEX);
776-
return isQuoted || isNumeric ? value : "\"" + value + "\"";
779+
private static final Pattern RANGE_PATTERN = Pattern.compile("([0-9\\-.Z:]+)\\.\\.([0-9\\-.Z:]+)");
780+
781+
private static String toJson(String type, String value) {
782+
return switch (type == null ? "" : type) {
783+
case "date_range", "double_range", "integer_range" -> {
784+
Matcher m = RANGE_PATTERN.matcher(value);
785+
if (m.matches() == false) {
786+
throw new IllegalArgumentException("can't parse range: " + value);
787+
}
788+
yield "{\"gte\": \"" + m.group(1) + "\", \"lt\": \"" + m.group(2) + "\"}";
789+
}
790+
default -> {
791+
boolean isQuoted = (value.startsWith("\"") && value.endsWith("\"")) || (value.startsWith("{") && value.endsWith("}"));
792+
boolean isNumeric = value.matches(NUMERIC_REGEX);
793+
yield isQuoted || isNumeric ? value : "\"" + value + "\"";
794+
}
795+
};
777796
}
778797

779798
private static void sendBulkRequest(String indexName, StringBuilder builder, RestClient client, Logger logger, List<String> failures)
Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
age_range:integer_range, description:keyword
2-
{"gte": 0\, "lt":2}, Baby
3-
{"gte": 2\, "lt":4}, Toddler
4-
{"gte": 3\, "lt":5}, Preschooler
5-
{"gte": 5\, "lt":12}, Child
6-
{"gte": 13\, "lt":20}, Adolescent
7-
{"gte": 20\, "lt":40}, Young Adult
8-
{"gte": 40\, "lt":60}, Middle-aged
9-
{"gte": 60\, "lt":80}, Senior
10-
{"gte": 80\, "lt":100}, Elderly
11-
{"gte": 100\, "lt":200}, Incredible
2+
0..2 , Baby
3+
2..4 , Toddler
4+
3..5 , Preschooler
5+
5..12 , Child
6+
13..20 , Adolescent
7+
20..40 , Young Adult
8+
40..60 , Middle-aged
9+
60..80 , Senior
10+
80..100, Elderly
11+
100..200, Incredible
Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1-
date_range:date_range, decade:integer, description:keyword
2-
{"gte": "1900-01-01"\, "lt":"1910-01-01"}, 1900, Edwardian Era
3-
{"gte": "1910-01-01"\, "lt":"1920-01-01"}, 1910, Ragtime Era
4-
{"gte": "1920-01-01"\, "lt":"1930-01-01"}, 1920, Roaring Twenties
5-
{"gte": "1930-01-01"\, "lt":"1940-01-01"}, 1930, Dirty Thirties
6-
{"gte": "1940-01-01"\, "lt":"1950-01-01"}, 1940, Fabulous Forties
7-
{"gte": "1950-01-01"\, "lt":"1960-01-01"}, 1950, Nifty Fifties
8-
{"gte": "1960-01-01"\, "lt":"1970-01-01"}, 1960, Swinging Sixties
9-
{"gte": "1970-01-01"\, "lt":"1980-01-01"}, 1970, Groovy Seventies
10-
{"gte": "1980-01-01"\, "lt":"1990-01-01"}, 1980, Radical Eighties
11-
{"gte": "1990-01-01"\, "lt":"2000-01-01"}, 1990, Nineties Nostalgia
12-
{"gte": "2000-01-01"\, "lt":"2010-01-01"}, 2000, Innovation Explosion Decade
13-
{"gte": "2010-01-01"\, "lt":"2020-01-01"}, 2010, Renaissance Decade
14-
{"gte": "2020-01-01"\, "lt":"2030-01-01"}, 2020, Empowerment Era
1+
date_range:date_range, decade:integer, description:keyword
2+
1900-01-01..1910-01-01, 1900, Edwardian Era
3+
1910-01-01..1920-01-01, 1910, Ragtime Era
4+
1920-01-01..1930-01-01, 1920, Roaring Twenties
5+
1930-01-01..1940-01-01, 1930, Dirty Thirties
6+
1940-01-01..1950-01-01, 1940, Fabulous Forties
7+
1950-01-01..1960-01-01, 1950, Nifty Fifties
8+
1960-01-01..1970-01-01, 1960, Swinging Sixties
9+
1970-01-01..1980-01-01, 1970, Groovy Seventies
10+
1980-01-01..1990-01-01, 1980, Radical Eighties
11+
1990-01-01..2000-01-01, 1990, Nineties Nostalgia
12+
2000-01-01..2010-01-01, 2000, Innovation Explosion Decade
13+
2010-01-01..2020-01-01, 2010, Renaissance Decade
14+
2020-01-01..2030-01-01, 2020, Empowerment Era
Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
height_range:double_range, description:keyword
2-
{"gte": 0\, "lt": 1.5}, Very Short
3-
{"gte": 1.5\, "lt": 1.6}, Short
4-
{"gte": 1.6\, "lt": 1.8}, Medium Height
5-
{"gte": 1.8\, "lt": 2.0}, Tall
6-
{"gte": 2.0\, "lt": 5.0}, Very Tall
2+
0.0..1.5, Very Short
3+
1.5..1.6, Short
4+
1.6..1.8, Medium Height
5+
1.8..2.0, Tall
6+
2.0..5.0, Very Tall

0 commit comments

Comments
 (0)