4242import java .util .Map ;
4343import java .util .Set ;
4444
45- import static org .elasticsearch .common .Strings .delimitedListToStringArray ;
4645import static org .elasticsearch .common .logging .LoggerMessageFormat .format ;
46+ import static org .elasticsearch .xpack .esql .CsvTestUtils .COMMA_ESCAPING_REGEX ;
47+ import static org .elasticsearch .xpack .esql .CsvTestUtils .ESCAPED_COMMA_SEQUENCE ;
4748import static org .elasticsearch .xpack .esql .CsvTestUtils .multiValuesAwareCsvToStringArray ;
4849
4950public class CsvTestsDataLoader {
@@ -137,17 +138,33 @@ public static void main(String[] args) throws IOException {
137138 }
138139
139140 try (RestClient client = builder .build ()) {
140- loadDataSetIntoEs (client );
141+ loadDataSetIntoEs (client , (restClient , indexName , indexMapping ) -> {
142+ Request request = new Request ("PUT" , "/" + indexName );
143+ request .setJsonEntity ("{\" mappings\" :" + indexMapping + "}" );
144+ restClient .performRequest (request );
145+ });
141146 }
142147 }
143148
149+ private static void loadDataSetIntoEs (RestClient client , IndexCreator indexCreator ) throws IOException {
150+ loadDataSetIntoEs (client , LogManager .getLogger (CsvTestsDataLoader .class ), indexCreator );
151+ }
152+
144153 public static void loadDataSetIntoEs (RestClient client ) throws IOException {
145- loadDataSetIntoEs (client , LogManager .getLogger (CsvTestsDataLoader .class ));
154+ loadDataSetIntoEs (client , (restClient , indexName , indexMapping ) -> {
155+ ESRestTestCase .createIndex (restClient , indexName , null , indexMapping , null );
156+ });
146157 }
147158
148159 public static void loadDataSetIntoEs (RestClient client , Logger logger ) throws IOException {
160+ loadDataSetIntoEs (client , logger , (restClient , indexName , indexMapping ) -> {
161+ ESRestTestCase .createIndex (restClient , indexName , null , indexMapping , null );
162+ });
163+ }
164+
165+ private static void loadDataSetIntoEs (RestClient client , Logger logger , IndexCreator indexCreator ) throws IOException {
149166 for (var dataSet : CSV_DATASET_MAP .values ()) {
150- load (client , dataSet .indexName , "/" + dataSet .mappingFileName , "/" + dataSet .dataFileName , logger );
167+ load (client , dataSet .indexName , "/" + dataSet .mappingFileName , "/" + dataSet .dataFileName , logger , indexCreator );
151168 }
152169 forceMerge (client , CSV_DATASET_MAP .keySet (), logger );
153170 for (var policy : ENRICH_POLICIES ) {
@@ -169,7 +186,14 @@ private static void loadEnrichPolicy(RestClient client, String policyName, Strin
169186 client .performRequest (request );
170187 }
171188
172- private static void load (RestClient client , String indexName , String mappingName , String dataName , Logger logger ) throws IOException {
189+ private static void load (
190+ RestClient client ,
191+ String indexName ,
192+ String mappingName ,
193+ String dataName ,
194+ Logger logger ,
195+ IndexCreator indexCreator
196+ ) throws IOException {
173197 URL mapping = CsvTestsDataLoader .class .getResource (mappingName );
174198 if (mapping == null ) {
175199 throw new IllegalArgumentException ("Cannot find resource " + mappingName );
@@ -178,14 +202,10 @@ private static void load(RestClient client, String indexName, String mappingName
178202 if (data == null ) {
179203 throw new IllegalArgumentException ("Cannot find resource " + dataName );
180204 }
181- createTestIndex (client , indexName , readTextFile (mapping ));
205+ indexCreator . createIndex (client , indexName , readTextFile (mapping ));
182206 loadCsvData (client , indexName , data , CsvTestsDataLoader ::createParser , logger );
183207 }
184208
185- private static void createTestIndex (RestClient client , String indexName , String mapping ) throws IOException {
186- ESRestTestCase .createIndex (client , indexName , null , mapping , null );
187- }
188-
189209 public static String readTextFile (URL resource ) throws IOException {
190210 try (BufferedReader reader = TestUtils .reader (resource )) {
191211 StringBuilder b = new StringBuilder ();
@@ -198,6 +218,20 @@ public static String readTextFile(URL resource) throws IOException {
198218 }
199219
200220 @ SuppressWarnings ("unchecked" )
/**
 * Loads a classic CSV file into an ES cluster using a RestClient.
 * The structure of the file is as follows:
 * - commented lines should start with "//"
 * - the first non-comment line from the file is the schema line (comma separated field_name:ES_data_type elements)
 * - sub-fields should be placed after the root field using a dot notation for the name:
 * root_field:long,root_field.sub_field:integer
 * - a special _id field can be used in the schema and the values of this field will be used in the bulk request as actual doc ids
 * - all subsequent non-comment lines represent the values that will be used to build the _bulk request
 * - an empty string "" refers to a null value
 * - a value starting with an opening square bracket "[" and ending with a closing square bracket "]" refers to a multi-value field
 * - multi-values are comma separated
 * - commas inside multi-value fields can be escaped with the \ (backslash) character
 */
201235 private static void loadCsvData (
202236 RestClient client ,
203237 String indexName ,
@@ -278,9 +312,11 @@ private static void loadCsvData(
278312 if (i > 0 && row .length () > 0 ) {
279313 row .append ("," );
280314 }
281- if (entries [i ].contains ("," )) {// multi-value
315+ // split on comma ignoring escaped commas
316+ String [] multiValues = entries [i ].split (COMMA_ESCAPING_REGEX );
317+ if (multiValues .length > 0 ) {// multi-value
282318 StringBuilder rowStringValue = new StringBuilder ("[" );
283- for (String s : delimitedListToStringArray ( entries [ i ], "," ) ) {
319+ for (String s : multiValues ) {
284320 rowStringValue .append ("\" " + s + "\" ," );
285321 }
286322 // remove the last comma and put a closing bracket instead
@@ -289,6 +325,8 @@ private static void loadCsvData(
289325 } else {
290326 entries [i ] = "\" " + entries [i ] + "\" " ;
291327 }
// replace any escaped commas with a single comma; String is immutable, so the result of
// replace() has to be assigned back, otherwise the escape sequence stays in the document
entries[i] = entries[i].replace(ESCAPED_COMMA_SEQUENCE, ",");
292330 row .append ("\" " + columns [i ] + "\" :" + entries [i ]);
293331 } catch (Exception e ) {
294332 throw new IllegalArgumentException (
@@ -356,4 +394,8 @@ private static XContentParser createParser(XContent xContent, InputStream data)
/**
 * Describes one CSV test dataset by the index it is loaded into and the two resource files it is built from.
 * NOTE(review): presumably the value type of CSV_DATASET_MAP, whose entries are read in loadDataSetIntoEs — confirm.
 *
 * @param indexName       name of the index the dataset is loaded into
 * @param mappingFileName mapping file name; the loader prefixes it with "/" and resolves it as a class resource
 * @param dataFileName    data file name; the loader prefixes it with "/" and resolves it as a class resource
 */
356394 public record TestsDataset (String indexName , String mappingFileName , String dataFileName ) {}
357395
/**
 * Pairs an enrich policy name with the name of the file holding that policy.
 * NOTE(review): presumably the element type of ENRICH_POLICIES consumed by loadEnrichPolicy — confirm against the caller.
 *
 * @param policyName     name of the enrich policy
 * @param policyFileName name of the file that holds the policy definition
 */
358396 public record EnrichConfig (String policyName , String policyFileName ) {}
397+
398+ private interface IndexCreator {
399+ void createIndex (RestClient client , String indexName , String mapping ) throws IOException ;
400+ }
359401}
0 commit comments