|
17 | 17 | import org.apache.http.client.CredentialsProvider; |
18 | 18 | import org.apache.http.impl.client.BasicCredentialsProvider; |
19 | 19 | import org.apache.logging.log4j.core.config.plugins.util.PluginManager; |
| 20 | +import org.apache.lucene.util.IOConsumer; |
20 | 21 | import org.elasticsearch.client.Request; |
21 | 22 | import org.elasticsearch.client.Response; |
22 | 23 | import org.elasticsearch.client.ResponseException; |
|
30 | 31 | import org.elasticsearch.inference.TaskType; |
31 | 32 | import org.elasticsearch.logging.LogManager; |
32 | 33 | import org.elasticsearch.logging.Logger; |
| 34 | +import org.elasticsearch.test.ESTestCase; |
33 | 35 | import org.elasticsearch.test.rest.ESRestTestCase; |
34 | 36 | import org.elasticsearch.xcontent.XContentType; |
35 | 37 |
|
43 | 45 | import java.util.List; |
44 | 46 | import java.util.Map; |
45 | 47 | import java.util.Set; |
| 48 | +import java.util.concurrent.Semaphore; |
46 | 49 | import java.util.regex.Matcher; |
47 | 50 | import java.util.regex.Pattern; |
48 | 51 | import java.util.stream.Collectors; |
|
55 | 58 | import static org.elasticsearch.xpack.esql.EsqlTestUtils.reader; |
56 | 59 |
57 | 60 | public class CsvTestsDataLoader { |
| 61 | + private static final int PARALLEL_THREADS = 10; |
58 | 62 | private static final int BULK_DATA_SIZE = 100_000; |
59 | 63 | private static final TestDataset EMPLOYEES = new TestDataset("employees", "mapping-default.json", "employees.csv").noSubfields(); |
60 | 64 | private static final TestDataset EMPLOYEES_INCOMPATIBLE = new TestDataset( |
@@ -429,18 +433,42 @@ private static void loadDataSetIntoEs( |
429 | 433 | IndexCreator indexCreator |
430 | 434 | ) throws IOException { |
431 | 435 | Logger logger = LogManager.getLogger(CsvTestsDataLoader.class); |
| 436 | + List<TestDataset> datasets = availableDatasetsForEs( |
| 437 | + supportsIndexModeLookup, |
| 438 | + supportsSourceFieldMapping, |
| 439 | + inferenceEnabled, |
| 440 | + timeSeriesOnly |
| 441 | + ).stream().toList(); |
| 442 | + |
| 443 | + logger.info("Creating test indices"); |
| 444 | + executeInParallel(datasets, dataset -> createIndex(client, dataset, indexCreator), "Failed to create indices in parallel"); |
432 | 445 |
433 | | - Set<String> loadedDatasets = new HashSet<>(); |
434 | 446 | logger.info("Loading test datasets"); |
435 | | - for (var dataset : availableDatasetsForEs(supportsIndexModeLookup, supportsSourceFieldMapping, inferenceEnabled, timeSeriesOnly)) { |
436 | | - load(client, dataset, logger, indexCreator); |
437 | | - loadedDatasets.add(dataset.indexName); |
438 | | - } |
439 | | - forceMerge(client, loadedDatasets, logger); |
| 447 | + executeInParallel(datasets, dataset -> loadData(client, dataset, logger), "Failed to load data in parallel"); |
| 448 | + |
| 449 | + forceMerge(client, datasets.stream().map(d -> d.indexName).collect(Collectors.toSet()), logger); |
| 450 | +
440 | 451 | logger.info("Loading enrich policies"); |
441 | | - for (var policy : ENRICH_POLICIES) { |
442 | | - loadEnrichPolicy(client, policy.policyName, policy.policyFileName, logger); |
443 | | - } |
| 452 | + executeInParallel( |
| 453 | + ENRICH_POLICIES, |
| 454 | + policy -> loadEnrichPolicy(client, policy.policyName, policy.policyFileName, logger), |
| 455 | + "Failed to load enrich policies in parallel" |
| 456 | + ); |
| 457 | + |
| 458 | + } |
| 459 | + |
| 460 | + private static <T> void executeInParallel(List<T> items, IOConsumer<T> consumer, String errorMessage) { |
| 461 | + Semaphore semaphore = new Semaphore(PARALLEL_THREADS); |
| 462 | + ESTestCase.runInParallel(items.size(), i -> { |
| 463 | + try { |
| 464 | + semaphore.acquire(); |
| 465 | + consumer.accept(items.get(i)); |
| 466 | + } catch (IOException | InterruptedException e) { |
| 467 | + throw new RuntimeException(errorMessage, e); |
| 468 | + } finally { |
| 469 | + semaphore.release(); |
| 470 | + } |
| 471 | + }); |
444 | 472 | } |
445 | 473 |
446 | 474 | public static void createInferenceEndpoints(RestClient client) throws IOException { |
@@ -598,12 +626,14 @@ private static URL getResource(String name) { |
598 | 626 | return result; |
599 | 627 | } |
600 | 628 |
601 | | - private static void load(RestClient client, TestDataset dataset, Logger logger, IndexCreator indexCreator) throws IOException { |
602 | | - logger.info("Loading dataset [{}] into ES index [{}]", dataset.dataFileName, dataset.indexName); |
| 629 | + private static void createIndex(RestClient client, TestDataset dataset, IndexCreator indexCreator) throws IOException { |
603 | 630 | URL mapping = getResource("/" + dataset.mappingFileName); |
604 | 631 | Settings indexSettings = dataset.readSettingsFile(); |
605 | 632 | indexCreator.createIndex(client, dataset.indexName, readMappingFile(mapping, dataset.typeMapping), indexSettings); |
| 633 | + } |
606 | 634 |
| 635 | + private static void loadData(RestClient client, TestDataset dataset, Logger logger) throws IOException { |
| 636 | + logger.info("Loading dataset [{}] into ES index [{}]", dataset.dataFileName, dataset.indexName); |
607 | 637 | // Some examples only test that the query and mappings are valid, and don't need example data. Use .noData() for those |
608 | 638 | if (dataset.dataFileName != null) { |
609 | 639 | URL data = getResource("/data/" + dataset.dataFileName); |
|
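The heart of this change is the new `executeInParallel` helper: `ESTestCase.runInParallel` runs the given task once per item, concurrently, and the `Semaphore` with `PARALLEL_THREADS` permits caps how many of those tasks talk to the REST client at once. The sketch below shows the same bounded-parallelism pattern as a self-contained JDK program; the `IOTask` interface, the class name, and the cached thread pool standing in for `runInParallel` are illustrative assumptions, not code from this PR.

```java
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.Semaphore;

public class BoundedParallelSketch {
    private static final int PARALLEL_THREADS = 10;

    /** Stand-in for Lucene's IOConsumer: a per-item action that may throw. */
    interface IOTask<T> {
        void accept(T item) throws Exception;
    }

    static <T> void executeInParallel(List<T> items, IOTask<T> task, String errorMessage)
        throws InterruptedException, ExecutionException {
        // One permit per allowed concurrent request, mirroring PARALLEL_THREADS in the loader.
        Semaphore semaphore = new Semaphore(PARALLEL_THREADS);
        // Cached pool: every item gets its own worker (roughly what runInParallel does),
        // so the semaphore is what actually limits concurrency.
        ExecutorService executor = Executors.newCachedThreadPool();
        try {
            List<Future<?>> futures = new ArrayList<>();
            for (T item : items) {
                futures.add(executor.submit(() -> {
                    try {
                        semaphore.acquire();            // wait for a free permit
                        task.accept(item);
                    } catch (Exception e) {
                        throw new RuntimeException(errorMessage, e);
                    } finally {
                        semaphore.release();
                    }
                }));
            }
            for (Future<?> future : futures) {
                future.get();                           // surface the first task failure, if any
            }
        } finally {
            executor.shutdownNow();
        }
    }

    public static void main(String[] args) throws Exception {
        List<String> datasets = List.of("employees", "languages", "apps");
        executeInParallel(datasets, name -> System.out.println("loading " + name), "Failed to load data in parallel");
    }
}
```

Keeping the throttle inside the task (rather than sizing each phase's worker pool separately) lets the loader reuse one helper for all three phases shown in the diff: index creation, bulk data loading, and enrich-policy setup.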