|
17 | 17 | import org.apache.http.client.CredentialsProvider; |
18 | 18 | import org.apache.http.impl.client.BasicCredentialsProvider; |
19 | 19 | import org.apache.logging.log4j.core.config.plugins.util.PluginManager; |
20 | | -import org.apache.lucene.util.IOConsumer; |
21 | 20 | import org.elasticsearch.client.Request; |
22 | 21 | import org.elasticsearch.client.Response; |
23 | 22 | import org.elasticsearch.client.ResponseException; |
|
31 | 30 | import org.elasticsearch.inference.TaskType; |
32 | 31 | import org.elasticsearch.logging.LogManager; |
33 | 32 | import org.elasticsearch.logging.Logger; |
34 | | -import org.elasticsearch.test.ESTestCase; |
35 | 33 | import org.elasticsearch.test.rest.ESRestTestCase; |
36 | 34 | import org.elasticsearch.xcontent.XContentType; |
37 | 35 |
|
45 | 43 | import java.util.List; |
46 | 44 | import java.util.Map; |
47 | 45 | import java.util.Set; |
48 | | -import java.util.concurrent.Semaphore; |
49 | 46 | import java.util.regex.Matcher; |
50 | 47 | import java.util.regex.Pattern; |
51 | 48 | import java.util.stream.Collectors; |
|
58 | 55 | import static org.elasticsearch.xpack.esql.EsqlTestUtils.reader; |
59 | 56 |
|
60 | 57 | public class CsvTestsDataLoader { |
61 | | - private static final int PARALLEL_THREADS = 10; |
62 | 58 | private static final int BULK_DATA_SIZE = 100_000; |
63 | 59 | private static final TestDataset EMPLOYEES = new TestDataset("employees", "mapping-default.json", "employees.csv").noSubfields(); |
64 | 60 | private static final TestDataset EMPLOYEES_INCOMPATIBLE = new TestDataset( |
@@ -433,42 +429,18 @@ private static void loadDataSetIntoEs( |
433 | 429 | IndexCreator indexCreator |
434 | 430 | ) throws IOException { |
435 | 431 | Logger logger = LogManager.getLogger(CsvTestsDataLoader.class); |
436 | | - List<TestDataset> datasets = availableDatasetsForEs( |
437 | | - supportsIndexModeLookup, |
438 | | - supportsSourceFieldMapping, |
439 | | - inferenceEnabled, |
440 | | - timeSeriesOnly |
441 | | - ).stream().toList(); |
442 | | - |
443 | | - logger.info("Creating test indices"); |
444 | | - executeInParallel(datasets, dataset -> createIndex(client, dataset, indexCreator), "Failed to create indices in parallel"); |
445 | 432 |
|
| 433 | + Set<String> loadedDatasets = new HashSet<>(); |
446 | 434 | logger.info("Loading test datasets"); |
447 | | - executeInParallel(datasets, dataset -> loadData(client, dataset, logger), "Failed to load data in parallel"); |
448 | | - |
449 | | - forceMerge(client, datasets.stream().map(d -> d.indexName).collect(Collectors.toSet()), logger); |
450 | | - |
| 435 | + for (var dataset : availableDatasetsForEs(supportsIndexModeLookup, supportsSourceFieldMapping, inferenceEnabled, timeSeriesOnly)) { |
| 436 | + load(client, dataset, logger, indexCreator); |
| 437 | + loadedDatasets.add(dataset.indexName); |
| 438 | + } |
| 439 | + forceMerge(client, loadedDatasets, logger); |
451 | 440 | logger.info("Loading enrich policies"); |
452 | | - executeInParallel( |
453 | | - ENRICH_POLICIES, |
454 | | - policy -> loadEnrichPolicy(client, policy.policyName, policy.policyFileName, logger), |
455 | | - "Failed to load enrich policies in parallel" |
456 | | - ); |
457 | | - |
458 | | - } |
459 | | - |
460 | | - private static <T> void executeInParallel(List<T> items, IOConsumer<T> consumer, String errorMessage) { |
461 | | - Semaphore semaphore = new Semaphore(PARALLEL_THREADS); |
462 | | - ESTestCase.runInParallel(items.size(), i -> { |
463 | | - try { |
464 | | - semaphore.acquire(); |
465 | | - consumer.accept(items.get(i)); |
466 | | - } catch (IOException | InterruptedException e) { |
467 | | - throw new RuntimeException(errorMessage, e); |
468 | | - } finally { |
469 | | - semaphore.release(); |
470 | | - } |
471 | | - }); |
| 441 | + for (var policy : ENRICH_POLICIES) { |
| 442 | + loadEnrichPolicy(client, policy.policyName, policy.policyFileName, logger); |
| 443 | + } |
472 | 444 | } |
473 | 445 |
|
474 | 446 | public static void createInferenceEndpoints(RestClient client) throws IOException { |
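For context, the helper removed in this hunk throttled ESTestCase.runInParallel with a Semaphore so that at most PARALLEL_THREADS datasets hit the cluster at once. The following is a minimal, self-contained sketch of that bounded-parallelism pattern using only java.util.concurrent; the names (executeBounded, BoundedParallelSketch) are hypothetical, and a plain Consumer stands in for the IOConsumer used by the original code.

import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;

class BoundedParallelSketch {
    // Run one task per item on its own thread, but let only `permits` tasks proceed at a time.
    static <T> void executeBounded(List<T> items, Consumer<T> task, int permits) throws InterruptedException {
        Semaphore semaphore = new Semaphore(permits);
        ExecutorService executor = Executors.newFixedThreadPool(Math.max(1, items.size()));
        for (T item : items) {
            executor.submit(() -> {
                try {
                    semaphore.acquire();   // block until one of the `permits` slots frees up
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    return;
                }
                try {
                    task.accept(item);     // e.g. create one index or bulk-load one dataset
                } finally {
                    semaphore.release();
                }
            });
        }
        executor.shutdown();
        executor.awaitTermination(1, TimeUnit.MINUTES);
    }

    public static void main(String[] args) throws InterruptedException {
        executeBounded(List.of("employees", "languages", "apps"), name -> System.out.println("loading " + name), 2);
    }
}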
@@ -626,14 +598,12 @@ private static URL getResource(String name) { |
626 | 598 | return result; |
627 | 599 | } |
628 | 600 |
|
629 | | - private static void createIndex(RestClient client, TestDataset dataset, IndexCreator indexCreator) throws IOException { |
| 601 | + private static void load(RestClient client, TestDataset dataset, Logger logger, IndexCreator indexCreator) throws IOException { |
| 602 | + logger.info("Loading dataset [{}] into ES index [{}]", dataset.dataFileName, dataset.indexName); |
630 | 603 | URL mapping = getResource("/" + dataset.mappingFileName); |
631 | 604 | Settings indexSettings = dataset.readSettingsFile(); |
632 | 605 | indexCreator.createIndex(client, dataset.indexName, readMappingFile(mapping, dataset.typeMapping), indexSettings); |
633 | | - } |
634 | 606 |
|
635 | | - private static void loadData(RestClient client, TestDataset dataset, Logger logger) throws IOException { |
636 | | - logger.info("Loading dataset [{}] into ES index [{}]", dataset.dataFileName, dataset.indexName); |
637 | 607 | // Some examples only test that the query and mappings are valid, and don't need example data. Use .noData() for those |
638 | 608 | if (dataset.dataFileName != null) { |
639 | 609 | URL data = getResource("/data/" + dataset.dataFileName); |
|