Skip to content

Commit 646875a

Browse files
authored
Merge branch 'main' into wire_cps_check
2 parents d9b23ba + da8b9c6 commit 646875a

File tree

13 files changed

+516
-125
lines changed

13 files changed

+516
-125
lines changed

docs/changelog/137220.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 137220
2+
summary: Skip dataframes when disabled
3+
area: Machine Learning
4+
type: bug
5+
issues: []

libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/MSBitToInt4ESNextOSQVectorsScorer.java

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import jdk.incubator.vector.LongVector;
1515
import jdk.incubator.vector.ShortVector;
1616
import jdk.incubator.vector.VectorOperators;
17-
import jdk.incubator.vector.VectorSpecies;
1817

1918
import org.apache.lucene.index.VectorSimilarityFunction;
2019
import org.apache.lucene.store.IndexInput;
@@ -31,23 +30,6 @@
3130
/** Panamized scorer for quantized vectors stored as a {@link MemorySegment}. */
3231
final class MSBitToInt4ESNextOSQVectorsScorer extends MemorySegmentESNextOSQVectorsScorer.MemorySegmentScorer {
3332

34-
private static final int BULK_SIZE = MemorySegmentESNextOSQVectorsScorer.BULK_SIZE;
35-
private static final float FOUR_BIT_SCALE = 1f / ((1 << 4) - 1);
36-
37-
private static final VectorSpecies<Integer> INT_SPECIES_128 = IntVector.SPECIES_128;
38-
39-
private static final VectorSpecies<Long> LONG_SPECIES_128 = LongVector.SPECIES_128;
40-
private static final VectorSpecies<Long> LONG_SPECIES_256 = LongVector.SPECIES_256;
41-
42-
private static final VectorSpecies<Byte> BYTE_SPECIES_128 = ByteVector.SPECIES_128;
43-
private static final VectorSpecies<Byte> BYTE_SPECIES_256 = ByteVector.SPECIES_256;
44-
45-
private static final VectorSpecies<Short> SHORT_SPECIES_128 = ShortVector.SPECIES_128;
46-
private static final VectorSpecies<Short> SHORT_SPECIES_256 = ShortVector.SPECIES_256;
47-
48-
private static final VectorSpecies<Float> FLOAT_SPECIES_128 = FloatVector.SPECIES_128;
49-
private static final VectorSpecies<Float> FLOAT_SPECIES_256 = FloatVector.SPECIES_256;
50-
5133
MSBitToInt4ESNextOSQVectorsScorer(IndexInput in, int dimensions, int dataLength, MemorySegment memorySegment) {
5234
super(in, dimensions, dataLength, memorySegment);
5335
}

libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/MSInt4SymmetricESNextOSQVectorsScorer.java

Lines changed: 419 additions & 0 deletions
Large diffs are not rendered by default.

libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/MemorySegmentESNextOSQVectorsScorer.java

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,6 @@
2525
/** Panamized scorer for quantized vectors stored as a {@link MemorySegment}. */
2626
public final class MemorySegmentESNextOSQVectorsScorer extends ESNextOSQVectorsScorer {
2727

28-
private static final VectorSpecies<Integer> INT_SPECIES_128 = IntVector.SPECIES_128;
29-
30-
private static final VectorSpecies<Long> LONG_SPECIES_128 = LongVector.SPECIES_128;
31-
private static final VectorSpecies<Long> LONG_SPECIES_256 = LongVector.SPECIES_256;
32-
33-
private static final VectorSpecies<Byte> BYTE_SPECIES_128 = ByteVector.SPECIES_128;
34-
private static final VectorSpecies<Byte> BYTE_SPECIES_256 = ByteVector.SPECIES_256;
35-
36-
private static final VectorSpecies<Short> SHORT_SPECIES_128 = ShortVector.SPECIES_128;
37-
private static final VectorSpecies<Short> SHORT_SPECIES_256 = ShortVector.SPECIES_256;
38-
39-
private static final VectorSpecies<Float> FLOAT_SPECIES_128 = FloatVector.SPECIES_128;
40-
private static final VectorSpecies<Float> FLOAT_SPECIES_256 = FloatVector.SPECIES_256;
41-
4228
private final MemorySegment memorySegment;
4329
private final MemorySegmentScorer scorer;
4430

@@ -54,6 +40,10 @@ public MemorySegmentESNextOSQVectorsScorer(
5440
this.memorySegment = memorySegment;
5541
if (queryBits == 4 && indexBits == 1) {
5642
this.scorer = new MSBitToInt4ESNextOSQVectorsScorer(in, dimensions, dataLength, memorySegment);
43+
} else if (queryBits == 4 && indexBits == 4) {
44+
this.scorer = new MSInt4SymmetricESNextOSQVectorsScorer(in, dimensions, dataLength, memorySegment);
45+
} else if (queryBits == 4 && indexBits == 2) {
46+
throw new IllegalArgumentException("4-bit query with 2-bit index is not supported; only 1-bit and 4-bit indices are supported with a 4-bit query");
5747
} else {
5848
throw new IllegalArgumentException("Only asymmetric 4-bit query and 1-bit index supported");
5949
}
@@ -112,7 +102,24 @@ public float scoreBulk(
112102
);
113103
}
114104

115-
abstract static sealed class MemorySegmentScorer permits MSBitToInt4ESNextOSQVectorsScorer {
105+
abstract static sealed class MemorySegmentScorer permits MSBitToInt4ESNextOSQVectorsScorer, MSInt4SymmetricESNextOSQVectorsScorer {
106+
107+
static final int BULK_SIZE = MemorySegmentESNextOSQVectorsScorer.BULK_SIZE;
108+
static final float FOUR_BIT_SCALE = 1f / ((1 << 4) - 1);
109+
static final VectorSpecies<Integer> INT_SPECIES_128 = IntVector.SPECIES_128;
110+
111+
static final VectorSpecies<Long> LONG_SPECIES_128 = LongVector.SPECIES_128;
112+
static final VectorSpecies<Long> LONG_SPECIES_256 = LongVector.SPECIES_256;
113+
114+
static final VectorSpecies<Byte> BYTE_SPECIES_128 = ByteVector.SPECIES_128;
115+
static final VectorSpecies<Byte> BYTE_SPECIES_256 = ByteVector.SPECIES_256;
116+
117+
static final VectorSpecies<Short> SHORT_SPECIES_128 = ShortVector.SPECIES_128;
118+
static final VectorSpecies<Short> SHORT_SPECIES_256 = ShortVector.SPECIES_256;
119+
120+
static final VectorSpecies<Float> FLOAT_SPECIES_128 = FloatVector.SPECIES_128;
121+
static final VectorSpecies<Float> FLOAT_SPECIES_256 = FloatVector.SPECIES_256;
122+
116123
protected final MemorySegment memorySegment;
117124
protected final IndexInput in;
118125
protected final int length;

libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorizationProvider.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public ESNextOSQVectorsScorer newESNextOSQVectorsScorer(IndexInput input, byte q
4040
if (PanamaESVectorUtilSupport.HAS_FAST_INTEGER_VECTORS
4141
&& input instanceof MemorySegmentAccessInput msai
4242
&& queryBits == 4
43-
&& indexBits == 1) {
43+
&& (indexBits == 1 || indexBits == 4)) {
4444
MemorySegment ms = msai.segmentSliceOrNull(0, input.length());
4545
if (ms != null) {
4646
return new MemorySegmentESNextOSQVectorsScorer(input, queryBits, indexBits, dimension, dataLength, ms);

muted-tests.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -492,9 +492,6 @@ tests:
492492
- class: org.elasticsearch.readiness.ReadinessClusterIT
493493
method: testReadinessDuringRestartsNormalOrder
494494
issue: https://github.com/elastic/elasticsearch/issues/136955
495-
- class: org.elasticsearch.xpack.ilm.TimeSeriesDataStreamsIT
496-
method: testSearchableSnapshotAction
497-
issue: https://github.com/elastic/elasticsearch/issues/137167
498495
- class: org.elasticsearch.xpack.security.CoreWithSecurityClientYamlTestSuiteIT
499496
method: test {yaml=indices.validate_query/20_query_string/validate_query with query_string parameters}
500497
issue: https://github.com/elastic/elasticsearch/issues/137391

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

Lines changed: 11 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import org.apache.http.client.CredentialsProvider;
1818
import org.apache.http.impl.client.BasicCredentialsProvider;
1919
import org.apache.logging.log4j.core.config.plugins.util.PluginManager;
20-
import org.apache.lucene.util.IOConsumer;
2120
import org.elasticsearch.client.Request;
2221
import org.elasticsearch.client.Response;
2322
import org.elasticsearch.client.ResponseException;
@@ -31,7 +30,6 @@
3130
import org.elasticsearch.inference.TaskType;
3231
import org.elasticsearch.logging.LogManager;
3332
import org.elasticsearch.logging.Logger;
34-
import org.elasticsearch.test.ESTestCase;
3533
import org.elasticsearch.test.rest.ESRestTestCase;
3634
import org.elasticsearch.xcontent.XContentType;
3735

@@ -45,7 +43,6 @@
4543
import java.util.List;
4644
import java.util.Map;
4745
import java.util.Set;
48-
import java.util.concurrent.Semaphore;
4946
import java.util.regex.Matcher;
5047
import java.util.regex.Pattern;
5148
import java.util.stream.Collectors;
@@ -58,7 +55,6 @@
5855
import static org.elasticsearch.xpack.esql.EsqlTestUtils.reader;
5956

6057
public class CsvTestsDataLoader {
61-
private static final int PARALLEL_THREADS = 10;
6258
private static final int BULK_DATA_SIZE = 100_000;
6359
private static final TestDataset EMPLOYEES = new TestDataset("employees", "mapping-default.json", "employees.csv").noSubfields();
6460
private static final TestDataset EMPLOYEES_INCOMPATIBLE = new TestDataset(
@@ -433,42 +429,18 @@ private static void loadDataSetIntoEs(
433429
IndexCreator indexCreator
434430
) throws IOException {
435431
Logger logger = LogManager.getLogger(CsvTestsDataLoader.class);
436-
List<TestDataset> datasets = availableDatasetsForEs(
437-
supportsIndexModeLookup,
438-
supportsSourceFieldMapping,
439-
inferenceEnabled,
440-
timeSeriesOnly
441-
).stream().toList();
442-
443-
logger.info("Creating test indices");
444-
executeInParallel(datasets, dataset -> createIndex(client, dataset, indexCreator), "Failed to create indices in parallel");
445432

433+
Set<String> loadedDatasets = new HashSet<>();
446434
logger.info("Loading test datasets");
447-
executeInParallel(datasets, dataset -> loadData(client, dataset, logger), "Failed to load data in parallel");
448-
449-
forceMerge(client, datasets.stream().map(d -> d.indexName).collect(Collectors.toSet()), logger);
450-
435+
for (var dataset : availableDatasetsForEs(supportsIndexModeLookup, supportsSourceFieldMapping, inferenceEnabled, timeSeriesOnly)) {
436+
load(client, dataset, logger, indexCreator);
437+
loadedDatasets.add(dataset.indexName);
438+
}
439+
forceMerge(client, loadedDatasets, logger);
451440
logger.info("Loading enrich policies");
452-
executeInParallel(
453-
ENRICH_POLICIES,
454-
policy -> loadEnrichPolicy(client, policy.policyName, policy.policyFileName, logger),
455-
"Failed to load enrich policies in parallel"
456-
);
457-
458-
}
459-
460-
private static <T> void executeInParallel(List<T> items, IOConsumer<T> consumer, String errorMessage) {
461-
Semaphore semaphore = new Semaphore(PARALLEL_THREADS);
462-
ESTestCase.runInParallel(items.size(), i -> {
463-
try {
464-
semaphore.acquire();
465-
consumer.accept(items.get(i));
466-
} catch (IOException | InterruptedException e) {
467-
throw new RuntimeException(errorMessage, e);
468-
} finally {
469-
semaphore.release();
470-
}
471-
});
441+
for (var policy : ENRICH_POLICIES) {
442+
loadEnrichPolicy(client, policy.policyName, policy.policyFileName, logger);
443+
}
472444
}
473445

474446
public static void createInferenceEndpoints(RestClient client) throws IOException {
@@ -626,14 +598,12 @@ private static URL getResource(String name) {
626598
return result;
627599
}
628600

629-
private static void createIndex(RestClient client, TestDataset dataset, IndexCreator indexCreator) throws IOException {
601+
private static void load(RestClient client, TestDataset dataset, Logger logger, IndexCreator indexCreator) throws IOException {
602+
logger.info("Loading dataset [{}] into ES index [{}]", dataset.dataFileName, dataset.indexName);
630603
URL mapping = getResource("/" + dataset.mappingFileName);
631604
Settings indexSettings = dataset.readSettingsFile();
632605
indexCreator.createIndex(client, dataset.indexName, readMappingFile(mapping, dataset.typeMapping), indexSettings);
633-
}
634606

635-
private static void loadData(RestClient client, TestDataset dataset, Logger logger) throws IOException {
636-
logger.info("Loading dataset [{}] into ES index [{}]", dataset.dataFileName, dataset.indexName);
637607
// Some examples only test that the query and mappings are valid, and don't need example data. Use .noData() for those
638608
if (dataset.dataFileName != null) {
639609
URL data = getResource("/data/" + dataset.dataFileName);

x-pack/plugin/esql/qa/testFixtures/src/test/java/org/elasticsearch/xpack/esql/CsvTestsDataLoaderTests.java

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,12 @@
1111

1212
import java.net.ConnectException;
1313

14-
import static org.hamcrest.Matchers.instanceOf;
1514
import static org.hamcrest.Matchers.startsWith;
1615

1716
public class CsvTestsDataLoaderTests extends ESTestCase {
1817

1918
public void testCsvTestsDataLoaderExecution() {
20-
Throwable cause = expectThrows(AssertionError.class, () -> CsvTestsDataLoader.main(new String[] {}));
21-
// find the root cause
22-
while (cause.getCause() != null) {
23-
cause = cause.getCause();
24-
}
25-
assertThat(cause, instanceOf(ConnectException.class));
26-
assertThat(cause.getMessage(), startsWith("Connection refused"));
19+
ConnectException ce = expectThrows(ConnectException.class, () -> CsvTestsDataLoader.main(new String[] {}));
20+
assertThat(ce.getMessage(), startsWith("Connection refused"));
2721
}
2822
}

x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/TimeSeriesDataStreamsIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,8 @@ public void testSearchableSnapshotAction() throws Exception {
184184
// Manual rollover the original index such that it's not the write index in the data stream anymore
185185
rolloverMaxOneDocCondition(client(), dataStream);
186186

187-
awaitIndexExists(restoredIndexName);
188-
awaitIndexDoesNotExist(backingIndexName, TimeValue.timeValueSeconds(60));
187+
awaitIndexExists(restoredIndexName, TimeValue.timeValueSeconds(20));
188+
awaitIndexDoesNotExist(backingIndexName);
189189
assertBusy(
190190
() -> assertThat(explainIndex(client(), restoredIndexName).get("step"), is(PhaseCompleteStep.NAME)),
191191
30,

0 commit comments

Comments
 (0)