Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
Expand Down Expand Up @@ -234,6 +235,36 @@ public static Collection<String> filterReplacedIndexes(Collection<String> indexP
return result;
}

/**
 * Remove candidate indexes for which a newer version exists among all competing indexes.
 * This suppresses indexes when a newer version of a different type exists
 * (e.g. lucene vs. elasticsearch).
 * <p>
 * Versions are compared per base name using {@link IndexName#compareTo(IndexName)}.
 * A candidate is dropped only if some entry in {@code allCompetingPaths} with the same
 * base name compares strictly higher. A candidate that is equal to, or newer than, the
 * highest competing entry is kept; so is a candidate whose base name does not occur in
 * {@code allCompetingPaths} at all.
 * <p>
 * Note: this method compares index names only. It does not itself check whether an
 * index is active.
 *
 * @param candidatePaths paths of one specific index type being evaluated
 * @param allCompetingPaths paths of all competing index types (e.g. both lucene and elasticsearch)
 * @return candidates that are not superseded by a higher-versioned entry in allCompetingPaths,
 *         in the iteration order of {@code candidatePaths}
 */
public static Collection<String> filterGloballySuperseded(
        Collection<String> candidatePaths, Collection<String> allCompetingPaths) {
    // Highest version seen for each base name across all competing index types.
    Map<String, IndexName> maxByBase = new HashMap<>();
    for (String p : allCompetingPaths) {
        IndexName n = IndexName.parse(PathUtils.getName(p));
        IndexName stored = maxByBase.get(n.baseName);
        if (stored == null || stored.compareTo(n) < 0) {
            maxByBase.put(n.baseName, n);
        }
    }
    List<String> result = new ArrayList<>();
    for (String p : candidatePaths) {
        IndexName n = IndexName.parse(PathUtils.getName(p));
        IndexName globalMax = maxByBase.get(n.baseName);
        // Keep the candidate unless a strictly higher competitor exists. Using "<= 0"
        // (rather than "== 0") also keeps a candidate that is newer than every
        // competitor, e.g. when it is not itself listed in allCompetingPaths.
        if (globalMax == null || globalMax.compareTo(n) <= 0) {
            result.add(p);
        }
    }
    return result;
}

public static Collection<String> filterNewestIndexes(Collection<String> indexPaths) {
HashMap<String, IndexName> latestVersions = new HashMap<>();
for (String p : indexPaths) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.jackrabbit.oak.plugins.index;

import org.junit.Test;

import java.util.Collection;
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

public class IndexNameAdditionalTest {

    // ---- filterGloballySuperseded ----

    /** An empty competitor set cannot supersede anything, so the candidate is returned. */
    @Test
    public void filterGloballySuperseded_noCompetitors() {
        List<String> candidates = List.of("/oak:index/lucene-2");
        List<String> competitors = List.of();

        Collection<String> kept = IndexName.filterGloballySuperseded(candidates, competitors);

        assertEquals(List.of("/oak:index/lucene-2"), List.copyOf(kept));
    }

    /** The only other competitor (lucene-1-custom-1) is older than lucene-2, so lucene-2 stays. */
    @Test
    public void filterGloballySuperseded_olderCompetitorKept() {
        List<String> candidates = List.of("/oak:index/lucene-2");
        List<String> competitors = List.of("/oak:index/lucene-2", "/oak:index/lucene-1-custom-1");

        Collection<String> kept = IndexName.filterGloballySuperseded(candidates, competitors);

        assertEquals(List.of("/oak:index/lucene-2"), List.copyOf(kept));
    }

    /** Same base name, higher version among the competitors: lucene-1 is removed. */
    @Test
    public void filterGloballySuperseded_newerCompetitorFilters() {
        List<String> candidates = List.of("/oak:index/lucene-1");
        List<String> competitors = List.of("/oak:index/lucene-1", "/oak:index/lucene-2");

        Collection<String> kept = IndexName.filterGloballySuperseded(candidates, competitors);

        assertTrue(kept.isEmpty());
    }

    /** A newer version of "barIndex" has no influence on "fooIndex-1" (different base name). */
    @Test
    public void filterGloballySuperseded_differentBaseNotAffected() {
        List<String> candidates = List.of("/oak:index/fooIndex-1");
        List<String> competitors = List.of("/oak:index/fooIndex-1", "/oak:index/barIndex-2");

        Collection<String> kept = IndexName.filterGloballySuperseded(candidates, competitors);

        assertEquals(List.of("/oak:index/fooIndex-1"), List.copyOf(kept));
    }

    /** The unversioned "lucene" (treated as version 0) is removed once "lucene-1" competes. */
    @Test
    public void filterGloballySuperseded_unversionedSupersededByVersioned() {
        List<String> candidates = List.of("/oak:index/lucene");
        List<String> competitors = List.of("/oak:index/lucene", "/oak:index/lucene-1");

        Collection<String> kept = IndexName.filterGloballySuperseded(candidates, competitors);

        assertTrue(kept.isEmpty());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,14 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.jackrabbit.oak.plugins.index.search.spi.query;
package org.apache.jackrabbit.oak.plugins.index;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE;

import org.apache.jackrabbit.oak.commons.junit.LogCustomizer;
import org.apache.jackrabbit.oak.plugins.index.IndexName;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.junit.Test;
Expand Down Expand Up @@ -71,10 +70,10 @@
IndexName p1c1 = IndexName.parse("/lucene-1-custom-1");
IndexName p1c2 = IndexName.parse("/lucene-1-custom-2");

assertTrue(p0.compareTo(p0a) == 0);

Check warning on line 73 in oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexNameTest.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Use assertEquals instead.

See more on https://sonarcloud.io/project/issues?id=org.apache.jackrabbit%3Ajackrabbit-oak&issues=AZ0ltYpZ3TOoRD_-O1QI&open=AZ0ltYpZ3TOoRD_-O1QI&pullRequest=2813
assertTrue(p0.compareTo(p0b) == 0);

Check warning on line 74 in oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexNameTest.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Use assertEquals instead.

See more on https://sonarcloud.io/project/issues?id=org.apache.jackrabbit%3Ajackrabbit-oak&issues=AZ0ltYpZ3TOoRD_-O1QJ&open=AZ0ltYpZ3TOoRD_-O1QJ&pullRequest=2813
assertTrue(p0a.compareTo(p0b) == 0);

Check warning on line 75 in oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexNameTest.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Use assertEquals instead.

See more on https://sonarcloud.io/project/issues?id=org.apache.jackrabbit%3Ajackrabbit-oak&issues=AZ0ltYpZ3TOoRD_-O1QK&open=AZ0ltYpZ3TOoRD_-O1QK&pullRequest=2813
assertTrue(p0c1.compareTo(p0c1a) == 0);

Check warning on line 76 in oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexNameTest.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Use assertEquals instead.

See more on https://sonarcloud.io/project/issues?id=org.apache.jackrabbit%3Ajackrabbit-oak&issues=AZ0ltYpZ3TOoRD_-O1QL&open=AZ0ltYpZ3TOoRD_-O1QL&pullRequest=2813

assertTrue(p0.compareTo(p0c1) < 0);
assertTrue(p0c1.compareTo(p1) < 0);
Expand Down
13 changes: 13 additions & 0 deletions oak-it-osgi/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,19 @@
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<!-- Test classes (test-jar) of oak-search-elastic, test scope only. Added for
     IndexVersionSelectionTest, which queries Lucene and Elasticsearch indexes side by side.
     NOTE(review): presumably this is where ElasticConnectionRule comes from; confirm. -->
<dependency>
<groupId>org.apache.jackrabbit</groupId>
<artifactId>oak-search-elastic</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<!-- Testcontainers Elasticsearch module, test scope only. Lets the integration test run
     against an Elasticsearch container. -->
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>elasticsearch</artifactId>
<version>${testcontainers.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.jackrabbit</groupId>
<artifactId>oak-store-composite</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.osgi;

import org.apache.jackrabbit.oak.InitialContent;
import org.apache.jackrabbit.oak.Oak;
import org.apache.jackrabbit.oak.api.ContentRepository;
import org.apache.jackrabbit.oak.api.ContentSession;
import org.apache.jackrabbit.oak.api.QueryEngine;
import org.apache.jackrabbit.oak.api.Result;
import org.apache.jackrabbit.oak.api.Root;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticConnection;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticConnectionRule;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexNameHelper;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexTracker;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticMetricHandler;
import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticIndexProvider;
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexDefinitionBuilder;
import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorProvider;
import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexProvider;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexDefinitionBuilder;
import org.apache.jackrabbit.oak.plugins.index.nodetype.NodeTypeIndexProvider;
import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexEditorProvider;
import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
import org.apache.jackrabbit.oak.spi.commit.Observer;
import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider;
import org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider;
import org.apache.jackrabbit.oak.stats.StatisticsProvider;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;

import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME;
import static org.junit.Assert.assertTrue;

/**
* Tests that verify index version selection behavior when multiple versions of an index exist
* across different index types (lucene and elasticsearch).
*/
public class IndexVersionSelectionTest {

// Class-level rule that supplies the Elasticsearch connection. It is configured from the
// "elasticConnectionString" system property; setUp() asks the rule whether to use Docker
// or the connection string.
@ClassRule
public static ElasticConnectionRule elasticRule =
new ElasticConnectionRule(System.getProperty("elasticConnectionString"));

// Connection obtained from elasticRule in setUp() and closed in tearDown().
private ElasticConnection elasticConnection;
// Repository created in setUp() with both the Lucene and the Elasticsearch query providers.
private ContentRepository repo;
// Session opened on repo in setUp() and closed in tearDown().
private ContentSession session;

/**
 * Builds an in-memory Oak repository that has both the Lucene and the Elasticsearch
 * query index providers registered, then opens a session on it.
 */
@Before
public void setUp() throws Exception {
    LuceneIndexProvider lucene = new LuceneIndexProvider();
    LuceneIndexEditorProvider luceneEditor = new LuceneIndexEditorProvider();

    // Pick the connection source the rule says is in use.
    if (elasticRule.useDocker()) {
        elasticConnection = elasticRule.getElasticConnectionForDocker();
    } else {
        elasticConnection = elasticRule.getElasticConnectionFromString();
    }
    ElasticMetricHandler metrics = new ElasticMetricHandler(StatisticsProvider.NOOP);
    ElasticIndexTracker tracker = new ElasticIndexTracker(elasticConnection, metrics);
    ElasticIndexProvider elastic = new ElasticIndexProvider(tracker);
    // No ElasticIndexEditorProvider is registered, on purpose. The reasoning is spelled
    // out in the comment inside testLatestVersionIsUsedEvenWithHigherCost().

    Oak oak = new Oak(new MemoryNodeStore())
            .with(new InitialContent())
            .with(new OpenSecurityProvider())
            .with((QueryIndexProvider) lucene)
            .with((Observer) lucene)
            .with(luceneEditor)
            .with((QueryIndexProvider) elastic)
            .with((Observer) tracker)
            .with(new PropertyIndexEditorProvider())
            .with(new NodeTypeIndexProvider());
    repo = oak.createContentRepository();

    session = repo.login(null, null);
}

/**
 * Releases the per-test resources opened in {@code setUp()}: the content session and
 * the Elasticsearch connection.
 *
 * @throws Exception if closing the session or the connection fails
 */
@After
public void tearDown() throws Exception {
    try {
        if (session != null) {
            session.close();
        }
    } finally {
        // Close the connection even if closing the session threw; otherwise the
        // connection would leak for the rest of the test run.
        if (elasticConnection != null) {
            elasticConnection.close();
        }
    }
}

/**
 * Creates a new index definition builder for the requested index type: a Lucene builder
 * for {@code "lucene"}, an Elasticsearch builder for anything else.
 * <p>
 * The builder is configured with {@code noAsync()}. The intent is that the definition is
 * registered in the respective index tracker through the Observer callback during commit,
 * so query plans are available as soon as {@code root.commit()} returns.
 */
private IndexDefinitionBuilder newBuilder(String type) {
    boolean wantsLucene = "lucene".equals(type);
    return wantsLucene
            ? new LuceneIndexDefinitionBuilder().noAsync()
            : new ElasticIndexDefinitionBuilder().noAsync();
}

/**
* Verifies that the latest index version (asset-10-custom-3, of type {@code newType}) is
* selected even when its cost is set to a very high value (1 million), proving that
* version-based selection takes precedence over cost. The index family is:
* <pre>
* asset-10 (oldType)
* asset-10-custom-1 (oldType)
* asset-10-custom-2 (oldType)
* asset-10-custom-3 (newType, costPerEntry=costPerExecution=1_000_000)
* </pre>
* All indexes carry the tag "myTag" and use selectionPolicy="tag". The query uses
* {@code option(index tag myTag)} and is a {@code contains()} query so that traversal
* is not an option. Version selection must filter all but asset-10-custom-3 from the
* candidate list regardless of its higher cost.
*/
private void testLatestVersionIsUsedEvenWithHigherCost(String oldType, String newType)
throws Exception {
Root root = session.getLatestRoot();
Tree oakIndex = root.getTree("/" + INDEX_DEFINITIONS_NAME);

for (String name : new String[]{"asset-10", "asset-10-custom-1", "asset-10-custom-2"}) {
IndexDefinitionBuilder b = newBuilder(oldType);
b.tags("myTag", oldType);
b.selectionPolicy("tag");
b.indexRule("nt:base").property("asset").analyzed();
b.build(oakIndex.addChild(name));
}

// asset-10-custom-3: same setup but with a very high cost. Version selection must
// still pick this index (it is the latest) and not fall back to a cheaper older version.
IndexDefinitionBuilder b = newBuilder(newType);
b.tags("myTag", newType);
b.selectionPolicy("tag");
b.indexRule("nt:base").property("asset").analyzed();
Tree custom3Tree = oakIndex.addChild("asset-10-custom-3");
b.build(custom3Tree);
custom3Tree.setProperty(FulltextIndexConstants.COST_PER_ENTRY, 1_000_000.0);
custom3Tree.setProperty(FulltextIndexConstants.COST_PER_EXECUTION, 1_000_000.0);

root.getTree("/").addChild("content").setProperty("asset", "test-value");
root.commit();

// Why we create the ES index manually instead of registering ElasticIndexEditorProvider:
//
// Normally, ElasticIndexEditorProvider would create the Elasticsearch index during
// root.commit(). However, this module's test classpath has a Lucene version conflict:
// oak-lucene depends on Lucene 4.7.2, while oak-search-elastic requires Lucene 9.x.
// Maven's dependency resolution picks lucene-core:4.7.2 (the nearer declaration), so
// lucene-core:9.x is absent. ElasticIndexEditorProvider transitively loads
// ElasticCustomAnalyzer, which imports org.apache.lucene.util.ResourceLoader — a class
// that only exists in lucene-core 9.x (in 4.x it had a different package). The result is
// a NoClassDefFoundError at commit time, even when no custom analyzers are configured.
//
// This test does not need to index actual content — it only checks which index the query
// planner *selects* (via EXPLAIN). For plan generation, ElasticIndexStatistics.numDocs()
// is called to estimate the entry count; it issues a COUNT request to Elasticsearch and
// throws index_not_found_exception if the index does not exist. That exception propagates
// as UncheckedExecutionException and is caught in FulltextIndex.getPlans(), which silently
// skips the index — causing the test to fail with "traverse allNodes".
//
// The fix: after committing the Oak index definitions (which registers the definition in
// the ElasticIndexTracker via the Observer callback), we create an empty Elasticsearch
// index directly via the REST client. numDocs() then returns 0, the planner can generate
// a plan, and version selection is exercised correctly.
Comment on lines +164 to +185
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am wondering if we could avoid this. If the issue is just ElasticIndexStatistics.numDocs(), perhaps we can add a @TestOnly constructor in ElasticIndexTracker to pass a NoOp ElasticIndexStatistics instance. This will be used by ElasticIndexNodeManager/ElasticIndexNode. If this works, there would be no need to include oak-search-elastic and testcontainers dependencies along with ElasticConnectionRule that makes the test slow since it has to start an ES container.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's an integration test, so I think it's fine if it's slower than other tests. I think it makes sense that we test with the "correct" setup, and don't mock it. And for this, I think we do need to include both oak-search-elastic and oak-lucene, because this is what we test here: they need to reply with a plan for a given query.

But I'm still trying to find a way to make it more real-world: that is, I think it should be possible to configure things so that we don't risk NoClassDefFoundError, and we don't need to create an Elasticsearch index from within the test case...

String elasticName;
if ("elasticsearch".equals(newType)) {
elasticName = "asset-10-custom-3";
} else {
elasticName = "asset-10-custom-2";
}
String alias = ElasticIndexNameHelper.getElasticSafeIndexName(
elasticConnection.getIndexPrefix(),
"/" + INDEX_DEFINITIONS_NAME + "/" + elasticName);
elasticConnection.getClient().indices().create(c -> c.index(alias));

root = session.getLatestRoot();
Result result = root.getQueryEngine().executeQuery(
"explain select * from [nt:base] where contains([asset], 'test-value')" +
" option(index tag myTag)",
"JCR-SQL2",
QueryEngine.NO_BINDINGS,
QueryEngine.NO_MAPPINGS);

String plan = result.getRows().iterator().next().getValue("plan").getValue(Type.STRING);

// Version selection keeps only asset-10-custom-3 (the latest). The contains() constraint
// prevents traversal, so the high-cost index must be used.
assertTrue("Expected asset-10-custom-3 to be used, but got: " + plan,
plan.contains("asset-10-custom-3"));
}

/**
 * Runs the shared scenario with the older index versions of type "elasticsearch" and the
 * newest version (asset-10-custom-3) of type "lucene".
 */
@Test
public void latestLuceneVersionIsUsedEvenWithHigherCost() throws Exception {
testLatestVersionIsUsedEvenWithHigherCost("elasticsearch", "lucene");
}

/**
 * Runs the shared scenario with the older index versions of type "lucene" and the
 * newest version (asset-10-custom-3) of type "elasticsearch".
 */
@Test
public void latestElasticsearchVersionIsUsedEvenWithHigherCost() throws Exception {
testLatestVersionIsUsedEvenWithHigherCost("lucene", "elasticsearch");
}
}
Loading
Loading