CHANGELOG.md (1 addition, 0 deletions)
@@ -32,6 +32,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Fix `cluster.remote.<cluster_alias>.server_name` setting not populating SNI ([#20321](https://github.com/opensearch-project/OpenSearch/pull/20321))
 - Fix X-Opaque-Id header propagation (along with other response headers) for streaming Reactor Netty 4 transport ([#20371](https://github.com/opensearch-project/OpenSearch/pull/20371))
 - Fix indexing regression and bug fixes for grouping criteria. ([20145](https://github.com/opensearch-project/OpenSearch/pull/20145))
+- Fix CriteriaBasedCodec to work with delegate codec. ([20442](https://github.com/opensearch-project/OpenSearch/pull/20442))
 
 ### Dependencies
 - Bump `com.google.auth:google-auth-library-oauth2-http` from 1.38.0 to 1.41.0 ([#20183](https://github.com/opensearch-project/OpenSearch/pull/20183))
server/src/main/java/org/opensearch/index/codec/CriteriaBasedCodec.java

@@ -9,15 +9,9 @@
 package org.opensearch.index.codec;
 
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FilterCodec;
-import org.apache.lucene.codecs.SegmentInfoFormat;
-import org.apache.lucene.codecs.lucene103.Lucene103Codec;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-
-import java.io.IOException;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
 
 /**
  * Filter codec used to attach bucket attributes to segments of child writer.
@@ -27,46 +21,29 @@ public class CriteriaBasedCodec extends FilterCodec {
 
     private final String bucket;
     public static final String BUCKET_NAME = "bucket";
-    private static final String PLACEHOLDER_BUCKET_FOR_PARENT_WRITER = "-2";
-
-    public CriteriaBasedCodec() {
-        super("CriteriaBasedCodec", new Lucene103Codec());
-        bucket = null;
-    }
+    public static final String ATTRIBUTE_BINDING_TARGET_FIELD = "_id";
 
     public CriteriaBasedCodec(Codec delegate, String bucket) {
-        super("CriteriaBasedCodec", delegate);
+        super(delegate.getName(), delegate);
         this.bucket = bucket;
     }
 
     @Override
-    public SegmentInfoFormat segmentInfoFormat() {
-        return new SegmentInfoFormat() {
-            @Override
-            public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException {
-                return delegate.segmentInfoFormat().read(directory, segmentName, segmentID, context);
-            }
-
-            @Override
-            public void write(Directory directory, SegmentInfo info, IOContext ioContext) throws IOException {
-                if (bucket != null) {
-                    // We will set BUCKET_NAME attribute only for child writer where bucket will set.
-                    info.putAttribute(BUCKET_NAME, bucket);
-                } else if (info.getAttribute(BUCKET_NAME) == null) {
-                    // For segment belonging to parent writer, attributes will be set. In case write went to parent
-                    // writer (like for no ops writes or for temporary tombstone entry which is added for deletes/updates
-                    // to sync version across child and parent writers), segments corresponding to those writer does not
-                    // have
-                    info.putAttribute(BUCKET_NAME, PLACEHOLDER_BUCKET_FOR_PARENT_WRITER);
-                }
-
-                delegate.segmentInfoFormat().write(directory, info, ioContext);
-            }
-        };
-    }
-
-    @Override
-    public DocValuesFormat docValuesFormat() {
-        return new CriteriaBasedDocValueFormat(bucket);
+    public PostingsFormat postingsFormat() {
+        PostingsFormat format = super.postingsFormat();
+        if (format instanceof PerFieldPostingsFormat) {
+            return new PerFieldPostingsFormat() {
+
+                @Override
+                public PostingsFormat getPostingsFormatForField(String field) {
+                    if (field.equals(ATTRIBUTE_BINDING_TARGET_FIELD)) {
+                        return new CriteriaBasedPostingsFormat(((PerFieldPostingsFormat) format).getPostingsFormatForField(field), bucket);
+                    } else {
+                        return ((PerFieldPostingsFormat) format).getPostingsFormatForField(field);
+                    }
+                }
+            };
+        }
+        return format;
    }
 }
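Editor's note: a minimal usage sketch of the rewritten codec, not part of the PR. It assumes the sketch lives in org.opensearch.index.codec (so CriteriaBasedCodec resolves unqualified); the class name ChildWriterCodecWiring and the bucket argument are illustrative. Lucene's default codecs, including the Lucene103Codec the old no-arg constructor delegated to, expose their postings through a PerFieldPostingsFormat, so the instanceof branch above applies and only the _id field is routed through CriteriaBasedPostingsFormat; every other field keeps the delegate's per-field choice.

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene103.Lucene103Codec;
import org.apache.lucene.index.IndexWriterConfig;

class ChildWriterCodecWiring {
    // Builds a writer config for a child writer whose segments are tagged with `bucket`.
    static IndexWriterConfig childWriterConfig(String bucket) {
        Codec codec = new CriteriaBasedCodec(new Lucene103Codec(), bucket);
        return new IndexWriterConfig().setCodec(codec);
    }
}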
server/src/main/java/org/opensearch/index/codec/CriteriaBasedPostingsFormat.java

@@ -0,0 +1,145 @@
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

package org.opensearch.index.codec;

import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;

import java.io.IOException;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import static org.opensearch.index.codec.CriteriaBasedCodec.ATTRIBUTE_BINDING_TARGET_FIELD;

/**
 * Postings format that attaches the segment info attribute corresponding to the grouping
 * criteria associated with a segment.
 */
public class CriteriaBasedPostingsFormat extends PostingsFormat {

    public static final String CRITERIA_BASED_CODEC_NAME = "CriteriaBasedCodec99";
    private final PostingsFormat delegatePostingsFormat;
    private final String bucket;
    /** Extension of CAS index to store delegate information. */
    public static final String CAS_FILE_EXTENSION = "cas";
    public static final int VERSION_START = 0;
    public static final int VERSION_CURRENT = VERSION_START;
    public static final String BUCKET_NAME = "bucket";
    private static final String PLACEHOLDER_BUCKET_FOR_PARENT_WRITER = "-2";
    private static final String DELEGATE_CODEC_KEY = "delegate_codec_key";

    /**
     * Creates a new postings format.
     *
     * <p>The provided name will be written into the index segment in some configurations (such as
     * when using {@link org.apache.lucene.codecs.perfield.PerFieldPostingsFormat}): in such
     * configurations, for the segment to be read this class should be registered with Java's SPI
     * mechanism (registered in META-INF/ of your jar file, etc).
     */
    protected CriteriaBasedPostingsFormat(PostingsFormat delegatePostingsFormat, String bucket) {
        super(CRITERIA_BASED_CODEC_NAME);
        this.delegatePostingsFormat = delegatePostingsFormat;
        this.bucket = bucket;
    }

    // Needed for SPI
    public CriteriaBasedPostingsFormat() {
        this(null, null);
    }

    @Override
    public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
        if (delegatePostingsFormat == null) {
            throw new UnsupportedOperationException(
                "Error - " + getClass().getName() + " has been constructed without a choice of PostingsFormat"
            );
        }

        FieldsConsumer fieldsConsumer = delegatePostingsFormat.fieldsConsumer(state);
        return new CriteriaBasedFieldsConsumer(fieldsConsumer, state);
    }

    @Override
    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
        assert state.segmentInfo.getAttribute(DELEGATE_CODEC_KEY) != null;
        return PostingsFormat.forName(state.segmentInfo.getAttribute(DELEGATE_CODEC_KEY)).fieldsProducer(state);
    }

    class CriteriaBasedFieldsConsumer extends FieldsConsumer {
        private final FieldsConsumer delegateFieldsConsumer;
        private SegmentWriteState state;
        private boolean closed;

        public CriteriaBasedFieldsConsumer(FieldsConsumer delegateFieldsConsumer, SegmentWriteState state) {
            this.delegateFieldsConsumer = delegateFieldsConsumer;
            this.state = state;
        }

        @Override
        public void write(Fields fields, NormsProducer norms) throws IOException {
            delegateFieldsConsumer.write(fields, norms);
            FieldInfo idFieldInfo = state.fieldInfos.fieldInfo(ATTRIBUTE_BINDING_TARGET_FIELD);
            if (bucket != null) {
                state.segmentInfo.putAttribute(BUCKET_NAME, bucket);
                if (idFieldInfo != null) {
                    idFieldInfo.putAttribute(BUCKET_NAME, bucket);
                }
            } else if (state.segmentInfo.getAttribute(BUCKET_NAME) == null) {
                // Segments belonging to the parent writer never get a real bucket. Writes can land on
                // the parent writer (like no-op writes, or the temporary tombstone entries added for
                // deletes/updates to sync versions across child and parent writers), so segments from
                // that writer carry a placeholder bucket instead.
                state.segmentInfo.putAttribute(BUCKET_NAME, PLACEHOLDER_BUCKET_FOR_PARENT_WRITER);
                if (idFieldInfo != null) {
                    idFieldInfo.putAttribute(BUCKET_NAME, PLACEHOLDER_BUCKET_FOR_PARENT_WRITER);
                }
            } else if (idFieldInfo != null) {
                idFieldInfo.putAttribute(BUCKET_NAME, state.segmentInfo.getAttribute(BUCKET_NAME));
            }

            state.segmentInfo.putAttribute(DELEGATE_CODEC_KEY, delegatePostingsFormat.getName());
        }

        @Override
        public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
            delegateFieldsConsumer.merge(mergeState, norms);
            Set<String> mergeFieldNames = StreamSupport.stream(mergeState.mergeFieldInfos.spliterator(), false)
                .map(FieldInfo::getName)
                .collect(Collectors.toSet());
            if (mergeFieldNames.contains(ATTRIBUTE_BINDING_TARGET_FIELD) && mergeState.fieldInfos.length > 0) {
                String attribute = mergeState.fieldInfos[0].fieldInfo(ATTRIBUTE_BINDING_TARGET_FIELD).getAttribute(BUCKET_NAME);
                assert attribute != null : "Attribute should not be null during merging segment";
                mergeState.segmentInfo.putAttribute(BUCKET_NAME, attribute);

                mergeState.mergeFieldInfos.fieldInfo(ATTRIBUTE_BINDING_TARGET_FIELD).putAttribute(BUCKET_NAME, attribute);
            }
Comment on lines +118 to +130
Contributor
⚠️ Potential issue | 🟠 Major

NPE when the first merged segment lacks the _id field.

Line 125 reads the bucket attribute from mergeState.fieldInfos[0].fieldInfo(ATTRIBUTE_BINDING_TARGET_FIELD) without null-checking the result. If the first segment is a parent-writer or tombstone segment without the _id field (as the comments in the write() method acknowledge is possible), fieldInfo() returns null, and calling .getAttribute() causes an NPE. This is not protected by the assertion on line 126 since assertions are disabled in production.

Scan the merge inputs for the first segment that actually contains the _id field rather than assuming the first segment has it.

🐛 Proposed fix
-            if (mergeFieldNames.contains(ATTRIBUTE_BINDING_TARGET_FIELD) && mergeState.fieldInfos.length > 0) {
-                String attribute = mergeState.fieldInfos[0].fieldInfo(ATTRIBUTE_BINDING_TARGET_FIELD).getAttribute(BUCKET_NAME);
-                assert attribute != null : "Attribute should not be null during merging segment";
+            if (mergeFieldNames.contains(ATTRIBUTE_BINDING_TARGET_FIELD) && mergeState.fieldInfos.length > 0) {
+                String attribute = null;
+                for (int i = 0; i < mergeState.fieldInfos.length; i++) {
+                    FieldInfo idInfo = mergeState.fieldInfos[i].fieldInfo(ATTRIBUTE_BINDING_TARGET_FIELD);
+                    if (idInfo != null) {
+                        attribute = idInfo.getAttribute(BUCKET_NAME);
+                        if (attribute != null) {
+                            break;
+                        }
+                    }
+                }
+                if (attribute == null) {
+                    throw new IllegalStateException(
+                        "Bucket attribute missing during merge for " + ATTRIBUTE_BINDING_TARGET_FIELD
+                    );
+                }
                 mergeState.segmentInfo.putAttribute(BUCKET_NAME, attribute);
 
                 mergeState.mergeFieldInfos.fieldInfo(ATTRIBUTE_BINDING_TARGET_FIELD).putAttribute(BUCKET_NAME, attribute);
             }

            mergeState.segmentInfo.putAttribute(DELEGATE_CODEC_KEY, delegatePostingsFormat.getName());
        }

        @Override
        public void close() throws IOException {
            if (closed) {
                return;
            }

            closed = true;
            delegateFieldsConsumer.close();
        }
    }
}
META-INF/services/org.apache.lucene.codecs.Codec

@@ -1,4 +1,3 @@
 org.opensearch.index.codec.composite.composite912.Composite912Codec
 org.opensearch.index.codec.composite.composite103.Composite103Codec
 org.opensearch.index.codec.composite.backward_codecs.composite101.Composite101Codec
-org.opensearch.index.codec.CriteriaBasedCodec
Member

Does this need to be registered in SPI?

RS146BIJAY (Contributor, Author) on Jan 26, 2026

We won't need to register a name for the Codec: SPI will no longer instantiate a new Codec, since we now pass the delegate codec's name as the parameter. SPI will only instantiate the new PostingsFormat: https://github.com/opensearch-project/OpenSearch/pull/20442/changes#diff-577d44ce3ec453c1808090ac68139e531a6d7ba4a0c65350553967c3e159289b

META-INF/services/org.apache.lucene.codecs.PostingsFormat

@@ -1,2 +1,3 @@
 org.apache.lucene.search.suggest.document.Completion50PostingsFormat
 org.opensearch.index.codec.fuzzy.FuzzyFilterPostingsFormat
+org.opensearch.index.codec.CriteriaBasedPostingsFormat
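
Editor's note: a hedged sketch (illustrative, not PR code) of the read-path lookup behind the discussion above. PerFieldPostingsFormat records each field's format name in the segment and resolves it through SPI at read time; the name recorded here is CRITERIA_BASED_CODEC_NAME, which is why CriteriaBasedPostingsFormat needs the services entry while the codec itself, now reusing its delegate's name, does not. The class name SpiLookupSketch is hypothetical, and the sketch assumes it sits in org.opensearch.index.codec.

import org.apache.lucene.codecs.PostingsFormat;

class SpiLookupSketch {
    static PostingsFormat resolve() {
        // Succeeds only because META-INF/services/org.apache.lucene.codecs.PostingsFormat
        // lists CriteriaBasedPostingsFormat (registered name: "CriteriaBasedCodec99").
        return PostingsFormat.forName(CriteriaBasedPostingsFormat.CRITERIA_BASED_CODEC_NAME);
    }
}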
server/src/test/java/org/opensearch/index/codec/CriteriaBasedPostingsFormatTests.java

@@ -0,0 +1,134 @@
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

package org.opensearch.index.codec;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.BasePostingsFormatTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;

public class CriteriaBasedPostingsFormatTests extends BasePostingsFormatTestCase {

    private static final String TEST_BUCKET = "test_bucket";
    private Codec criteriaBasedPostingFormat = TestUtil.alwaysPostingsFormat(
        new CriteriaBasedPostingsFormat(TestUtil.getDefaultPostingsFormat(), TEST_BUCKET)
    );

    public void testBasicFunctionality() throws IOException {
        Directory dir = new ByteBuffersDirectory();
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setCodec(criteriaBasedPostingFormat);

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            for (int i = 0; i < 100; i++) {
                Document doc = new Document();
                doc.add(new StringField("_id", "doc" + i, Field.Store.YES));
                doc.add(new TextField("field", "value" + i, Field.Store.YES));
                writer.addDocument(doc);
            }
        }

        try (IndexReader reader = DirectoryReader.open(dir)) {
            assertEquals(100, reader.numDocs());

            Terms terms = MultiTerms.getTerms(reader, "field");
            assertNotNull(terms);
            TermsEnum termsEnum = terms.iterator();

            int count = 0;
            BytesRef term;
            while ((term = termsEnum.next()) != null) {
                assertTrue(term.utf8ToString().startsWith("value"));
                count++;
            }
            assertEquals(100, count);
        }
Comment on lines +41 to +69
Contributor
⚠️ Potential issue | 🟡 Minor

Close Directory instances to avoid resource leaks in tests.

All four test methods create ByteBuffersDirectory instances without closing them. ByteBuffersDirectory implements Closeable and must be closed to free heap resources. Wrap the directory in try-with-resources, ensuring the IndexReader closes before the directory:

♻️ Suggested fix (apply to all four test methods)
-        Directory dir = new ByteBuffersDirectory();
-        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
-        iwc.setCodec(criteriaBasedPostingFormat);
-
-        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
-            for (int i = 0; i < 100; i++) {
-                Document doc = new Document();
-                doc.add(new StringField("_id", "doc" + i, Field.Store.YES));
-                doc.add(new TextField("field", "value" + i, Field.Store.YES));
-                writer.addDocument(doc);
-            }
-        }
-
-        try (IndexReader reader = DirectoryReader.open(dir)) {
-            assertEquals(100, reader.numDocs());
-
-            Terms terms = MultiTerms.getTerms(reader, "field");
-            assertNotNull(terms);
-            TermsEnum termsEnum = terms.iterator();
-
-            int count = 0;
-            BytesRef term;
-            while ((term = termsEnum.next()) != null) {
-                assertTrue(term.utf8ToString().startsWith("value"));
-                count++;
-            }
-            assertEquals(100, count);
-        }
+        try (Directory dir = new ByteBuffersDirectory()) {
+            IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
+            iwc.setCodec(criteriaBasedPostingFormat);
+
+            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
+                for (int i = 0; i < 100; i++) {
+                    Document doc = new Document();
+                    doc.add(new StringField("_id", "doc" + i, Field.Store.YES));
+                    doc.add(new TextField("field", "value" + i, Field.Store.YES));
+                    writer.addDocument(doc);
+                }
+            }
+
+            try (IndexReader reader = DirectoryReader.open(dir)) {
+                assertEquals(100, reader.numDocs());
+
+                Terms terms = MultiTerms.getTerms(reader, "field");
+                assertNotNull(terms);
+                TermsEnum termsEnum = terms.iterator();
+
+                int count = 0;
+                BytesRef term;
+                while ((term = termsEnum.next()) != null) {
+                    assertTrue(term.utf8ToString().startsWith("value"));
+                    count++;
+                }
+                assertEquals(100, count);
+            }
+        }
    }

    public void testBucketAttributeIsSetOnSegment() throws IOException {
        Directory dir = new ByteBuffersDirectory();
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setCodec(criteriaBasedPostingFormat);

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            Document doc = new Document();
            doc.add(new StringField("_id", "doc1", Field.Store.YES));
            doc.add(new TextField("content", "test content", Field.Store.YES));
            writer.addDocument(doc);
        }

        SegmentInfos segmentInfos = SegmentInfos.readLatestCommit(dir);
        assertFalse("Should have at least one segment", segmentInfos.asList().isEmpty());

        for (SegmentCommitInfo segmentCommitInfo : segmentInfos) {
            String bucketValue = segmentCommitInfo.info.getAttribute(CriteriaBasedPostingsFormat.BUCKET_NAME);
            assertEquals("Bucket attribute should be set", TEST_BUCKET, bucketValue);
        }
    }

    public void testNullBucketSetsPlaceholder() throws IOException {
        Directory dir = new ByteBuffersDirectory();
        Codec nullBucketCodec = TestUtil.alwaysPostingsFormat(new CriteriaBasedPostingsFormat(TestUtil.getDefaultPostingsFormat(), null));

        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setCodec(nullBucketCodec);

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            Document doc = new Document();
            doc.add(new StringField("_id", "doc1", Field.Store.YES));
            doc.add(new TextField("content", "test content", Field.Store.YES));
            writer.addDocument(doc);
        }

        SegmentInfos segmentInfos = SegmentInfos.readLatestCommit(dir);
        for (SegmentCommitInfo segmentCommitInfo : segmentInfos) {
            String bucketValue = segmentCommitInfo.info.getAttribute(CriteriaBasedPostingsFormat.BUCKET_NAME);
            assertEquals("Placeholder bucket should be set for null bucket", "-2", bucketValue);
        }
    }

    public void testEmptyIndex() throws IOException {
        Directory dir = new ByteBuffersDirectory();
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setCodec(criteriaBasedPostingFormat);

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            // Don't add any documents
        }

        try (IndexReader reader = DirectoryReader.open(dir)) {
            assertEquals(0, reader.numDocs());
            Terms terms = MultiTerms.getTerms(reader, "_id");
            assertNull("Terms should be null for empty index", terms);
        }
    }

    @Override
    protected Codec getCodec() {
        return criteriaBasedPostingFormat;
    }
}