Skip to content

Commit c5ada66

Browse files
authored
Copy Lucene99FlatVectorsReader allowing direct IO to be specified directly (#125921)
We want to use DirectIO for random access to the raw vector data, so that reading it does not load everything into the page cache.
1 parent f599fe3 commit c5ada66

File tree

9 files changed

+513
-5
lines changed

9 files changed

+513
-5
lines changed

libs/entitlement/src/main/java/org/elasticsearch/entitlement/initialization/EntitlementInitialization.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,10 @@ private static PolicyManager createPolicyManager() {
279279
new FilesEntitlement(List.of(FileData.ofBaseDirPath(CONFIG, READ), FileData.ofBaseDirPath(DATA, READ_WRITE)))
280280
)
281281
),
282-
new Scope("org.apache.lucene.misc", List.of(new FilesEntitlement(List.of(FileData.ofBaseDirPath(DATA, READ_WRITE))))),
282+
new Scope(
283+
"org.apache.lucene.misc",
284+
List.of(new FilesEntitlement(List.of(FileData.ofBaseDirPath(DATA, READ_WRITE))), new ReadStoreAttributesEntitlement())
285+
),
283286
new Scope(
284287
"org.apache.logging.log4j.core",
285288
List.of(new ManageThreadsEntitlement(), new FilesEntitlement(List.of(FileData.ofBaseDirPath(LOGS, READ_WRITE))))
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.codec.vectors.es818;
11+
12+
import org.apache.lucene.store.IOContext;
13+
import org.apache.lucene.store.IndexInput;
14+
15+
import java.io.IOException;
16+
17+
/**
18+
* A hook for {@link DirectIOLucene99FlatVectorsReader} to specify the input should be opened using DirectIO.
19+
* Remove when IOContext allows more extensible payloads to be specified.
20+
*/
21+
public interface DirectIOIndexInputSupplier {
22+
IndexInput openInputDirect(String name, IOContext context) throws IOException;
23+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* @notice
3+
* Licensed to the Apache Software Foundation (ASF) under one or more
4+
* contributor license agreements. See the NOTICE file distributed with
5+
* this work for additional information regarding copyright ownership.
6+
* The ASF licenses this file to You under the Apache License, Version 2.0
7+
* (the "License"); you may not use this file except in compliance with
8+
* the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Modifications copyright (C) 2024 Elasticsearch B.V.
19+
*/
20+
package org.elasticsearch.index.codec.vectors.es818;
21+
22+
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
23+
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
24+
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
25+
import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
26+
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter;
27+
import org.apache.lucene.index.SegmentReadState;
28+
import org.apache.lucene.index.SegmentWriteState;
29+
30+
import java.io.IOException;
31+
32+
/**
33+
* Copied from Lucene99FlatVectorsFormat in Lucene 10.1
34+
*
35+
* This is copied to change the implementation of {@link #fieldsReader} only.
36+
* The codec format itself is not changed, so we keep the original {@link #NAME}
37+
*/
38+
public class DirectIOLucene99FlatVectorsFormat extends FlatVectorsFormat {
39+
40+
static final String NAME = "Lucene99FlatVectorsFormat";
41+
static final String META_CODEC_NAME = "Lucene99FlatVectorsFormatMeta";
42+
static final String VECTOR_DATA_CODEC_NAME = "Lucene99FlatVectorsFormatData";
43+
static final String META_EXTENSION = "vemf";
44+
static final String VECTOR_DATA_EXTENSION = "vec";
45+
46+
public static final int VERSION_START = 0;
47+
public static final int VERSION_CURRENT = VERSION_START;
48+
49+
static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16;
50+
private final FlatVectorsScorer vectorsScorer;
51+
52+
/** Constructs a format */
53+
public DirectIOLucene99FlatVectorsFormat(FlatVectorsScorer vectorsScorer) {
54+
super(NAME);
55+
this.vectorsScorer = vectorsScorer;
56+
}
57+
58+
@Override
59+
public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
60+
return new Lucene99FlatVectorsWriter(state, vectorsScorer);
61+
}
62+
63+
@Override
64+
public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException {
65+
return new DirectIOLucene99FlatVectorsReader(state, vectorsScorer);
66+
}
67+
68+
@Override
69+
public String toString() {
70+
return "ES818FlatVectorsFormat(" + "vectorsScorer=" + vectorsScorer + ')';
71+
}
72+
}

0 commit comments

Comments
 (0)