Skip to content

Commit 7a32b26

Browse files
📝 Add docstrings to handle-file
Docstrings generation was requested by @xuxiong1 in #20157 (comment). The following file was modified: `modules/store-subdirectory/src/main/java/org/opensearch/plugin/store/subdirectory/SubdirectoryAwareStore.java`
1 parent 2b6d266 commit 7a32b26

File tree

1 file changed

+71
-14
lines changed

1 file changed

+71
-14
lines changed

modules/store-subdirectory/src/main/java/org/opensearch/plugin/store/subdirectory/SubdirectoryAwareStore.java

Lines changed: 71 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import org.apache.logging.log4j.LogManager;
1212
import org.apache.logging.log4j.Logger;
13+
import org.apache.lucene.codecs.CodecUtil;
1314
import org.apache.lucene.index.IndexCommit;
1415
import org.apache.lucene.index.IndexFileNames;
1516
import org.apache.lucene.index.SegmentInfos;
@@ -19,6 +20,7 @@
1920
import org.apache.lucene.store.IOContext;
2021
import org.apache.lucene.store.IndexInput;
2122
import org.apache.lucene.store.IndexOutput;
23+
import org.apache.lucene.util.Version;
2224
import org.opensearch.common.lucene.Lucene;
2325
import org.opensearch.core.index.shard.ShardId;
2426
import org.opensearch.env.ShardLock;
@@ -111,6 +113,15 @@ public SubdirectoryAwareStore(
111113
);
112114
}
113115

116+
/**
117+
* Build a MetadataSnapshot that includes file metadata and user data from the commit and from files located in shard subdirectories.
118+
*
119+
* Aggregates metadata read from the commit's SegmentInfos and augments it with metadata discovered in subdirectories; the snapshot's document count includes documents found in subdirectory segment files.
120+
*
121+
* @param commit the Lucene index commit to read metadata from
122+
* @return a MetadataSnapshot containing an immutable map of file metadata, an immutable map of commit user data, and the total document count across root and subdirectory files
123+
* @throws IOException if reading segment information or subdirectory files fails
124+
*/
114125
@Override
115126
public MetadataSnapshot getMetadata(IndexCommit commit) throws IOException {
116127
long totalNumDocs = 0;
@@ -122,31 +133,49 @@ public MetadataSnapshot getMetadata(IndexCommit commit) throws IOException {
122133
Map<String, String> commitUserDataBuilder = new HashMap<>(regularMetadata.userData);
123134
totalNumDocs += regularMetadata.numDocs;
124135

125-
// Load subdirectory files metadata from segments_N files in subdirectories
126-
totalNumDocs += this.loadSubdirectoryMetadataFromSegments(commit, builder);
136+
// Load metadata for subdirectory files (both segment files and non-segment files, such as custom metadata files)
137+
totalNumDocs += this.loadSubdirectoryMetadata(commit, builder);
127138

128139
return new MetadataSnapshot(Collections.unmodifiableMap(builder), Collections.unmodifiableMap(commitUserDataBuilder), totalNumDocs);
129140
}
130141

131142
/**
132-
* Load subdirectory file metadata by reading segments_N files from any subdirectories.
133-
* This leverages the same approach as Store.loadMetadata but for files in subdirectories.
143+
* Load metadata for files located in shard subdirectories, reading segment files (segments_*)
144+
* to collect per-segment document counts and computing metadata for other subdirectory files when missing.
134145
*
135-
* @return the total number of documents in all subdirectory segments
146+
* @param commit the index commit whose file list may include subdirectory paths
147+
* @param builder a mutable map to populate with discovered StoreFileMetadata keyed by file path
148+
* @return the total number of documents contained in all discovered subdirectory segment files
149+
* @throws IOException if reading subdirectory segment or file contents fails
136150
*/
137-
private long loadSubdirectoryMetadataFromSegments(IndexCommit commit, Map<String, StoreFileMetadata> builder) throws IOException {
138-
// Find all segments_N files in subdirectories from the commit
139-
Set<String> subdirectorySegmentFiles = new HashSet<>();
151+
private long loadSubdirectoryMetadata(IndexCommit commit, Map<String, StoreFileMetadata> builder) throws IOException {
152+
// Categorize subdirectory files into segment info files (segments_N) and non-segment-info files
153+
Set<String> subdirectorySegmentInfoFiles = new HashSet<>();
154+
Set<String> subdirectoryNonSegmentInfoFiles = new HashSet<>();
155+
140156
for (String fileName : commit.getFileNames()) {
141-
if (Path.of(fileName).getParent() != null && fileName.contains(IndexFileNames.SEGMENTS)) {
142-
subdirectorySegmentFiles.add(fileName);
157+
Path filePath = Path.of(fileName);
158+
// Only process subdirectory files (files with a parent path)
159+
if (filePath.getParent() != null) {
160+
if (fileName.contains(IndexFileNames.SEGMENTS)) {
161+
subdirectorySegmentInfoFiles.add(fileName);
162+
} else {
163+
subdirectoryNonSegmentInfoFiles.add(fileName);
164+
}
143165
}
144166
}
145167

146168
long totalSubdirectoryNumDocs = 0;
147169
// Process each subdirectory segments_N file
148-
for (String segmentsFilePath : subdirectorySegmentFiles) {
149-
totalSubdirectoryNumDocs += this.loadMetadataFromSubdirectorySegmentsFile(segmentsFilePath, builder);
170+
for (String segmentInfoFilePath : subdirectorySegmentInfoFiles) {
171+
totalSubdirectoryNumDocs += this.loadMetadataFromSubdirectorySegmentsFile(segmentInfoFilePath, builder);
172+
}
173+
174+
// Compute metadata for non-segment files that were not already added to the builder while processing the segments_N files
175+
for (String nonSegmentInfoFile : subdirectoryNonSegmentInfoFiles) {
176+
if (!builder.containsKey(nonSegmentInfoFile)) {
177+
computeFileMetadata(nonSegmentInfoFile, builder);
178+
}
150179
}
151180

152181
return totalSubdirectoryNumDocs;
@@ -183,7 +212,15 @@ private long loadMetadataFromSubdirectorySegmentsFile(String segmentsFilePath, M
183212
}
184213

185214
/**
186-
* Load metadata from SegmentInfos by reusing Store.MetadataSnapshot.loadMetadata
215+
* Load file metadata from the given SegmentInfos and insert entries into the provided builder
216+
* with each file name prefixed by the given path prefix.
217+
*
218+
* @param segmentInfos the SegmentInfos to read metadata from
219+
* @param directory the Directory that contains the segment files
220+
* @param builder a map into which prefixed StoreFileMetadata entries will be inserted; existing
221+
* entries with the same prefixed name will be overwritten
222+
* @param pathPrefix the relative path prefix to prepend to each file name when creating map keys
223+
* @throws IOException if reading segment metadata from the directory fails
187224
*/
188225
private static void loadMetadataFromSegmentInfos(
189226
SegmentInfos segmentInfos,
@@ -213,6 +250,26 @@ private static void loadMetadataFromSegmentInfos(
213250
}
214251
}
215252

253+
/**
254+
* Computes metadata for a single file under the shard data path and inserts it into the provided builder map.
255+
*
256+
* @param fileName the file path relative to the shard data path
257+
* @param builder map to receive the computed StoreFileMetadata keyed by the relative file path
258+
* @throws IOException if reading the file fails or its Lucene codec footer checksum cannot be verified
259+
*/
260+
private void computeFileMetadata(String fileName, Map<String, StoreFileMetadata> builder) throws IOException {
261+
Path filePath = shardPath().getDataPath().resolve(fileName);
262+
try (Directory dir = FSDirectory.open(filePath.getParent())) {
263+
String localFileName = filePath.getFileName().toString();
264+
try (IndexInput in = dir.openInput(localFileName, IOContext.READONCE)) {
265+
long length = in.length();
266+
String checksum = Store.digestToString(CodecUtil.checksumEntireFile(in));
267+
Version version = org.opensearch.Version.CURRENT.minimumIndexCompatibilityVersion().luceneVersion;
268+
builder.put(fileName, new StoreFileMetadata(fileName, length, checksum, version, null));
269+
}
270+
}
271+
}
272+
216273
/**
217274
* A Lucene Directory implementation that handles files in subdirectories.
218275
*
@@ -320,4 +377,4 @@ private String parseFilePath(String fileName) {
320377
}
321378
}
322379
}
323-
}
380+
}

0 commit comments

Comments
 (0)