diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java index 3916afe77caf9..44c43cf3a93fb 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java @@ -71,6 +71,7 @@ final class ES92GpuHnswVectorsWriter extends KnnVectorsWriter { private static final Logger logger = LogManager.getLogger(ES92GpuHnswVectorsWriter.class); private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ES92GpuHnswVectorsWriter.class); private static final int LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT = 16; + private static final long DIRECT_COPY_THRESHOLD_IN_BYTES = 128 * 1024 * 1024; // 128MB private final CuVSResourceManager cuVSResourceManager; private final SegmentWriteState segmentWriteState; @@ -191,10 +192,14 @@ private void flushFieldsWithoutMemoryMappedFile(Sorter.DocMap sortMap) throws IO // Will not be indexed on the GPU flushFieldWithMockGraph(fieldInfo, numVectors, sortMap); } else { - var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), CuVSMatrix.DataType.FLOAT); - try { + try ( + var resourcesHolder = new ResourcesHolder( + cuVSResourceManager, + cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), CuVSMatrix.DataType.FLOAT) + ) + ) { var builder = CuVSMatrix.deviceBuilder( - cuVSResources, + resourcesHolder.resources(), numVectors, fieldInfo.getVectorDimension(), CuVSMatrix.DataType.FLOAT @@ -203,10 +208,8 @@ private void flushFieldsWithoutMemoryMappedFile(Sorter.DocMap sortMap) throws IO builder.addVector(vector); } try (var dataset = builder.build()) { - flushFieldWithGpuGraph(cuVSResources, fieldInfo, dataset, sortMap); + flushFieldWithGpuGraph(resourcesHolder, fieldInfo, dataset, sortMap); } - } finally { - cuVSResourceManager.release(cuVSResources); } } var elapsed = started - System.nanoTime(); @@ -223,17 +226,13 @@ private void flushFieldWithMockGraph(FieldInfo fieldInfo, int numVectors, Sorter } } - private void flushFieldWithGpuGraph( - CuVSResourceManager.ManagedCuVSResources resources, - FieldInfo fieldInfo, - CuVSMatrix dataset, - Sorter.DocMap sortMap - ) throws IOException { + private void flushFieldWithGpuGraph(ResourcesHolder resourcesHolder, FieldInfo fieldInfo, CuVSMatrix dataset, Sorter.DocMap sortMap) + throws IOException { if (sortMap == null) { - generateGpuGraphAndWriteMeta(resources, fieldInfo, dataset); + generateGpuGraphAndWriteMeta(resourcesHolder, fieldInfo, dataset); } else { // TODO: use sortMap - generateGpuGraphAndWriteMeta(resources, fieldInfo, dataset); + generateGpuGraphAndWriteMeta(resourcesHolder, fieldInfo, dataset); } } @@ -265,20 +264,25 @@ public long ramBytesUsed() { return total; } - private void generateGpuGraphAndWriteMeta( - CuVSResourceManager.ManagedCuVSResources cuVSResources, - FieldInfo fieldInfo, - CuVSMatrix dataset - ) throws IOException { + private void generateGpuGraphAndWriteMeta(ResourcesHolder resourcesHolder, FieldInfo fieldInfo, CuVSMatrix dataset) throws IOException { try { assert dataset.size() >= MIN_NUM_VECTORS_FOR_GPU_BUILD; long vectorIndexOffset = vectorIndex.getFilePointer(); int[][] graphLevelNodeOffsets = new int[1][]; final HnswGraph graph; - try (var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset)) { + try (var index = buildGPUIndex(resourcesHolder.resources(), fieldInfo.getVectorSimilarityFunction(), dataset)) { assert index != null : "GPU index should be built for field: " + fieldInfo.name; - graph = writeGraph(index.getGraph(), graphLevelNodeOffsets); + var deviceGraph = index.getGraph(); + var graphSize = deviceGraph.size() * deviceGraph.columns() * Integer.BYTES; + if (graphSize < DIRECT_COPY_THRESHOLD_IN_BYTES) { + try (var hostGraph = deviceGraph.toHost()) { + resourcesHolder.close(); + graph = writeGraph(hostGraph, graphLevelNodeOffsets); + } + } else { + graph = writeGraph(deviceGraph, graphLevelNodeOffsets); + } } long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset; writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, (int) dataset.size(), graph, graphLevelNodeOffsets); @@ -479,25 +483,35 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE if (numVectors >= MIN_NUM_VECTORS_FOR_GPU_BUILD) { if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) { // Direct access to mmapped file - final var dataset = DatasetUtils.getInstance() - .fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension(), dataType); - - var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType); - try { - generateGpuGraphAndWriteMeta(cuVSResources, fieldInfo, dataset); - } finally { - dataset.close(); - cuVSResourceManager.release(cuVSResources); + + try ( + var dataset = DatasetUtils.getInstance() + .fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension(), dataType); + var resourcesHolder = new ResourcesHolder( + cuVSResourceManager, + cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType) + ) + ) { + generateGpuGraphAndWriteMeta(resourcesHolder, fieldInfo, dataset); } } else { logger.debug( () -> "Cannot mmap merged raw vectors temporary file. IndexInput type [" + input.getClass().getSimpleName() + "]" ); - var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType); - try { + try ( + var resourcesHolder = new ResourcesHolder( + cuVSResourceManager, + cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType) + ) + ) { // Read vector-by-vector - var builder = CuVSMatrix.deviceBuilder(cuVSResources, numVectors, fieldInfo.getVectorDimension(), dataType); + var builder = CuVSMatrix.deviceBuilder( + resourcesHolder.resources(), + numVectors, + fieldInfo.getVectorDimension(), + dataType + ); // During merging, we use quantized data, so we need to support byte[] too. // That's how our current formats work: use floats during indexing, and quantized data to build a graph @@ -517,10 +531,8 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE } } try (var dataset = builder.build()) { - generateGpuGraphAndWriteMeta(cuVSResources, fieldInfo, dataset); + generateGpuGraphAndWriteMeta(resourcesHolder, fieldInfo, dataset); } - } finally { - cuVSResourceManager.release(cuVSResources); } } } else { diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ResourcesHolder.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ResourcesHolder.java new file mode 100644 index 0000000000000..7517cb04713aa --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ResourcesHolder.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu.codec; + +/** + * Holds an acquired resource, allows to manually release it while ensuring it gets released (closed) via the {@link AutoCloseable} + * pattern. + */ +class ResourcesHolder implements AutoCloseable { + + private final CuVSResourceManager resourceManager; + private CuVSResourceManager.ManagedCuVSResources managedResource; + + ResourcesHolder(CuVSResourceManager cuVSResourceManager, CuVSResourceManager.ManagedCuVSResources managedResource) { + this.resourceManager = cuVSResourceManager; + this.managedResource = managedResource; + } + + CuVSResourceManager.ManagedCuVSResources resources() { + return managedResource; + } + + void release() { + if (managedResource != null) { + var toRelease = managedResource; + managedResource = null; + resourceManager.release(toRelease); + } + } + + @Override + public void close() { + release(); + } +}