-
Notifications
You must be signed in to change notification settings - Fork 25.5k
[GPU] Support for performance profiling #136021
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 4 commits
c20fa4f
c5224b8
1bc1b68
9eee2a6
f034d79
5f0f4b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,6 +46,7 @@ build/ | |
**/.local* | ||
.vagrant/ | ||
/logs/ | ||
**/target/ | ||
|
||
# osx stuff | ||
.DS_Store | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,13 +51,27 @@ tasks.register("checkVec", JavaExec) { | |
systemProperty "es.logger.out", "console" | ||
systemProperty "es.logger.level", "INFO" // Change to DEBUG if needed | ||
systemProperty 'es.nativelibs.path', TestUtil.getTestLibraryPath(file("../../libs/native/libraries/build/platform/").toString()) | ||
jvmArgs '-Xms4g', '-Xmx4g', '-Djava.util.concurrent.ForkJoinPool.common.parallelism=8', '-XX:+UnlockDiagnosticVMOptions', '-XX:+DebugNonSafepoints', '-XX:+HeapDumpOnOutOfMemoryError' | ||
jvmArgs '-Xms16g', '-Xmx16g', '-Djava.util.concurrent.ForkJoinPool.common.parallelism=8', '-XX:+UnlockDiagnosticVMOptions', '-XX:+DebugNonSafepoints', '-XX:+HeapDumpOnOutOfMemoryError' | ||
if (buildParams.getRuntimeJavaVersion().map { it.majorVersion.toInteger() }.get() >= 21) { | ||
jvmArgs '--add-modules=jdk.incubator.vector', '--enable-native-access=ALL-UNNAMED' | ||
} | ||
if (System.getenv("DO_PROFILING") != null) { | ||
jvmArgs '-XX:StartFlightRecording=dumponexit=true,maxsize=250M,filename=knn.jfr,settings=profile.jfc' | ||
} | ||
def asyncProfilerPath = System.getProperty("asyncProfiler.path", null) | ||
if (asyncProfilerPath != null) { | ||
if (OS.current().equals(OS.MAC)) { | ||
def asyncProfilerAgent = "${asyncProfilerPath}/lib/libasyncProfiler.dylib" | ||
println "Using async-profiler agent ${asyncProfilerAgent}" | ||
jvmArgs "-agentpath:${asyncProfilerAgent}=start,event=cpu,interval=10ms,file=${layout.buildDirectory.asFile.get()}/tmp/elasticsearch-0_%t_%p.jfr" | ||
} else if (OS.current().equals(OS.LINUX)) { | ||
def asyncProfilerAgent = "${asyncProfilerPath}/lib/libasyncProfiler.so" | ||
println "Using async-profiler agent ${asyncProfilerAgent}" | ||
jvmArgs "-agentpath:${asyncProfilerAgent}=start,event=cpu,interval=10ms,wall=50ms,file=${layout.buildDirectory.asFile.get()}/tmp/elasticsearch-0_%t_%p.jfr" | ||
} else { | ||
println "Ignoring 'asyncProfiler.path': not available on ${OS.current()}"; | ||
} | ||
} | ||
Comment on lines
+62
to
+74
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am cool with this. However, why don't we add wall to Mac as well? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I tried it and had to back off. Looking at the error I got and at the async-profiler code, apparently only Linux has an implementation that uses perf events, which lets you record both CPU time and wall time at the same time. On Mac, the underlying engine is less flexible/precise, and you can have one or the other. I'm wondering: maybe I should add an option for that, like adding a |
||
if (buildParams.getIsRuntimeJavaHomeSet()) { | ||
executable = "${buildParams.runtimeJavaHome.get()}/bin/java" + (OS.current() == OS.WINDOWS ? '.exe' : '') | ||
} else { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,7 @@ | |
import org.apache.lucene.index.ConcurrentMergeScheduler; | ||
import org.apache.lucene.index.IndexWriter; | ||
import org.apache.lucene.index.IndexWriterConfig; | ||
import org.apache.lucene.index.IndexableField; | ||
import org.apache.lucene.index.MergePolicy; | ||
import org.apache.lucene.index.VectorEncoding; | ||
import org.apache.lucene.index.VectorSimilarityFunction; | ||
|
@@ -65,7 +66,6 @@ | |
import static org.elasticsearch.test.knn.KnnIndexTester.logger; | ||
|
||
class KnnIndexer { | ||
private static final double WRITER_BUFFER_MB = 128; | ||
static final String ID_FIELD = "id"; | ||
static final String VECTOR_FIELD = "vector"; | ||
|
||
|
@@ -78,6 +78,7 @@ class KnnIndexer { | |
private final int numDocs; | ||
private final int numIndexThreads; | ||
private final MergePolicy mergePolicy; | ||
private final double writerBufferSizeInMb; | ||
|
||
KnnIndexer( | ||
List<Path> docsPath, | ||
|
@@ -88,7 +89,8 @@ class KnnIndexer { | |
int dim, | ||
VectorSimilarityFunction similarityFunction, | ||
int numDocs, | ||
MergePolicy mergePolicy | ||
MergePolicy mergePolicy, | ||
double writerBufferSizeInMb | ||
) { | ||
this.docsPath = docsPath; | ||
this.indexPath = indexPath; | ||
|
@@ -99,12 +101,14 @@ class KnnIndexer { | |
this.similarityFunction = similarityFunction; | ||
this.numDocs = numDocs; | ||
this.mergePolicy = mergePolicy; | ||
this.writerBufferSizeInMb = writerBufferSizeInMb; | ||
} | ||
|
||
void createIndex(KnnIndexTester.Results result) throws IOException, InterruptedException, ExecutionException { | ||
IndexWriterConfig iwc = new IndexWriterConfig().setOpenMode(IndexWriterConfig.OpenMode.CREATE); | ||
iwc.setCodec(codec); | ||
iwc.setRAMBufferSizeMB(WRITER_BUFFER_MB); | ||
iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); | ||
iwc.setRAMBufferSizeMB(writerBufferSizeInMb); | ||
Comment on lines
+110
to
+111
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need to be careful: by default, we should benchmark with ES defaults. Optimizing our benchmarks but not our production code can give a false sense of improvement. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
iwc.setUseCompoundFile(false); | ||
if (mergePolicy != null) { | ||
iwc.setMergePolicy(mergePolicy); | ||
|
@@ -248,6 +252,9 @@ static class IndexerThread extends Thread { | |
private final float[] floatVectorBuffer; | ||
private final VectorReader in; | ||
|
||
long readTime; | ||
long docAddTime; | ||
|
||
private IndexerThread( | ||
IndexWriter iw, | ||
VectorReader in, | ||
|
@@ -283,6 +290,7 @@ public void run() { | |
} catch (IOException ioe) { | ||
throw new UncheckedIOException(ioe); | ||
} | ||
logger.debug("Index thread times: [{}s] read, [{}s] add doc", readTime / 1e-9, docAddTime / 1e-9); | ||
ldematte marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
} | ||
|
||
private void _run() throws IOException { | ||
|
@@ -294,23 +302,32 @@ private void _run() throws IOException { | |
continue; | ||
} | ||
|
||
Document doc = new Document(); | ||
var startRead = System.nanoTime(); | ||
final IndexableField field; | ||
switch (vectorEncoding) { | ||
case BYTE -> { | ||
in.next(byteVectorBuffer); | ||
doc.add(new KnnByteVectorField(VECTOR_FIELD, byteVectorBuffer, fieldType)); | ||
field = new KnnByteVectorField(VECTOR_FIELD, byteVectorBuffer, fieldType); | ||
} | ||
case FLOAT32 -> { | ||
in.next(floatVectorBuffer); | ||
doc.add(new KnnFloatVectorField(VECTOR_FIELD, floatVectorBuffer, fieldType)); | ||
field = new KnnFloatVectorField(VECTOR_FIELD, floatVectorBuffer, fieldType); | ||
} | ||
default -> throw new UnsupportedOperationException(); | ||
} | ||
long endRead = System.nanoTime(); | ||
readTime += (endRead - startRead); | ||
|
||
Document doc = new Document(); | ||
doc.add(field); | ||
|
||
if ((id + 1) % 25000 == 0) { | ||
logger.debug("Done indexing " + (id + 1) + " documents."); | ||
} | ||
doc.add(new StoredField(ID_FIELD, id)); | ||
iw.addDocument(doc); | ||
|
||
docAddTime += (System.nanoTime() - endRead); | ||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am uncertain about the async-profiler part, but I trust your expertise on this.