Skip to content

Commit f255541

Browse files
committed
Performance optimizations for getitem and setitem
1 parent 6231a21 commit f255541

File tree

1 file changed

+148
-40
lines changed
  • graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/memoryview

1 file changed

+148
-40
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/memoryview/MemoryViewNodes.java

Lines changed: 148 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
import com.oracle.graal.python.runtime.object.PythonObjectFactory;
7171
import com.oracle.graal.python.runtime.sequence.storage.SequenceStorage;
7272
import com.oracle.truffle.api.CompilerDirectives;
73+
import com.oracle.truffle.api.CompilerDirectives.CompilationFinal;
7374
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
7475
import com.oracle.truffle.api.CompilerDirectives.ValueType;
7576
import com.oracle.truffle.api.dsl.Cached;
@@ -84,6 +85,7 @@
8485
import com.oracle.truffle.api.interop.UnsupportedMessageException;
8586
import com.oracle.truffle.api.interop.UnsupportedTypeException;
8687
import com.oracle.truffle.api.library.CachedLibrary;
88+
import com.oracle.truffle.api.nodes.ExplodeLoop;
8789
import com.oracle.truffle.api.nodes.Node;
8890
import com.oracle.truffle.api.profiles.ConditionProfile;
8991

@@ -480,14 +482,15 @@ static void doManaged(byte[] src, int srcOffset, int len, PMemoryView self, @Sup
480482
abstract static class ReadItemAtNode extends Node {
481483
public abstract Object execute(PMemoryView self, Object ptr, int offset);
482484

483-
@Specialization(guards = "ptr != null")
484-
static Object doNative(PMemoryView self, Object ptr, int offset,
485+
@Specialization(guards = {"ptr != null", "cachedItemSize == self.getItemSize()", "cachedItemSize <= 8"})
486+
@ExplodeLoop
487+
static Object doNativeCached(PMemoryView self, Object ptr, int offset,
488+
@Cached("self.getItemSize()") int cachedItemSize,
485489
@CachedLibrary(limit = "1") InteropLibrary lib,
486490
@Cached UnpackValueNode unpackValueNode) {
487-
int itemsize = self.getItemSize();
488-
byte[] bytes = new byte[itemsize];
491+
byte[] bytes = new byte[cachedItemSize];
489492
try {
490-
for (int i = 0; i < itemsize; i++) {
493+
for (int i = 0; i < cachedItemSize; i++) {
491494
bytes[i] = (byte) lib.readArrayElement(ptr, offset + i);
492495
}
493496
} catch (UnsupportedMessageException | InvalidArrayIndexException e) {
@@ -496,14 +499,46 @@ static Object doNative(PMemoryView self, Object ptr, int offset,
496499
return unpackValueNode.execute(self.getFormat(), self.getFormatString(), bytes);
497500
}
498501

499-
@Specialization(guards = "ptr == null")
500-
static Object doManaged(PMemoryView self, @SuppressWarnings("unused") Object ptr, int offset,
502+
@Specialization(guards = "ptr != null", replaces = "doNativeCached")
503+
static Object doNativeGeneric(PMemoryView self, Object ptr, int offset,
504+
@CachedLibrary(limit = "1") InteropLibrary lib,
505+
@Cached UnpackValueNode unpackValueNode) {
506+
int itemSize = self.getItemSize();
507+
byte[] bytes = new byte[itemSize];
508+
try {
509+
for (int i = 0; i < itemSize; i++) {
510+
bytes[i] = (byte) lib.readArrayElement(ptr, offset + i);
511+
}
512+
} catch (UnsupportedMessageException | InvalidArrayIndexException e) {
513+
throw CompilerDirectives.shouldNotReachHere("native buffer read failed");
514+
}
515+
return unpackValueNode.execute(self.getFormat(), self.getFormatString(), bytes);
516+
}
517+
518+
@Specialization(guards = {"ptr == null", "cachedItemSize == self.getItemSize()", "cachedItemSize <= 8"})
519+
@ExplodeLoop
520+
static Object doManagedCached(PMemoryView self, @SuppressWarnings("unused") Object ptr, int offset,
521+
@Cached("self.getItemSize()") int cachedItemSize,
501522
@Cached SequenceNodes.GetSequenceStorageNode getStorageNode,
502523
@Cached SequenceStorageNodes.GetItemScalarNode getItemNode,
503524
@Cached UnpackValueNode unpackValueNode) {
504525
// TODO assumes byte storage
505-
byte[] bytes = new byte[self.getItemSize()];
506-
for (int i = 0; i < self.getItemSize(); i++) {
526+
byte[] bytes = new byte[cachedItemSize];
527+
for (int i = 0; i < cachedItemSize; i++) {
528+
bytes[i] = (byte) getItemNode.executeInt(getStorageNode.execute(self.getOwner()), offset + i);
529+
}
530+
return unpackValueNode.execute(self.getFormat(), self.getFormatString(), bytes);
531+
}
532+
533+
@Specialization(guards = "ptr == null", replaces = "doManagedCached")
534+
static Object doManagedGeneric(PMemoryView self, @SuppressWarnings("unused") Object ptr, int offset,
535+
@Cached SequenceNodes.GetSequenceStorageNode getStorageNode,
536+
@Cached SequenceStorageNodes.GetItemScalarNode getItemNode,
537+
@Cached UnpackValueNode unpackValueNode) {
538+
// TODO assumes byte storage
539+
int itemSize = self.getItemSize();
540+
byte[] bytes = new byte[itemSize];
541+
for (int i = 0; i < itemSize; i++) {
507542
bytes[i] = (byte) getItemNode.executeInt(getStorageNode.execute(self.getOwner()), offset + i);
508543
}
509544
return unpackValueNode.execute(self.getFormat(), self.getFormatString(), bytes);
@@ -513,31 +548,64 @@ static Object doManaged(PMemoryView self, @SuppressWarnings("unused") Object ptr
513548
abstract static class WriteItemAtNode extends Node {
514549
public abstract void execute(VirtualFrame frame, PMemoryView self, Object ptr, int offset, Object object);
515550

516-
@Specialization(guards = "ptr != null")
517-
static void doNative(VirtualFrame frame, PMemoryView self, Object ptr, int offset, Object object,
551+
@Specialization(guards = {"ptr != null", "cachedItemSize == self.getItemSize()", "cachedItemSize <= 8"})
552+
@ExplodeLoop
553+
static void doNativeCached(VirtualFrame frame, PMemoryView self, Object ptr, int offset, Object object,
554+
@Cached("self.getItemSize()") int cachedItemSize,
518555
@CachedLibrary(limit = "1") InteropLibrary lib,
519556
@Cached PackValueNode packValueNode) {
520-
int itemsize = self.getItemSize();
521-
byte[] bytes = new byte[itemsize];
557+
byte[] bytes = new byte[cachedItemSize];
522558
packValueNode.execute(frame, self.getFormat(), self.getFormatString(), object, bytes);
523559
try {
524-
for (int i = 0; i < itemsize; i++) {
560+
for (int i = 0; i < cachedItemSize; i++) {
525561
lib.writeArrayElement(ptr, offset + i, bytes[i]);
526562
}
527563
} catch (UnsupportedMessageException | InvalidArrayIndexException | UnsupportedTypeException e) {
528564
throw CompilerDirectives.shouldNotReachHere("native buffer write failed");
529565
}
530566
}
531567

532-
@Specialization(guards = "ptr == null")
533-
static void doManaged(VirtualFrame frame, PMemoryView self, @SuppressWarnings("unused") Object ptr, int offset, Object object,
568+
@Specialization(guards = "ptr != null", replaces = "doNativeCached")
569+
static void doNativeGeneric(VirtualFrame frame, PMemoryView self, Object ptr, int offset, Object object,
570+
@CachedLibrary(limit = "1") InteropLibrary lib,
571+
@Cached PackValueNode packValueNode) {
572+
int itemSize = self.getItemSize();
573+
byte[] bytes = new byte[itemSize];
574+
packValueNode.execute(frame, self.getFormat(), self.getFormatString(), object, bytes);
575+
try {
576+
for (int i = 0; i < itemSize; i++) {
577+
lib.writeArrayElement(ptr, offset + i, bytes[i]);
578+
}
579+
} catch (UnsupportedMessageException | InvalidArrayIndexException | UnsupportedTypeException e) {
580+
throw CompilerDirectives.shouldNotReachHere("native buffer write failed");
581+
}
582+
}
583+
584+
@Specialization(guards = {"ptr == null", "cachedItemSize == self.getItemSize()", "cachedItemSize <= 8"})
585+
@ExplodeLoop
586+
static void doManagedCached(VirtualFrame frame, PMemoryView self, @SuppressWarnings("unused") Object ptr, int offset, Object object,
587+
@Cached("self.getItemSize()") int cachedItemSize,
534588
@Cached PackValueNode packValueNode,
535589
@Cached SequenceNodes.GetSequenceStorageNode getStorageNode,
536590
@Cached SequenceStorageNodes.SetItemScalarNode setItemNode) {
537591
// TODO assumes bytes storage
538-
byte[] bytes = new byte[self.getItemSize()];
592+
byte[] bytes = new byte[cachedItemSize];
539593
packValueNode.execute(frame, self.getFormat(), self.getFormatString(), object, bytes);
540-
for (int i = 0; i < self.getItemSize(); i++) {
594+
for (int i = 0; i < cachedItemSize; i++) {
595+
setItemNode.execute(getStorageNode.execute(self.getOwner()), offset + i, bytes[i]);
596+
}
597+
}
598+
599+
@Specialization(guards = "ptr == null", replaces = "doManagedCached")
600+
static void doManagedGeneric(VirtualFrame frame, PMemoryView self, @SuppressWarnings("unused") Object ptr, int offset, Object object,
601+
@Cached PackValueNode packValueNode,
602+
@Cached SequenceNodes.GetSequenceStorageNode getStorageNode,
603+
@Cached SequenceStorageNodes.SetItemScalarNode setItemNode) {
604+
// TODO assumes bytes storage
605+
int itemSize = self.getItemSize();
606+
byte[] bytes = new byte[itemSize];
607+
packValueNode.execute(frame, self.getFormat(), self.getFormatString(), object, bytes);
608+
for (int i = 0; i < itemSize; i++) {
541609
setItemNode.execute(getStorageNode.execute(self.getOwner()), offset + i, bytes[i]);
542610
}
543611
}
@@ -558,6 +626,8 @@ public MemoryPointer(Object ptr, int offset) {
558626
abstract static class PointerLookupNode extends Node {
559627
@Child private PRaiseNode raiseNode;
560628
@Child private CExtNodes.PCallCapiFunction callCapiFunction;
629+
@Child private PythonObjectLibrary indexLib;
630+
@CompilationFinal private ConditionProfile hasSuboffsetsProfile;
561631

562632
// index can be a tuple, int or int-convertible
563633
public abstract MemoryPointer execute(VirtualFrame frame, PMemoryView self, Object index);
@@ -572,75 +642,113 @@ private void lookupDimension(PMemoryView self, MemoryPointer ptr, int dim, int i
572642
index += nitems;
573643
}
574644
if (index < 0 || index >= nitems) {
575-
throw raise(IndexError, ErrorMessages.INDEX_OUT_OF_BOUNDS_ON_DIMENSION_D, dim + 1);
645+
throw raise(IndexError, ErrorMessages.INDEX_OUT_OF_BOUNDS_ON_DIMENSION_D, dim + 1);
576646
}
577647

578648
ptr.offset += self.getBufferStrides()[dim] * index;
579649

580650
int[] suboffsets = self.getBufferSuboffsets();
581-
if (suboffsets != null && suboffsets[dim] >= 0) {
651+
if (getHasSuboffsetsProfile().profile(suboffsets != null) && suboffsets[dim] >= 0) {
582652
// The length may be out of bounds, but sulong shouldn't care if we don't
583653
// access the out-of-bound part
584654
ptr.ptr = getCallCapiFunction().call(NativeCAPISymbols.FUN_TRUFFLE_ADD_SUBOFFSET, ptr.ptr, ptr.offset, suboffsets[dim], self.getLength());
585655
ptr.offset = 0;
586656
}
587657
}
588658

589-
@Specialization
659+
@Specialization(guards = "self.getDimensions() == 1")
590660
MemoryPointer resolveInt(PMemoryView self, int index) {
591-
if (self.getDimensions() > 1) {
592-
// CPython doesn't implement this either, as of 3.8
593-
throw raise(NotImplementedError, ErrorMessages.MULTI_DIMENSIONAL_SUB_VIEWS_NOT_IMPLEMENTED);
594-
} else if (self.getDimensions() == 0) {
661+
MemoryPointer ptr = new MemoryPointer(self.getBufferPointer(), self.getOffset());
662+
lookupDimension(self, ptr, 0, index);
663+
return ptr;
664+
}
665+
666+
@Specialization(guards = "self.getDimensions() != 1")
667+
MemoryPointer resolveIntError(PMemoryView self, @SuppressWarnings("unused") int index) {
668+
if (self.getDimensions() == 0) {
595669
throw raise(TypeError, ErrorMessages.INVALID_INDEXING_OF_0_DIM_MEMORY);
596670
}
671+
// CPython doesn't implement this either, as of 3.8
672+
throw raise(NotImplementedError, ErrorMessages.MULTI_DIMENSIONAL_SUB_VIEWS_NOT_IMPLEMENTED);
673+
}
674+
675+
@Specialization(guards = {"cachedDimensions == self.getDimensions()", "cachedDimensions <= 8"})
676+
@ExplodeLoop
677+
MemoryPointer resolveTupleCached(PMemoryView self, PTuple indices,
678+
@Cached("self.getDimensions()") int cachedDimensions,
679+
@Cached SequenceNodes.GetSequenceStorageNode getSequenceStorageNode,
680+
@Cached SequenceStorageNodes.LenNode lenNode,
681+
@Cached SequenceStorageNodes.GetItemScalarNode getItemNode) {
682+
SequenceStorage indicesStorage = getSequenceStorageNode.execute(indices);
683+
checkTupleLength(lenNode, indicesStorage, cachedDimensions);
597684
MemoryPointer ptr = new MemoryPointer(self.getBufferPointer(), self.getOffset());
598-
lookupDimension(self, ptr, 0, index);
685+
for (int dim = 0; dim < cachedDimensions; dim++) {
686+
Object indexObj = getItemNode.execute(indicesStorage, dim);
687+
int index = convertIndex(indexObj);
688+
lookupDimension(self, ptr, dim, index);
689+
}
599690
return ptr;
600691
}
601692

602-
// TODO explode loop
603-
@Specialization
604-
MemoryPointer resolveTuple(PMemoryView self, PTuple indices,
693+
@Specialization(replaces = "resolveTupleCached")
694+
MemoryPointer resolveTupleGeneric(PMemoryView self, PTuple indices,
605695
@Cached SequenceNodes.GetSequenceStorageNode getSequenceStorageNode,
606696
@Cached SequenceStorageNodes.LenNode lenNode,
607-
@Cached SequenceStorageNodes.GetItemScalarNode getItemNode,
608-
@Shared("indexLib") @CachedLibrary(limit = "2") PythonObjectLibrary lib) {
697+
@Cached SequenceStorageNodes.GetItemScalarNode getItemNode) {
609698
SequenceStorage indicesStorage = getSequenceStorageNode.execute(indices);
610699
int ndim = self.getDimensions();
611700
checkTupleLength(lenNode, indicesStorage, ndim);
612701
MemoryPointer ptr = new MemoryPointer(self.getBufferPointer(), self.getOffset());
613702
for (int dim = 0; dim < ndim; dim++) {
614703
Object indexObj = getItemNode.execute(indicesStorage, dim);
615-
int index = convertIndex(lib, indexObj);
704+
int index = convertIndex(indexObj);
616705
lookupDimension(self, ptr, dim, index);
617706
}
618707
return ptr;
619708
}
620709

621710
@Specialization(guards = "!isPTuple(indexObj)")
622-
MemoryPointer resolveInt(PMemoryView self, Object indexObj,
623-
@Shared("indexLib") @CachedLibrary(limit = "2") PythonObjectLibrary lib) {
624-
return resolveInt(self, convertIndex(lib, indexObj));
711+
MemoryPointer resolveInt(PMemoryView self, Object indexObj) {
712+
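int index = convertIndex(indexObj);
// A direct Java call bypasses the DSL guards on the int specializations, so check the dimension count here.
return self.getDimensions() == 1 ? resolveInt(self, index) : resolveIntError(self, index);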
625713
}
626714

627715
private void checkTupleLength(SequenceStorageNodes.LenNode lenNode, SequenceStorage indicesStorage, int ndim) {
628716
int length = lenNode.execute(indicesStorage);
629-
if (ndim == 0 && length != 0) {
717+
if (length == ndim) {
718+
return;
719+
}
720+
// Error cases
721+
if (ndim == 0) {
630722
throw raise(TypeError, ErrorMessages.INVALID_INDEXING_OF_0_DIM_MEMORY);
631723
} else if (length > ndim) {
632724
throw raise(TypeError, ErrorMessages.CANNOT_INDEX_D_DIMENSION_VIEW_WITH_D, ndim, length);
633-
} else if (length < ndim) {
725+
} else {
634726
// CPython doesn't implement this either, as of 3.8
635727
throw raise(NotImplementedError, ErrorMessages.SUB_VIEWS_NOT_IMPLEMENTED);
636728
}
637729
}
638730

639-
private int convertIndex(PythonObjectLibrary lib, Object indexObj) {
640-
if (!lib.canBeIndex(indexObj)) {
731+
private int convertIndex(Object indexObj) {
732+
if (!getIndexLib().canBeIndex(indexObj)) {
641733
throw raise(TypeError, ErrorMessages.MEMORYVIEW_INVALID_SLICE_KEY);
642734
}
643-
return lib.asSize(indexObj, IndexError);
735+
return getIndexLib().asSize(indexObj, IndexError);
736+
}
737+
738+
private PythonObjectLibrary getIndexLib() {
739+
if (indexLib == null) {
740+
CompilerDirectives.transferToInterpreterAndInvalidate();
741+
indexLib = insert(PythonObjectLibrary.getFactory().createDispatched(3));
742+
}
743+
return indexLib;
744+
}
745+
746+
private ConditionProfile getHasSuboffsetsProfile() {
747+
if (hasSuboffsetsProfile == null) {
748+
CompilerDirectives.transferToInterpreterAndInvalidate();
749+
hasSuboffsetsProfile = ConditionProfile.create();
750+
}
751+
return hasSuboffsetsProfile;
644752
}
645753

646754
private PException raise(PythonBuiltinClassType type, String message, Object... args) {

0 commit comments

Comments
 (0)