Skip to content

Commit 2c98ca5

Browse files
committed
Make the latest schedule sorting within one block more efficient by
avoiding allocations and optimize innermost loop.
1 parent 75b5b6e commit 2c98ca5

File tree

2 files changed

+57
-43
lines changed

2 files changed

+57
-43
lines changed

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/graph/NodeStack.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,20 @@ private void grow() {
5858
values = newValues;
5959
}
6060

61+
/**
62+
* Reverse the order of the top n elements of this stack.
63+
*/
64+
public void reverseTopElements(int n) {
65+
if (n > 1) {
66+
for (int i = 0; i < (n >> 1); ++i) {
67+
Node a = values[tos - 1 - i];
68+
Node b = values[tos - n + i];
69+
values[tos - 1 - i] = b;
70+
values[tos - n + i] = a;
71+
}
72+
}
73+
}
74+
6175
public Node get(int index) {
6276
return values[index];
6377
}

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/phases/schedule/SchedulePhase.java

Lines changed: 43 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -476,12 +476,14 @@ private static void fillKillSet(LocationSet killed, List<Node> subList) {
476476

477477
private static void sortNodesLatestWithinBlock(ControlFlowGraph cfg, BlockMap<List<Node>> earliestBlockToNodesMap, BlockMap<List<Node>> latestBlockToNodesMap, NodeMap<HIRBlock> currentNodeMap,
478478
BlockMap<ArrayList<FloatingReadNode>> watchListMap, NodeBitMap visited, boolean supportsImplicitNullChecks) {
479+
NodeStack nodeStack = new NodeStack();
479480
for (HIRBlock b : cfg.getBlocks()) {
480-
sortNodesLatestWithinBlock(b, earliestBlockToNodesMap, latestBlockToNodesMap, currentNodeMap, watchListMap, visited, supportsImplicitNullChecks);
481+
sortNodesLatestWithinBlock(nodeStack, b, earliestBlockToNodesMap, latestBlockToNodesMap, currentNodeMap, watchListMap, visited, supportsImplicitNullChecks);
481482
}
482483
}
483484

484-
private static void sortNodesLatestWithinBlock(HIRBlock b, BlockMap<List<Node>> earliestBlockToNodesMap, BlockMap<List<Node>> latestBlockToNodesMap, NodeMap<HIRBlock> nodeMap,
485+
private static void sortNodesLatestWithinBlock(NodeStack nodeStack, HIRBlock b, BlockMap<List<Node>> earliestBlockToNodesMap, BlockMap<List<Node>> latestBlockToNodesMap,
486+
NodeMap<HIRBlock> nodeMap,
485487
BlockMap<ArrayList<FloatingReadNode>> watchListMap, NodeBitMap unprocessed, boolean supportsImplicitNullChecks) {
486488
List<Node> earliestSorting = earliestBlockToNodesMap.get(b);
487489
ArrayList<Node> result = new ArrayList<>(earliestSorting.size());
@@ -499,7 +501,7 @@ private static void sortNodesLatestWithinBlock(HIRBlock b, BlockMap<List<Node>>
499501
// if multiple proxies reference the same value, schedule the value of a
500502
// proxy once
501503
if (value != null && nodeMap.get(value) == b && unprocessed.isMarked(value)) {
502-
sortIntoList(value, b, result, nodeMap, unprocessed, null);
504+
sortIntoList(nodeStack, value, b, result, nodeMap, unprocessed);
503505
}
504506
}
505507
}
@@ -509,18 +511,19 @@ private static void sortNodesLatestWithinBlock(HIRBlock b, BlockMap<List<Node>>
509511
// Only if the end node is either a control split or an end node, we need to force
510512
// it to be the last node in the schedule.
511513
fixedEndNode = endNode;
514+
unprocessed.clear(fixedEndNode);
512515
}
513516
for (Node n : earliestSorting) {
514517
if (n != fixedEndNode) {
515518
if (n instanceof FixedNode) {
516519
assert nodeMap.get(n) == b : Assertions.errorMessageContext("n", n, "b", b);
517-
checkWatchList(b, nodeMap, unprocessed, result, watchList, n);
518-
sortIntoList(n, b, result, nodeMap, unprocessed, null);
520+
checkWatchList(nodeStack, b, nodeMap, unprocessed, result, watchList, n);
521+
sortIntoList(nodeStack, n, b, result, nodeMap, unprocessed);
519522
} else if (nodeMap.get(n) == b && n instanceof FloatingReadNode) {
520523
FloatingReadNode floatingReadNode = (FloatingReadNode) n;
521524
if (isImplicitNullOpportunity(floatingReadNode, b, supportsImplicitNullChecks)) {
522525
// Schedule at the beginning of the block.
523-
sortIntoList(floatingReadNode, b, result, nodeMap, unprocessed, null);
526+
sortIntoList(nodeStack, floatingReadNode, b, result, nodeMap, unprocessed);
524527
} else {
525528
LocationIdentity location = floatingReadNode.getLocationIdentity();
526529
if (b.canKill(location)) {
@@ -539,38 +542,41 @@ private static void sortNodesLatestWithinBlock(HIRBlock b, BlockMap<List<Node>>
539542
assert nodeMap.get(n) == b : n;
540543
assert !(n instanceof FixedNode) : n;
541544
if (unprocessed.isMarked(n)) {
542-
sortIntoList(n, b, result, nodeMap, unprocessed, fixedEndNode);
545+
sortIntoList(nodeStack, n, b, result, nodeMap, unprocessed);
543546
}
544547
}
545548

546-
if (endNode != null && unprocessed.isMarked(endNode)) {
547-
sortIntoList(endNode, b, result, nodeMap, unprocessed, null);
549+
if (fixedEndNode != null) {
550+
result.add(fixedEndNode);
551+
} else if (endNode != null && unprocessed.isMarked(endNode)) {
552+
sortIntoList(nodeStack, endNode, b, result, nodeMap, unprocessed);
548553
}
549554

550555
latestBlockToNodesMap.put(b, result);
551556
}
552557

553-
private static void checkWatchList(HIRBlock b, NodeMap<HIRBlock> nodeMap, NodeBitMap unprocessed, ArrayList<Node> result, ArrayList<FloatingReadNode> watchList, Node n) {
558+
private static void checkWatchList(NodeStack nodeStack, HIRBlock b, NodeMap<HIRBlock> nodeMap, NodeBitMap unprocessed, ArrayList<Node> result, ArrayList<FloatingReadNode> watchList, Node n) {
554559
if (watchList != null && !watchList.isEmpty()) {
555560
// Check if this node kills a node in the watch list.
556561
if (MemoryKill.isSingleMemoryKill(n)) {
557562
LocationIdentity identity = ((SingleMemoryKill) n).getKilledLocationIdentity();
558-
checkWatchList(watchList, identity, b, result, nodeMap, unprocessed);
563+
checkWatchList(nodeStack, watchList, identity, b, result, nodeMap, unprocessed);
559564
} else if (MemoryKill.isMultiMemoryKill(n)) {
560565
for (LocationIdentity identity : ((MultiMemoryKill) n).getKilledLocationIdentities()) {
561-
checkWatchList(watchList, identity, b, result, nodeMap, unprocessed);
566+
checkWatchList(nodeStack, watchList, identity, b, result, nodeMap, unprocessed);
562567
}
563568
}
564569
}
565570
}
566571

567-
private static void checkWatchList(ArrayList<FloatingReadNode> watchList, LocationIdentity identity, HIRBlock b, ArrayList<Node> result, NodeMap<HIRBlock> nodeMap, NodeBitMap unprocessed) {
572+
private static void checkWatchList(NodeStack nodeStack, ArrayList<FloatingReadNode> watchList, LocationIdentity identity, HIRBlock b, ArrayList<Node> result, NodeMap<HIRBlock> nodeMap,
573+
NodeBitMap unprocessed) {
568574
if (identity.isImmutable()) {
569575
// Nothing to do. This can happen for an initialization write.
570576
} else if (identity.isAny()) {
571577
for (FloatingReadNode r : watchList) {
572578
if (unprocessed.isMarked(r)) {
573-
sortIntoList(r, b, result, nodeMap, unprocessed, null);
579+
sortIntoList(nodeStack, r, b, result, nodeMap, unprocessed);
574580
}
575581
}
576582
watchList.clear();
@@ -582,7 +588,7 @@ private static void checkWatchList(ArrayList<FloatingReadNode> watchList, Locati
582588
assert locationIdentity.isMutable();
583589
if (unprocessed.isMarked(r)) {
584590
if (identity.overlaps(locationIdentity)) {
585-
sortIntoList(r, b, result, nodeMap, unprocessed, null);
591+
sortIntoList(nodeStack, r, b, result, nodeMap, unprocessed);
586592
} else {
587593
++index;
588594
continue;
@@ -595,60 +601,54 @@ private static void checkWatchList(ArrayList<FloatingReadNode> watchList, Locati
595601
}
596602
}
597603

598-
private static void sortIntoList(Node n, HIRBlock b, ArrayList<Node> result, NodeMap<HIRBlock> nodeMap, NodeBitMap unprocessed, Node excludeNode) {
604+
private static void sortIntoList(NodeStack stack, Node n, HIRBlock b, ArrayList<Node> result, NodeMap<HIRBlock> nodeMap, NodeBitMap unprocessed) {
599605
assert unprocessed.isMarked(n) : Assertions.errorMessage(n);
600606
assert nodeMap.get(n) == b : Assertions.errorMessage(n);
601-
602-
if (n instanceof PhiNode) {
603-
return;
604-
}
607+
assert stack.isEmpty() : "Node stack must be pre-allocated, but empty.";
608+
assert !(n instanceof PhiNode);
609+
assert !(n instanceof ProxyNode);
605610

606611
unprocessed.clear(n);
607612

608613
/*
609614
* Schedule all unprocessed transitive inputs. This uses an explicit stack instead of
610615
* recursion to avoid overflowing the call stack.
611616
*/
612-
NodeStack stack = new NodeStack();
613-
ArrayList<Node> tempList = new ArrayList<>();
614-
stack.push(n);
617+
pushUnprocessedInputs(n, b, nodeMap, unprocessed, stack);
615618
while (!stack.isEmpty()) {
616619
Node top = stack.peek();
617-
pushUnprocessedInputs(top, b, nodeMap, unprocessed, excludeNode, stack, tempList);
618-
if (stack.peek() == top) {
619-
if (top != n) {
620-
if (unprocessed.isMarked(top) && !(top instanceof ProxyNode)) {
621-
result.add(top);
622-
}
620+
int added = pushUnprocessedInputs(top, b, nodeMap, unprocessed, stack);
621+
if (added == 0) {
622+
if (unprocessed.isMarked(top)) {
623+
result.add(top);
623624
unprocessed.clear(top);
624625
}
625626
stack.pop();
626627
}
627628
}
629+
result.add(n);
630+
}
628631

629-
if (n instanceof ProxyNode) {
630-
// Skip proxy nodes.
631-
} else {
632-
result.add(n);
632+
private static int pushUnprocessedInputs(Node n, HIRBlock b, NodeMap<HIRBlock> nodeMap, NodeBitMap unprocessed, NodeStack stack) {
633+
int pushCount = 0;
634+
for (Node input : n.inputs()) {
635+
if (nodeMap.get(input) == b && unprocessed.isMarked(input)) {
636+
assert !(input instanceof PhiNode);
637+
assert !(input instanceof ProxyNode);
638+
stack.push(input);
639+
pushCount++;
640+
}
633641
}
634-
}
635642

636-
private static void pushUnprocessedInputs(Node n, HIRBlock b, NodeMap<HIRBlock> nodeMap, NodeBitMap unprocessed, Node excludeNode, NodeStack stack, ArrayList<Node> tempList) {
637-
tempList.clear();
638-
n.inputs().snapshotTo(tempList);
639643
/*
640644
* Nodes on top of the stack are scheduled first. Pushing inputs left to right would
641645
* therefore mean scheduling them right to left. We observe the best performance when
642646
* scheduling inputs left to right, therefore we push them in reverse order. We could
643647
* explore more elaborate scheduling policies, like scheduling for reduced register
644648
* pressure using Sethi-Ullman numbering (GR-34624).
645649
*/
646-
for (int i = tempList.size() - 1; i >= 0; i--) {
647-
Node input = tempList.get(i);
648-
if (nodeMap.get(input) == b && unprocessed.isMarked(input) && input != excludeNode && !(input instanceof PhiNode)) {
649-
stack.push(input);
650-
}
651-
}
650+
stack.reverseTopElements(pushCount);
651+
return pushCount;
652652
}
653653

654654
protected void calcLatestBlock(HIRBlock earliestBlock, SchedulingStrategy strategy, Node currentNode, NodeMap<HIRBlock> currentNodeMap, LocationIdentity constrainingLocation,

0 commit comments

Comments
 (0)