8339733: C2: some nodes can have incorrect control after do_range_check()

rwestrel · rwestrel · commit 315abdf8c835 · 2024-09-12T07:19:54.000Z
Reviewed-by: chagedorn, thartmann
diff --git a/src/hotspot/share/opto/loopTransform.cpp b/src/hotspot/share/opto/loopTransform.cpp
@@ -1683,8 +1683,8 @@ void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_n
   // use by range check elimination.
   Node *pre_opaq  = new Opaque1Node(C, pre_limit, limit);
 
-  register_new_node(pre_limit, pre_head->in(0));
-  register_new_node(pre_opaq , pre_head->in(0));
+  register_new_node(pre_limit, pre_head->in(LoopNode::EntryControl));
+  register_new_node(pre_opaq , pre_head->in(LoopNode::EntryControl));
 
   // Since no other users of pre-loop compare, I can hack limit directly
   assert(cmp_end->outcnt() == 1, "no other users");
@@ -2790,6 +2790,7 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
   // Find the main loop limit; we will trim it's iterations
   // to not ever trip end tests
   Node *main_limit = cl->limit();
+  Node* main_limit_ctrl = get_ctrl(main_limit);
 
   // Check graph shape. Cannot optimize a loop if zero-trip
   // Opaque1 node is optimized away and then another round
@@ -2822,9 +2823,15 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
   }
   Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
   Node *pre_limit = pre_opaq->in(1);
+  Node* pre_limit_ctrl = get_ctrl(pre_limit);
 
   // Where do we put new limit calculations
-  Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
+  Node* pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
+  // Range check elimination optimizes out conditions whose parameters are loop invariant in the main loop. They usually
+  // have control above the pre loop, but there's no guarantee that they do. There's no guarantee either that the pre
+  // loop limit has control that's out of loop (a previous round of range check elimination could have set a limit that's
+  // not loop invariant).
+  Node* new_limit_ctrl = dominated_node(pre_ctrl, pre_limit_ctrl);
 
   // Ensure the original loop limit is available from the
   // pre-loop Opaque1 node.
@@ -2884,14 +2891,14 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
       Node *limit  = cmp->in(2);
       int scale_con= 1;        // Assume trip counter not scaled
 
-      Node *limit_c = get_ctrl(limit);
-      if (loop->is_member(get_loop(limit_c))) {
+      Node* limit_ctrl = get_ctrl(limit);
+      if (loop->is_member(get_loop(limit_ctrl))) {
         // Compare might have operands swapped; commute them
         b_test = b_test.commute();
         rc_exp = cmp->in(2);
         limit  = cmp->in(1);
-        limit_c = get_ctrl(limit);
-        if (loop->is_member(get_loop(limit_c))) {
+        limit_ctrl = get_ctrl(limit);
+        if (loop->is_member(get_loop(limit_ctrl))) {
           continue;             // Both inputs are loop varying; cannot RCE
         }
       }
@@ -2900,11 +2907,11 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
       // 'limit' maybe pinned below the zero trip test (probably from a
       // previous round of rce), in which case, it can't be used in the
       // zero trip test expression which must occur before the zero test's if.
-      if (is_dominator(ctrl, limit_c)) {
+      if (is_dominator(ctrl, limit_ctrl)) {
         continue;  // Don't rce this check but continue looking for other candidates.
       }
 
-      assert(is_dominator(compute_early_ctrl(limit, limit_c), pre_end), "node pinned on loop exit test?");
+      assert(is_dominator(compute_early_ctrl(limit, limit_ctrl), pre_end), "node pinned on loop exit test?");
 
       // Check for scaled induction variable plus an offset
       Node *offset = nullptr;
@@ -2913,19 +2920,26 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
         continue;
       }
 
-      Node *offset_c = get_ctrl(offset);
-      if (loop->is_member(get_loop(offset_c))) {
+      Node* offset_ctrl = get_ctrl(offset);
+      if (loop->is_member(get_loop(offset_ctrl))) {
         continue;               // Offset is not really loop invariant
       }
       // Here we know 'offset' is loop invariant.
 
       // As above for the 'limit', the 'offset' maybe pinned below the
       // zero trip test.
-      if (is_dominator(ctrl, offset_c)) {
+      if (is_dominator(ctrl, offset_ctrl)) {
         continue; // Don't rce this check but continue looking for other candidates.
       }
 
-      assert(is_dominator(compute_early_ctrl(offset, offset_c), pre_end), "node pinned on loop exit test?");
+      // offset and limit can have control set below the pre loop when they are not loop invariant in the pre loop.
+      // Update their control (and the control of inputs as needed) to be above pre_end
+      offset_ctrl = ensure_node_and_inputs_are_above_pre_end(pre_end, offset);
+      limit_ctrl = ensure_node_and_inputs_are_above_pre_end(pre_end, limit);
+
+      // offset and limit could have control below new_limit_ctrl if they are not loop invariant in the pre loop.
+      new_limit_ctrl = dominated_node(new_limit_ctrl, offset_ctrl, limit_ctrl);
+
 #ifdef ASSERT
       if (TraceRangeLimitCheck) {
         tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
@@ -2945,16 +2959,16 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
       jlong lscale_con = scale_con;
       Node* int_offset = offset;
       offset = new ConvI2LNode(offset);
-      register_new_node(offset, pre_ctrl);
+      register_new_node(offset, new_limit_ctrl);
       Node* int_limit = limit;
       limit = new ConvI2LNode(limit);
-      register_new_node(limit, pre_ctrl);
+      register_new_node(limit, new_limit_ctrl);
 
       // Adjust pre and main loop limits to guard the correct iteration set
       if (cmp->Opcode() == Op_CmpU) { // Unsigned compare is really 2 tests
         if (b_test._test == BoolTest::lt) { // Range checks always use lt
           // The underflow and overflow limits: 0 <= scale*I+offset < limit
-          add_constraint(stride_con, lscale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit);
+          add_constraint(stride_con, lscale_con, offset, zero, limit, new_limit_ctrl, &pre_limit, &main_limit);
           Node* init = cl->init_trip();
           Node* opaque_init = new OpaqueLoopInitNode(C, init);
           register_new_node(opaque_init, loop_entry);
@@ -3013,22 +3027,22 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
           // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
           lscale_con = -lscale_con;
           offset = new SubLNode(zero, offset);
-          register_new_node(offset, pre_ctrl);
+          register_new_node(offset, new_limit_ctrl);
           limit  = new SubLNode(zero, limit);
-          register_new_node(limit, pre_ctrl);
+          register_new_node(limit, new_limit_ctrl);
           // Fall into LE case
         case BoolTest::le:
           if (b_test._test != BoolTest::gt) {
             // Convert X <= Y to X < Y+1
             limit = new AddLNode(limit, one);
-            register_new_node(limit, pre_ctrl);
+            register_new_node(limit, new_limit_ctrl);
           }
           // Fall into LT case
         case BoolTest::lt:
           // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
           // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
           // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
-          add_constraint(stride_con, lscale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit);
+          add_constraint(stride_con, lscale_con, offset, mini, limit, new_limit_ctrl, &pre_limit, &main_limit);
           break;
         default:
           if (PrintOpto) {
@@ -3072,8 +3086,14 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
     // Computed pre-loop limit can be outside of loop iterations range.
     pre_limit = (stride_con > 0) ? (Node*)new MinINode(pre_limit, orig_limit)
                                  : (Node*)new MaxINode(pre_limit, orig_limit);
-    register_new_node(pre_limit, pre_ctrl);
+    register_new_node(pre_limit, new_limit_ctrl);
   }
+  // new pre_limit can push Bool/Cmp/Opaque nodes down (when one of the eliminated condition has parameters that are not
+  // loop invariant in the pre loop.
+  set_ctrl(pre_opaq, new_limit_ctrl);
+  set_ctrl(pre_end->cmp_node(), new_limit_ctrl);
+  set_ctrl(pre_end->in(1), new_limit_ctrl);
+
   _igvn.replace_input_of(pre_opaq, 1, pre_limit);
 
   // Note:: we are making the main loop limit no longer precise;
@@ -3093,15 +3113,15 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
     register_new_node(main_cmp, main_cle->in(0));
     _igvn.replace_input_of(main_bol, 1, main_cmp);
   }
-  assert(main_limit == cl->limit() || get_ctrl(main_limit) == pre_ctrl, "wrong control for added limit");
+  assert(main_limit == cl->limit() || get_ctrl(main_limit) == new_limit_ctrl, "wrong control for added limit");
   const TypeInt* orig_limit_t = _igvn.type(orig_limit)->is_int();
   bool upward = cl->stride_con() > 0;
   // The new loop limit is <= (for an upward loop) >= (for a downward loop) than the orig limit.
   // The expression that computes the new limit may be too complicated and the computed type of the new limit
   // may be too pessimistic. A CastII here guarantees it's not lost.
   main_limit = new CastIINode(pre_ctrl, main_limit, TypeInt::make(upward ? min_jint : orig_limit_t->_lo,
                                                         upward ? orig_limit_t->_hi : max_jint, Type::WidenMax));
-  register_new_node(main_limit, pre_ctrl);
+  register_new_node(main_limit, new_limit_ctrl);
   // Hack the now-private loop bounds
   _igvn.replace_input_of(main_cmp, 2, main_limit);
   if (abs_stride_is_one) {
@@ -3112,6 +3132,37 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
   // The OpaqueNode is unshared by design
   assert(opqzm->outcnt() == 1, "cannot hack shared node");
   _igvn.replace_input_of(opqzm, 1, main_limit);
+  // new main_limit can push Bool/Cmp nodes down (when one of the eliminated condition has parameters that are not loop
+  // invariant in the pre loop.
+  set_ctrl(opqzm, new_limit_ctrl);
+  set_ctrl(iffm->in(1)->in(1), new_limit_ctrl);
+  set_ctrl(iffm->in(1), new_limit_ctrl);
+}
+
+// Adjust control for node and its inputs (and inputs of its inputs) to be above the pre end
+Node* PhaseIdealLoop::ensure_node_and_inputs_are_above_pre_end(CountedLoopEndNode* pre_end, Node* node) {
+  Node* control = get_ctrl(node);
+  assert(is_dominator(compute_early_ctrl(node, control), pre_end), "node pinned on loop exit test?");
+
+  if (is_dominator(control, pre_end)) {
+    return control;
+  }
+  control = pre_end->in(0);
+  ResourceMark rm;
+  Unique_Node_List wq;
+  wq.push(node);
+  for (uint i = 0; i < wq.size(); i++) {
+    Node* n = wq.at(i);
+    assert(is_dominator(compute_early_ctrl(n, get_ctrl(n)), pre_end), "node pinned on loop exit test?");
+    set_ctrl(n, control);
+    for (uint j = 0; j < n->req(); j++) {
+      Node* in = n->in(j);
+      if (in != nullptr && has_ctrl(in) && !is_dominator(get_ctrl(in), pre_end)) {
+        wq.push(in);
+      }
+    }
+  }
+  return control;
 }
 
 bool IdealLoopTree::compute_has_range_checks() const {
diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp
@@ -1180,6 +1180,16 @@ class PhaseIdealLoop : public PhaseTransform {
   }
   Node *dom_lca_internal( Node *n1, Node *n2 ) const;
 
+  Node* dominated_node(Node* c1, Node* c2) {
+    assert(is_dominator(c1, c2) || is_dominator(c2, c1), "nodes must be related");
+    return is_dominator(c1, c2) ? c2 : c1;
+  }
+
+  // Return control node that's dominated by the 2 others
+  Node* dominated_node(Node* c1, Node* c2, Node* c3) {
+    return dominated_node(c1, dominated_node(c2, c3));
+  }
+
   // Build and verify the loop tree without modifying the graph.  This
   // is useful to verify that all inputs properly dominate their uses.
   static void verify(PhaseIterGVN& igvn) {
@@ -1780,6 +1790,8 @@ class PhaseIdealLoop : public PhaseTransform {
   bool can_move_to_inner_loop(Node* n, LoopNode* n_loop, Node* x);
 
   void pin_array_access_nodes_dependent_on(Node* ctrl);
+
+  Node* ensure_node_and_inputs_are_above_pre_end(CountedLoopEndNode* pre_end, Node* node);
 };