@@ -1683,8 +1683,8 @@ void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_n
16831683 // use by range check elimination.
16841684 Node *pre_opaq = new Opaque1Node (C, pre_limit, limit);
16851685
1686- register_new_node (pre_limit, pre_head->in (0 ));
1687- register_new_node (pre_opaq , pre_head->in (0 ));
1686+ register_new_node (pre_limit, pre_head->in (LoopNode::EntryControl ));
1687+ register_new_node (pre_opaq , pre_head->in (LoopNode::EntryControl ));
16881688
16891689 // Since no other users of pre-loop compare, I can hack limit directly
16901690 assert (cmp_end->outcnt () == 1 , " no other users" );
@@ -2790,6 +2790,7 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
27902790 // Find the main loop limit; we will trim it's iterations
27912791 // to not ever trip end tests
27922792 Node *main_limit = cl->limit ();
2793+ Node* main_limit_ctrl = get_ctrl (main_limit);
27932794
27942795 // Check graph shape. Cannot optimize a loop if zero-trip
27952796 // Opaque1 node is optimized away and then another round
@@ -2822,9 +2823,15 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
28222823 }
28232824 Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
28242825 Node *pre_limit = pre_opaq->in (1 );
2826+ Node* pre_limit_ctrl = get_ctrl (pre_limit);
28252827
28262828 // Where do we put new limit calculations
2827- Node *pre_ctrl = pre_end->loopnode ()->in (LoopNode::EntryControl);
2829+ Node* pre_ctrl = pre_end->loopnode ()->in (LoopNode::EntryControl);
2830+ // Range check elimination optimizes out conditions whose parameters are loop invariant in the main loop. They usually
2831+ // have control above the pre loop, but there's no guarantee that they do. There's no guarantee either that the pre
2832+ // loop limit has control that's out of loop (a previous round of range check elimination could have set a limit that's
2833+ // not loop invariant).
2834+ Node* new_limit_ctrl = dominated_node (pre_ctrl, pre_limit_ctrl);
28282835
28292836 // Ensure the original loop limit is available from the
28302837 // pre-loop Opaque1 node.
@@ -2884,14 +2891,14 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
28842891 Node *limit = cmp->in (2 );
28852892 int scale_con= 1 ; // Assume trip counter not scaled
28862893
2887- Node *limit_c = get_ctrl (limit);
2888- if (loop->is_member (get_loop (limit_c ))) {
2894+ Node* limit_ctrl = get_ctrl (limit);
2895+ if (loop->is_member (get_loop (limit_ctrl ))) {
28892896 // Compare might have operands swapped; commute them
28902897 b_test = b_test.commute ();
28912898 rc_exp = cmp->in (2 );
28922899 limit = cmp->in (1 );
2893- limit_c = get_ctrl (limit);
2894- if (loop->is_member (get_loop (limit_c ))) {
2900+ limit_ctrl = get_ctrl (limit);
2901+ if (loop->is_member (get_loop (limit_ctrl ))) {
28952902 continue ; // Both inputs are loop varying; cannot RCE
28962903 }
28972904 }
@@ -2900,11 +2907,11 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
29002907 // 'limit' maybe pinned below the zero trip test (probably from a
29012908 // previous round of rce), in which case, it can't be used in the
29022909 // zero trip test expression which must occur before the zero test's if.
2903- if (is_dominator (ctrl, limit_c )) {
2910+ if (is_dominator (ctrl, limit_ctrl )) {
29042911 continue ; // Don't rce this check but continue looking for other candidates.
29052912 }
29062913
2907- assert (is_dominator (compute_early_ctrl (limit, limit_c ), pre_end), " node pinned on loop exit test?" );
2914+ assert (is_dominator (compute_early_ctrl (limit, limit_ctrl ), pre_end), " node pinned on loop exit test?" );
29082915
29092916 // Check for scaled induction variable plus an offset
29102917 Node *offset = nullptr ;
@@ -2913,19 +2920,26 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
29132920 continue ;
29142921 }
29152922
2916- Node *offset_c = get_ctrl (offset);
2917- if (loop->is_member (get_loop (offset_c ))) {
2923+ Node* offset_ctrl = get_ctrl (offset);
2924+ if (loop->is_member (get_loop (offset_ctrl ))) {
29182925 continue ; // Offset is not really loop invariant
29192926 }
29202927 // Here we know 'offset' is loop invariant.
29212928
29222929 // As above for the 'limit', the 'offset' maybe pinned below the
29232930 // zero trip test.
2924- if (is_dominator (ctrl, offset_c )) {
2931+ if (is_dominator (ctrl, offset_ctrl )) {
29252932 continue ; // Don't rce this check but continue looking for other candidates.
29262933 }
29272934
2928- assert (is_dominator (compute_early_ctrl (offset, offset_c), pre_end), " node pinned on loop exit test?" );
2935+ // offset and limit can have control set below the pre loop when they are not loop invariant in the pre loop.
2936+ // Update their control (and the control of inputs as needed) to be above pre_end
2937+ offset_ctrl = ensure_node_and_inputs_are_above_pre_end (pre_end, offset);
2938+ limit_ctrl = ensure_node_and_inputs_are_above_pre_end (pre_end, limit);
2939+
2940+ // offset and limit could have control below new_limit_ctrl if they are not loop invariant in the pre loop.
2941+ new_limit_ctrl = dominated_node (new_limit_ctrl, offset_ctrl, limit_ctrl);
2942+
29292943#ifdef ASSERT
29302944 if (TraceRangeLimitCheck) {
29312945 tty->print_cr (" RC bool node%s" , flip ? " flipped:" : " :" );
@@ -2945,16 +2959,16 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
29452959 jlong lscale_con = scale_con;
29462960 Node* int_offset = offset;
29472961 offset = new ConvI2LNode (offset);
2948- register_new_node (offset, pre_ctrl );
2962+ register_new_node (offset, new_limit_ctrl );
29492963 Node* int_limit = limit;
29502964 limit = new ConvI2LNode (limit);
2951- register_new_node (limit, pre_ctrl );
2965+ register_new_node (limit, new_limit_ctrl );
29522966
29532967 // Adjust pre and main loop limits to guard the correct iteration set
29542968 if (cmp->Opcode () == Op_CmpU) { // Unsigned compare is really 2 tests
29552969 if (b_test._test == BoolTest::lt) { // Range checks always use lt
29562970 // The underflow and overflow limits: 0 <= scale*I+offset < limit
2957- add_constraint (stride_con, lscale_con, offset, zero, limit, pre_ctrl , &pre_limit, &main_limit);
2971+ add_constraint (stride_con, lscale_con, offset, zero, limit, new_limit_ctrl , &pre_limit, &main_limit);
29582972 Node* init = cl->init_trip ();
29592973 Node* opaque_init = new OpaqueLoopInitNode (C, init);
29602974 register_new_node (opaque_init, loop_entry);
@@ -3013,22 +3027,22 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
30133027 // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
30143028 lscale_con = -lscale_con;
30153029 offset = new SubLNode (zero, offset);
3016- register_new_node (offset, pre_ctrl );
3030+ register_new_node (offset, new_limit_ctrl );
30173031 limit = new SubLNode (zero, limit);
3018- register_new_node (limit, pre_ctrl );
3032+ register_new_node (limit, new_limit_ctrl );
30193033 // Fall into LE case
30203034 case BoolTest::le:
30213035 if (b_test._test != BoolTest::gt) {
30223036 // Convert X <= Y to X < Y+1
30233037 limit = new AddLNode (limit, one);
3024- register_new_node (limit, pre_ctrl );
3038+ register_new_node (limit, new_limit_ctrl );
30253039 }
30263040 // Fall into LT case
30273041 case BoolTest::lt:
30283042 // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
30293043 // Note: (MIN_INT+1 == -MAX_INT) is used instead of MIN_INT here
30303044 // to avoid problem with scale == -1: MIN_INT/(-1) == MIN_INT.
3031- add_constraint (stride_con, lscale_con, offset, mini, limit, pre_ctrl , &pre_limit, &main_limit);
3045+ add_constraint (stride_con, lscale_con, offset, mini, limit, new_limit_ctrl , &pre_limit, &main_limit);
30323046 break ;
30333047 default :
30343048 if (PrintOpto) {
@@ -3072,8 +3086,14 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
30723086 // Computed pre-loop limit can be outside of loop iterations range.
30733087 pre_limit = (stride_con > 0 ) ? (Node*)new MinINode (pre_limit, orig_limit)
30743088 : (Node*)new MaxINode (pre_limit, orig_limit);
3075- register_new_node (pre_limit, pre_ctrl );
3089+ register_new_node (pre_limit, new_limit_ctrl );
30763090 }
3091+ // new pre_limit can push Bool/Cmp/Opaque nodes down (when one of the eliminated condition has parameters that are not
3092+ // loop invariant in the pre loop.
3093+ set_ctrl (pre_opaq, new_limit_ctrl);
3094+ set_ctrl (pre_end->cmp_node (), new_limit_ctrl);
3095+ set_ctrl (pre_end->in (1 ), new_limit_ctrl);
3096+
30773097 _igvn.replace_input_of (pre_opaq, 1 , pre_limit);
30783098
30793099 // Note:: we are making the main loop limit no longer precise;
@@ -3093,15 +3113,15 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
30933113 register_new_node (main_cmp, main_cle->in (0 ));
30943114 _igvn.replace_input_of (main_bol, 1 , main_cmp);
30953115 }
3096- assert (main_limit == cl->limit () || get_ctrl (main_limit) == pre_ctrl , " wrong control for added limit" );
3116+ assert (main_limit == cl->limit () || get_ctrl (main_limit) == new_limit_ctrl , " wrong control for added limit" );
30973117 const TypeInt* orig_limit_t = _igvn.type (orig_limit)->is_int ();
30983118 bool upward = cl->stride_con () > 0 ;
30993119 // The new loop limit is <= (for an upward loop) >= (for a downward loop) than the orig limit.
31003120 // The expression that computes the new limit may be too complicated and the computed type of the new limit
31013121 // may be too pessimistic. A CastII here guarantees it's not lost.
31023122 main_limit = new CastIINode (pre_ctrl, main_limit, TypeInt::make (upward ? min_jint : orig_limit_t ->_lo ,
31033123 upward ? orig_limit_t ->_hi : max_jint, Type::WidenMax));
3104- register_new_node (main_limit, pre_ctrl );
3124+ register_new_node (main_limit, new_limit_ctrl );
31053125 // Hack the now-private loop bounds
31063126 _igvn.replace_input_of (main_cmp, 2 , main_limit);
31073127 if (abs_stride_is_one) {
@@ -3112,6 +3132,37 @@ void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) {
31123132 // The OpaqueNode is unshared by design
31133133 assert (opqzm->outcnt () == 1 , " cannot hack shared node" );
31143134 _igvn.replace_input_of (opqzm, 1 , main_limit);
3135+ // new main_limit can push Bool/Cmp nodes down (when one of the eliminated condition has parameters that are not loop
3136+ // invariant in the pre loop.
3137+ set_ctrl (opqzm, new_limit_ctrl);
3138+ set_ctrl (iffm->in (1 )->in (1 ), new_limit_ctrl);
3139+ set_ctrl (iffm->in (1 ), new_limit_ctrl);
3140+ }
3141+
3142+ // Adjust control for node and its inputs (and inputs of its inputs) to be above the pre end
3143+ Node* PhaseIdealLoop::ensure_node_and_inputs_are_above_pre_end (CountedLoopEndNode* pre_end, Node* node) {
3144+ Node* control = get_ctrl (node);
3145+ assert (is_dominator (compute_early_ctrl (node, control), pre_end), " node pinned on loop exit test?" );
3146+
3147+ if (is_dominator (control, pre_end)) {
3148+ return control;
3149+ }
3150+ control = pre_end->in (0 );
3151+ ResourceMark rm;
3152+ Unique_Node_List wq;
3153+ wq.push (node);
3154+ for (uint i = 0 ; i < wq.size (); i++) {
3155+ Node* n = wq.at (i);
3156+ assert (is_dominator (compute_early_ctrl (n, get_ctrl (n)), pre_end), " node pinned on loop exit test?" );
3157+ set_ctrl (n, control);
3158+ for (uint j = 0 ; j < n->req (); j++) {
3159+ Node* in = n->in (j);
3160+ if (in != nullptr && has_ctrl (in) && !is_dominator (get_ctrl (in), pre_end)) {
3161+ wq.push (in);
3162+ }
3163+ }
3164+ }
3165+ return control;
31153166}
31163167
31173168bool IdealLoopTree::compute_has_range_checks () const {
0 commit comments