@@ -2987,52 +2987,101 @@ RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
29872987 return reg;
29882988}
29892989
2990- // ------------------------------ insert_cmpi_loop_exit -------------------------------------
2991- // Clone a signed compare loop exit from an unsigned compare and
2992- // insert it before the unsigned cmp on the stay-in-loop path.
2993- // All new nodes inserted in the dominator tree between the original
2994- // if and it's projections. The original if test is replaced with
2995- // a constant to force the stay-in-loop path.
2990+ // Idea
2991+ // ----
2992+ // Partial Peeling tries to rotate the loop in such a way that it can later be turned into a counted loop. Counted loops
2993+ // require a signed loop exit test. When calling this method, we've only found a suitable unsigned test to partial peel
2994+ // with. Therefore, we try to split off a signed loop exit test from the unsigned test such that it can be used as new
2995+ // loop exit while keeping the unsigned test unchanged and preserving the same behavior as if we had used the unsigned
2996+ // test alone:
29962997//
2997- // This is done to make sure that the original if and it's projections
2998- // still dominate the same set of control nodes, that the ctrl() relation
2999- // from data nodes to them is preserved, and that their loop nesting is
3000- // preserved.
2998+ // Before Partial Peeling:
2999+ // Loop:
3000+ // <peeled section>
3001+ // Split off signed loop exit test
3002+ // <-- CUT HERE -->
3003+ // Unchanged unsigned loop exit test
3004+ // <rest of unpeeled section>
3005+ // goto Loop
30013006//
3002- // before
3003- // if(i <u limit) unsigned compare loop exit
3007+ // After Partial Peeling:
3008+ // <cloned peeled section>
3009+ // Cloned split off signed loop exit test
3010+ // Loop:
3011+ // Unchanged unsigned loop exit test
3012+ // <rest of unpeeled section>
3013+ // <peeled section>
3014+ // Split off signed loop exit test
3015+ // goto Loop
3016+ //
3017+ // Details
3018+ // -------
3019+ // Before:
3020+ // if (i <u limit) Unsigned loop exit condition
30043021// / |
30053022// v v
30063023// exit-proj stay-in-loop-proj
30073024//
3008- // after
3009- // if(stay-in-loop-const) original if
3010- // / |
3011- // / v
3012- // / if(i < limit) new signed test
3025+ // Split off a signed loop exit test (i.e. with CmpI) from an unsigned loop exit test (i.e. with CmpU) and insert it
3026+ // before the CmpU on the stay-in-loop path and keep both tests:
3027+ //
3028+ // if (i < limit) Signed loop exit test
3029+ // / |
3030+ // / if (i <u limit) Unsigned loop exit test
30133031// / / |
3014- // / / v
3015- // / / if(i <u limit) new cloned unsigned test
3016- // / / / |
3017- // v v v |
3018- // region |
3019- // | |
3020- // dum-if |
3021- // / | |
3022- // ether | |
3023- // v v
3032+ // v v v
3033+ // exit-region stay-in-loop-proj
3034+ //
3035+ // Implementation
3036+ // --------------
3037+ // We need to make sure that the new signed loop exit test is properly inserted into the graph such that the unsigned
3038+ // loop exit test still dominates the same set of control nodes, the ctrl() relation from data nodes to both loop
3039+ // exit tests is preserved, and their loop nesting is correct.
3040+ //
3041+ // To achieve that, we clone the unsigned loop exit test completely (leave it unchanged), insert the signed loop exit
3042+ // test above it and kill the original unsigned loop exit test by setting its condition to a constant
3043+ // (i.e. stay-in-loop-const in graph below) such that IGVN can fold it later:
3044+ //
3045+ // if (stay-in-loop-const) Killed original unsigned loop exit test
3046+ // / |
3047+ // / v
3048+ // / if (i < limit) Split off signed loop exit test
3049+ // / / |
3050+ // / / v
3051+ // / / if (i <u limit) Cloned unsigned loop exit test
3052+ // / / / |
3053+ // v v v |
3054+ // exit-region |
3055+ // | |
3056+ // dummy-if |
3057+ // / | |
3058+ // dead | |
3059+ // v v
30243060// exit-proj stay-in-loop-proj
30253061//
3026- IfNode* PhaseIdealLoop::insert_cmpi_loop_exit (IfNode* if_cmpu, IdealLoopTree *loop) {
3062+ // Note: The dummy-if is inserted to create a region that merges the loop exits. It sits between the original
3063+ // (to-be-killed) unsigned loop exit test and its exit projection, which is kept (also see insert_region_before_proj()).
3064+ //
3065+ // Requirements
3066+ // ------------
3067+ // Note that we can only split off a signed loop exit test from the unsigned loop exit test when the behavior is exactly
3068+ // the same as before with only a single unsigned test. This is only possible if certain requirements are met.
3069+ // Otherwise, we need to bail out (see comments in the code below).
3070+ IfNode* PhaseIdealLoop::insert_cmpi_loop_exit (IfNode* if_cmpu, IdealLoopTree* loop) {
30273071 const bool Signed = true ;
30283072 const bool Unsigned = false ;
30293073
30303074 BoolNode* bol = if_cmpu->in (1 )->as_Bool ();
3031- if (bol->_test ._test != BoolTest::lt) return nullptr ;
3075+ if (bol->_test ._test != BoolTest::lt) {
3076+ return nullptr ;
3077+ }
30323078 CmpNode* cmpu = bol->in (1 )->as_Cmp ();
3033- if (cmpu->Opcode () != Op_CmpU) return nullptr ;
3079+ assert (cmpu->Opcode () == Op_CmpU, " must be unsigned comparison" );
3080+
30343081 int stride = stride_of_possible_iv (if_cmpu);
3035- if (stride == 0 ) return nullptr ;
3082+ if (stride == 0 ) {
3083+ return nullptr ;
3084+ }
30363085
30373086 Node* lp_proj = stay_in_loop (if_cmpu, loop);
30383087 guarantee (lp_proj != nullptr , " null loop node" );
@@ -3044,22 +3093,101 @@ IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *lo
30443093 // We therefore can't add a single exit condition.
30453094 return nullptr ;
30463095 }
3047- // The loop exit condition is !(i <u limit) ==> (i < 0 || i >= limit).
3048- // Split out the exit condition (i < 0) for stride < 0 or (i >= limit) for stride > 0.
3049- Node* limit = nullptr ;
3096+ // The unsigned loop exit condition is
3097+ // !(i <u limit)
3098+ // = i >=u limit
3099+ //
3100+ // First, we note that for any x for which
3101+ // 0 <= x <= INT_MAX
3102+ // we can convert x to an unsigned int and still get the same guarantee:
3103+ // 0 <= (uint) x <= INT_MAX = (uint) INT_MAX
3104+ // 0 <=u (uint) x <=u INT_MAX = (uint) INT_MAX (LEMMA)
3105+ //
3106+ // With that in mind, if
3107+ // limit >= 0 (COND)
3108+ // then the unsigned loop exit condition
3109+ // i >=u limit (ULE)
3110+ // is equivalent to
3111+ // i < 0 || i >= limit (SLE-full)
3112+ // because either i is negative and therefore always greater than MAX_INT when converted to unsigned
3113+ // (uint) i >u MAX_INT >= limit >= 0
3114+ // or otherwise
3115+ // i >= limit >= 0
3116+ // holds due to (LEMMA).
3117+ //
3118+ // For completeness, a counterexample with limit < 0:
3119+ // Assume i = -3 and limit = -2:
3120+ // i < 0
3121+ // -3 < 0
3122+ // is true and thus also "i < 0 || i >= limit". But
3123+ // i >=u limit
3124+ // -3 >=u -2
3125+ // is false.
3126+ Node* limit = cmpu->in (2 );
3127+ const TypeInt* type_limit = _igvn.type (limit)->is_int ();
3128+ if (type_limit->_lo < 0 ) {
3129+ return nullptr ;
3130+ }
3131+
3132+ // We prove below that we can extract a single signed loop exit condition from (SLE-full), depending on the stride:
3133+ // stride < 0:
3134+ // i < 0 (SLE = SLE-negative)
3135+ // stride > 0:
3136+ // i >= limit (SLE = SLE-positive)
3137+ // such that we have the following graph before Partial Peeling with stride > 0 (similar for stride < 0):
3138+ //
3139+ // Loop:
3140+ // <peeled section>
3141+ // i >= limit (SLE-positive)
3142+ // <-- CUT HERE -->
3143+ // i >=u limit (ULE)
3144+ // <rest of unpeeled section>
3145+ // goto Loop
3146+ //
3147+ // We exit the loop if:
3148+ // (SLE) is true OR (ULE) is true
3149+ // However, if (SLE) is true then (ULE) also needs to be true to ensure the exact same behavior. Otherwise, we wrongly
3150+ // exit a loop that should not have been exited if we did not apply Partial Peeling. More formally, we need to ensure:
3151+ // (SLE) IMPLIES (ULE)
3152+ // This indeed holds when (COND) is given:
3153+ // - stride > 0:
3154+ // i >= limit // (SLE = SLE-positive)
3155+ // i >= limit >= 0 // (COND)
3156+ // i >=u limit >= 0 // (LEMMA)
3157+ // which is the unsigned loop exit condition (ULE).
3158+ // - stride < 0:
3159+ // i < 0 // (SLE = SLE-negative)
3160+ // (uint) i >u MAX_INT // (NEG) all negative values are greater than MAX_INT when converted to unsigned
3161+ // MAX_INT >= limit >= 0 // (COND)
3162+ // MAX_INT >=u limit >= 0 // (LEMMA)
3163+ // and thus from (NEG) and (LEMMA):
3164+ // i >=u limit
3165+ // which is the unsigned loop exit condition (ULE).
3166+ //
3167+ //
3168+ // After Partial Peeling, we have the following structure for stride > 0 (similar for stride < 0):
3169+ // <cloned peeled section>
3170+ // i >= limit (SLE-positive)
3171+ // Loop:
3172+ // i >=u limit (ULE)
3173+ // <rest of unpeeled section>
3174+ // <peeled section>
3175+ // i >= limit (SLE-positive)
3176+ // goto Loop
3177+ Node* rhs_cmpi;
30503178 if (stride > 0 ) {
3051- limit = cmpu-> in ( 2 );
3179+ rhs_cmpi = limit; // For i >= limit
30523180 } else {
3053- limit = _igvn.makecon (TypeInt::ZERO);
3054- set_ctrl (limit , C->root ());
3181+ rhs_cmpi = _igvn.makecon (TypeInt::ZERO); // For i < 0
3182+ set_ctrl (rhs_cmpi , C->root ());
30553183 }
30563184 // Create a new region on the exit path
30573185 RegionNode* reg = insert_region_before_proj (lp_exit);
30583186 guarantee (reg != nullptr , " null region node" );
30593187
30603188 // Clone the if-cmpu-true-false using a signed compare
30613189 BoolTest::mask rel_i = stride > 0 ? bol->_test ._test : BoolTest::ge;
3062- ProjNode* cmpi_exit = insert_if_before_proj (cmpu->in (1 ), Signed, rel_i, limit , lp_continue);
3190+ ProjNode* cmpi_exit = insert_if_before_proj (cmpu->in (1 ), Signed, rel_i, rhs_cmpi , lp_continue);
30633191 reg->add_req (cmpi_exit);
30643192
30653193 // Clone the if-cmpu-true-false
0 commit comments