diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c
index 2ee00416b7dc3..815551a9b7d53 100644
--- a/ext/opcache/jit/ir/ir.c
+++ b/ext/opcache/jit/ir/ir.c
@@ -1110,11 +1110,13 @@ ir_ref ir_get_op(ir_ctx *ctx, ir_ref ref, int32_t n)
 
 ir_ref ir_param(ir_ctx *ctx, ir_type type, ir_ref region, const char *name, int pos)
 {
+	IR_ASSERT(ctx->ir_base[region].op == IR_START);
 	return ir_emit(ctx, IR_OPT(IR_PARAM, type), region, ir_str(ctx, name), pos);
 }
 
 ir_ref ir_var(ir_ctx *ctx, ir_type type, ir_ref region, const char *name)
 {
+	IR_ASSERT(IR_IS_BB_START(ctx->ir_base[region].op));
 	return ir_emit(ctx, IR_OPT(IR_VAR, type), region, ir_str(ctx, name), IR_UNUSED);
 }
 
@@ -1963,7 +1965,7 @@ ir_ref _ir_VAR(ir_ctx *ctx, ir_type type, const char* name)
 	ir_ref ref = ctx->control;
 
 	while (1) {
-		IR_ASSERT(ctx->control);
+		IR_ASSERT(ref);
 		if (IR_IS_BB_START(ctx->ir_base[ref].op)) {
 			break;
 		}
diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc
index d9d0041c01bce..27595ad31248d 100644
--- a/ext/opcache/jit/ir/ir_aarch64.dasc
+++ b/ext/opcache/jit/ir/ir_aarch64.dasc
@@ -3731,6 +3731,10 @@ static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	int32_t offset;
 	ir_mem mem;
 
+	if (ctx->use_lists[def].count == 1) {
+		/* dead load */
+		return;
+	}
 	IR_ASSERT(var_insn->op == IR_VAR);
 	fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
 	offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3);
@@ -4128,6 +4132,10 @@ static void ir_emit_block_begin(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	dasm_State **Dst = &data->dasm_state;
 	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
 
+	if (ctx->use_lists[def].count == 1) {
+		/* dead load */
+		return;
+	}
 	|	mov Rx(def_reg), sp
 
 	if (IR_REG_SPILLED(ctx->regs[def][0])) {
diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c
index 0d816ab88e229..ed1cd7e39be78 100644
--- a/ext/opcache/jit/ir/ir_gcm.c
+++ b/ext/opcache/jit/ir/ir_gcm.c
@@ -890,9 +890,11 @@ int ir_schedule(ir_ctx *ctx)
 
 	/* Topological sort according dependencies inside each basic block */
 	for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
+		ir_ref start;
+
 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
 		/* Schedule BB start */
-		i = bb->start;
+		start = i = bb->start;
 		_xlat[i] = bb->start = insns_count;
 		insn = &ctx->ir_base[i];
 		if (insn->op == IR_CASE_VAL) {
@@ -904,12 +906,15 @@ int ir_schedule(ir_ctx *ctx)
 		i = _next[i];
 		insn = &ctx->ir_base[i];
 		if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
+			int count = 0;
+
 			/* Schedule PARAM, VAR, PI */
 			while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
 				_xlat[i] = insns_count;
 				insns_count += 1;
 				i = _next[i];
 				insn = &ctx->ir_base[i];
+				count++;
 			}
 			/* Schedule PHIs */
 			while (insn->op == IR_PHI) {
@@ -926,6 +931,52 @@ int ir_schedule(ir_ctx *ctx)
 				}
 				i = _next[i];
 				insn = &ctx->ir_base[i];
+				count++;
 			}
+			/* Schedule remaining PHIs */
+			if (UNEXPECTED(count < ctx->use_lists[start].count - 1)) {
+				ir_use_list *use_list = &ctx->use_lists[start];
+				ir_ref *p, count = use_list->count;
+				ir_ref phis = _prev[i];
+
+				for (p = &ctx->use_edges[use_list->refs]; count > 0; p++, count--) {
+					ir_ref use = *p;
+					if (!_xlat[use]) {
+						ir_insn *use_insn = &ctx->ir_base[use];
+						if (use_insn->op == IR_PARAM
+						 || use_insn->op == IR_VAR
+						 || use_insn->op == IR_PI
+						 || use_insn->op == IR_PHI) {
+							if (_prev[use] != phis) {
+								/* remove "use" */
+								_prev[_next[use]] = _prev[use];
+								_next[_prev[use]] = _next[use];
+								/* insert "use" after "phis" */
+								_prev[use] = phis;
+								_next[use] = _next[phis];
+								_prev[_next[phis]] = use;
+								_next[phis] = use;
+							}
+							phis = use;
+							_xlat[use] = insns_count;
+							if (use_insn->op == IR_PHI) {
+								ir_ref *q;
+								/* Reuse "n" from MERGE and skip first input */
+								insns_count += ir_insn_inputs_to_len(n + 1);
+								for (j = n, q = use_insn->ops + 2; j > 0; q++, j--) {
+									ir_ref input = *q;
+									if (input < IR_TRUE) {
+										consts_count += ir_count_constant(_xlat, input);
+									}
+								}
+							} else {
+								insns_count += 1;
+							}
+						}
+					}
+				}
+				i = _next[phis];
+				insn = &ctx->ir_base[i];
+			}
 		}
 		if (bb->successors_count > 1) {
diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c
index 3705df45901e0..05577f05b31ff 100644
--- a/ext/opcache/jit/ir/ir_sccp.c
+++ b/ext/opcache/jit/ir/ir_sccp.c
@@ -255,7 +255,7 @@ static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn
 {
 	if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
 		return ctx->use_lists[ref].count == 1;
-	} else if (insn->op == IR_ALLOCA) {
+	} else if (insn->op == IR_ALLOCA || insn->op == IR_BLOCK_BEGIN) {
 		return ctx->use_lists[ref].count == 1;
 	}
 	return 0;
@@ -644,8 +644,13 @@ static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values
 				next_insn = use_insn;
 			} else if (use_insn->op != IR_NOP) {
 				IR_ASSERT(use_insn->op1 == ref);
-				use_insn->op1 = prev;
-				ir_use_list_add(ctx, prev, use);
+				IR_ASSERT(use_insn->op == IR_VAR);
+				ir_ref region = prev;
+				while (!IR_IS_BB_START(ctx->ir_base[region].op)) {
+					region = ctx->ir_base[region].op1;
+				}
+				use_insn->op1 = region;
+				ir_use_list_add(ctx, region, use);
 				p = &ctx->use_edges[use_list->refs + k];
 			}
 		}
@@ -1240,6 +1245,22 @@ static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue *
 	}
 }
 
+static void ir_remove_unused_vars(ir_ctx *ctx, ir_ref start, ir_ref end)
+{
+	ir_use_list *use_list = &ctx->use_lists[start];
+	ir_ref *p, use, n = use_list->count;
+
+	for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
+		use = *p;
+		if (use != end) {
+			ir_insn *use_insn = &ctx->ir_base[use];
+			IR_ASSERT(use_insn->op == IR_VAR);
+			IR_ASSERT(ctx->use_lists[use].count == 0);
+			MAKE_NOP(use_insn);
+		}
+	}
+}
+
 static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
 {
 	if (insn->inputs_count == 2) {
@@ -1289,8 +1310,12 @@ static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn,
 		ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
 		ir_insn *next = &ctx->ir_base[next_ref];
 
-		IR_ASSERT(ctx->use_lists[start1_ref].count == 1);
-		IR_ASSERT(ctx->use_lists[start2_ref].count == 1);
+		if (ctx->use_lists[start1_ref].count != 1) {
+			ir_remove_unused_vars(ctx, start1_ref, end1_ref);
+		}
+		if (ctx->use_lists[start2_ref].count != 1) {
+			ir_remove_unused_vars(ctx, start2_ref, end2_ref);
+		}
 
 		next->op1 = root->op1;
 		ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
@@ -1331,7 +1356,9 @@ static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn,
 			if (start->op != IR_CASE_VAL && start->op != IR_CASE_DEFAULT) {
 				return 0;
 			}
-			IR_ASSERT(ctx->use_lists[start_ref].count == 1);
+			if (ctx->use_lists[start_ref].count != 1) {
+				ir_remove_unused_vars(ctx, start_ref, end_ref);
+			}
 			if (!root_ref) {
 				root_ref = start->op1;
 				if (ctx->use_lists[root_ref].count != count) {
@@ -1454,8 +1481,12 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 		}
 		next = &ctx->ir_base[next_ref];
 
-		IR_ASSERT(ctx->use_lists[start1_ref].count == 1);
-		IR_ASSERT(ctx->use_lists[start2_ref].count == 1);
+		if (ctx->use_lists[start1_ref].count != 1) {
+			ir_remove_unused_vars(ctx, start1_ref, end1_ref);
+		}
+		if (ctx->use_lists[start2_ref].count != 1) {
+			ir_remove_unused_vars(ctx, start2_ref, end2_ref);
+		}
 
 		insn->op = (
 			(is_less ? cond->op1 : cond->op2)
@@ -1540,8 +1571,12 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 		}
 		next = &ctx->ir_base[next_ref];
 
-		IR_ASSERT(ctx->use_lists[start1_ref].count == 1);
-		IR_ASSERT(ctx->use_lists[start2_ref].count == 1);
+		if (ctx->use_lists[start1_ref].count != 1) {
+			ir_remove_unused_vars(ctx, start1_ref, end1_ref);
+		}
+		if (ctx->use_lists[start2_ref].count != 1) {
+			ir_remove_unused_vars(ctx, start2_ref, end2_ref);
+		}
 
 		insn->op = IR_ABS;
 		insn->inputs_count = 1;
@@ -1605,8 +1640,12 @@ static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_re
 		}
 		next = &ctx->ir_base[next_ref];
 
-		IR_ASSERT(ctx->use_lists[start1_ref].count == 1);
-		IR_ASSERT(ctx->use_lists[start2_ref].count == 1);
+		if (ctx->use_lists[start1_ref].count != 1) {
+			ir_remove_unused_vars(ctx, start1_ref, end1_ref);
+		}
+		if (ctx->use_lists[start2_ref].count != 1) {
+			ir_remove_unused_vars(ctx, start2_ref, end2_ref);
+		}
 
 		insn->op = IR_COND;
 		insn->inputs_count = 3;
@@ -2126,9 +2165,13 @@ static void ir_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_
 
 		ir_ref next_ref = ctx->use_edges[use_list->refs + 1];
 		ir_insn *next = &ctx->ir_base[next_ref];
 
-		IR_ASSERT(next->op != IR_PHI);
-		if (phi->op == IR_PHI) {
+		if (next->op == IR_PHI) {
+			SWAP_REFS(phi_ref, next_ref);
+			SWAP_INSNS(phi, next);
+		}
+
+		if (phi->op == IR_PHI && next->op != IR_PHI) {
 			if (next->op == IR_IF && next->op1 == merge_ref && ctx->use_lists[phi_ref].count == 1) {
 				if (next->op2 == phi_ref) {
 					if (ir_try_split_if(ctx, next_ref, next, worklist)) {
diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc
index 1fa7001198c94..284e1480d3835 100644
--- a/ext/opcache/jit/ir/ir_x86.dasc
+++ b/ext/opcache/jit/ir/ir_x86.dasc
@@ -1149,8 +1149,10 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co
 			} else {
 				flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
 			}
-			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
-				n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints);
+			if (IR_IS_CONST_REF(insn->op2)) {
+				if (insn->op1 != insn->op2) {
+					n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints);
+				}
 			} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
 				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
 				n++;
@@ -1223,9 +1225,11 @@ op2_const:
 			} else if (ir_rule(ctx, insn->op1) & IR_FUSED) {
 				flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
 			}
-			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
-				flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
-				n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints);
+			if (IR_IS_CONST_REF(insn->op2)) {
+				if (insn->op1 != insn->op2) {
+					flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
+					n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints);
+				}
 			} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
 				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
 				n++;
@@ -3360,10 +3364,23 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
 			offset_insn = insn;
 			break;
 		case IR_LEA_IB_O:
-			base_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
-			index_reg_ref = insn->op1 * sizeof(ir_ref) + 2;
+			op1_insn = &ctx->ir_base[insn->op1];
 			offset_insn = insn;
 			scale = 1;
+			if (ir_rule(ctx, op1_insn->op2) == IR_STATIC_ALLOCA) {
+				offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op2].op3);
+				base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+				base_reg_ref = IR_UNUSED;
+				index_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
+			} else if (ir_rule(ctx, op1_insn->op1) == IR_STATIC_ALLOCA) {
+				offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op1].op3);
+				base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
+				base_reg_ref = IR_UNUSED;
+				index_reg_ref = insn->op1 * sizeof(ir_ref) + 2;
+			} else {
+				base_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
+				index_reg_ref = insn->op1 * sizeof(ir_ref) + 2;
+			}
 			break;
 		case IR_LEA_OB_SI:
 			index_reg_ref = insn->op2 * sizeof(ir_ref) + 1;
@@ -7463,6 +7480,10 @@ static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	ir_reg fp;
 	ir_mem mem;
 
+	if (ctx->use_lists[def].count == 1) {
+		/* dead load */
+		return;
+	}
 	IR_ASSERT(var_insn->op == IR_VAR);
 	fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
 	mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3));
@@ -7909,6 +7930,10 @@ static void ir_emit_block_begin(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	dasm_State **Dst = &data->dasm_state;
 	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
 
+	if (ctx->use_lists[def].count == 1) {
+		/* dead load */
+		return;
+	}
 	|	mov Ra(def_reg), Ra(IR_REG_RSP)
 
 	if (IR_REG_SPILLED(ctx->regs[def][0])) {