diff --git a/NEWS b/NEWS index ba5c303224c99..0b66ccd0a91b3 100644 --- a/NEWS +++ b/NEWS @@ -18,6 +18,9 @@ PHP NEWS . Casting floats that are not representable as ints now emits a warning. (Girgias) +- Bz2: + . Fixed bug GH-19810 (Broken bzopen() stream mode validation). (ilutov) + - Curl: . Fix cloning of CURLOPT_POSTFIELDS when using the clone operator instead of the curl_copy_handle() function to clone a CurlHandle. (timwolla) @@ -45,6 +48,7 @@ PHP NEWS - Opcache: . Fixed bug GH-19669 (assertion failure in zend_jit_trace_type_to_info_ex). (Arnaud) + . Fixed bug GH-19831 (function JIT may not deref property value). (Arnaud) - MBstring: . Updated Unicode data tables to Unicode 17.0. (Yuya Hamada) diff --git a/ext/bz2/bz2.c b/ext/bz2/bz2.c index 9ed5342a7df8f..aff9d5c11b9ed 100644 --- a/ext/bz2/bz2.c +++ b/ext/bz2/bz2.c @@ -366,10 +366,23 @@ PHP_FUNCTION(bzopen) php_stream_from_zval(stream, file); stream_mode_len = strlen(stream->mode); - if (stream_mode_len != 1 && !(stream_mode_len == 2 && memchr(stream->mode, 'b', 2))) { - php_error_docref(NULL, E_WARNING, "Cannot use stream opened in mode '%s'", stream->mode); - RETURN_FALSE; - } else if (stream_mode_len == 1 && stream->mode[0] != 'r' && stream->mode[0] != 'w' && stream->mode[0] != 'a' && stream->mode[0] != 'x') { + char primary_stream_mode; + if (stream_mode_len == 1) { + primary_stream_mode = stream->mode[0]; + } else if (stream_mode_len == 2) { + char secondary_stream_mode = 0; + if (stream->mode[0] != 'b') { + primary_stream_mode = stream->mode[0]; + secondary_stream_mode = stream->mode[1]; + } else { + primary_stream_mode = stream->mode[1]; + secondary_stream_mode = stream->mode[0]; + } + if (secondary_stream_mode != 'b') { + goto unsupported_mode; + } + } else { +unsupported_mode: php_error_docref(NULL, E_WARNING, "Cannot use stream opened in mode '%s'", stream->mode); RETURN_FALSE; } @@ -377,16 +390,14 @@ PHP_FUNCTION(bzopen) switch(mode[0]) { case 'r': /* only "r" and "rb" are supported */ - if (stream->mode[0] != mode[0] && !(stream_mode_len == 2 && stream->mode[1] != mode[0])) { + if (primary_stream_mode != 'r') { php_error_docref(NULL, E_WARNING, "Cannot read from a stream opened in write only mode"); RETURN_FALSE; } break; case 'w': /* support only "w"(b), "a"(b), "x"(b) */ - if (stream->mode[0] != mode[0] && !(stream_mode_len == 2 && stream->mode[1] != mode[0]) - && stream->mode[0] != 'a' && !(stream_mode_len == 2 && stream->mode[1] != 'a') - && stream->mode[0] != 'x' && !(stream_mode_len == 2 && stream->mode[1] != 'x')) { + if (!strchr("wax", primary_stream_mode)) { php_error_docref(NULL, E_WARNING, "cannot write to a stream opened in read only mode"); RETURN_FALSE; } diff --git a/ext/bz2/tests/gh19810.phpt b/ext/bz2/tests/gh19810.phpt new file mode 100644 index 0000000000000..a230fbb6ffeff --- /dev/null +++ b/ext/bz2/tests/gh19810.phpt @@ -0,0 +1,11 @@ +--TEST-- +GH-19810: bzopen() stream mode validation +--EXTENSIONS-- +bz2 +--FILE-- +<?php +$f = fopen(__FILE__, 'ab'); +var_dump(bzopen($f, 'r')); +--EXPECTF-- +Warning: bzopen(): Cannot read from a stream opened in write only mode in %s on line %d +bool(false) diff --git a/ext/opcache/jit/ir/LICENSE b/ext/opcache/jit/ir/LICENSE index c43a12a770f8f..340f9c37225f6 100644 --- a/ext/opcache/jit/ir/LICENSE +++ b/ext/opcache/jit/ir/LICENSE @@ -1,6 +1,7 @@ MIT License Copyright (c) 2022 Zend by Perforce +Copyright (c) 2025 Dmitry Stogov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/ext/opcache/jit/ir/ir.c
b/ext/opcache/jit/ir/ir.c index 9d1b698761fbb..81621ce11bd36 100644 --- a/ext/opcache/jit/ir/ir.c +++ b/ext/opcache/jit/ir/ir.c @@ -227,6 +227,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted #define ir_op_flag_d0 ir_op_flag_d #define ir_op_flag_d1 (ir_op_flag_d | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_d1X1 (ir_op_flag_d | 1 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_d1X2 (ir_op_flag_d | 1 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_d2 (ir_op_flag_d | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_d2C (ir_op_flag_d | IR_OP_FLAG_COMMUTATIVE | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_d3 (ir_op_flag_d | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) @@ -270,6 +271,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted #define ir_op_flag_s3 (ir_op_flag_s | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_x1 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_x2 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT)) +#define ir_op_flag_x2X1 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 2 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_x3 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | 3 | (3 << IR_OP_FLAG_OPERANDS_SHIFT)) #define ir_op_flag_xN (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_CALL | IR_OP_FLAG_VAR_INPUTS) #define ir_op_flag_a1 (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_ALLOC | 1 | (1 << IR_OP_FLAG_OPERANDS_SHIFT)) @@ -392,6 +394,8 @@ void ir_init(ir_ctx *ctx, uint32_t flags, ir_ref consts_limit, ir_ref insns_limi ctx->insns_limit = insns_limit; ctx->consts_count = -(IR_TRUE - 1); ctx->consts_limit = consts_limit; + ctx->const_hash = ctx->_const_hash; + ctx->const_hash_mask = IR_CONST_HASH_SIZE - 1; ctx->fold_cse_limit = IR_UNUSED + 1; ctx->flags = flags; @@ -414,6 +418,9 @@ void ir_free(ir_ctx *ctx) { ir_insn *buf = ctx->ir_base - ctx->consts_limit; ir_mem_free(buf); + if (ctx->value_params) { + ir_mem_free(ctx->value_params); + } if (ctx->strtab.data) { ir_strtab_free(&ctx->strtab); } @@ -468,6 +475,10 @@ void ir_free(ir_ctx *ctx) ir_list_free((ir_list*)ctx->osr_entry_loads); ir_mem_free(ctx->osr_entry_loads); } + + if (ctx->const_hash_mask != IR_CONST_HASH_SIZE - 1) { + ir_mem_free(ctx->const_hash); + } } ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t addr) @@ -479,72 +490,64 @@ ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t addr) insn->val.u64 = addr; /* don't insert into constants chain */ insn->prev_const = IR_UNUSED; -#if 0 - insn->prev_const = ctx->prev_const_chain[IR_ADDR]; - ctx->prev_const_chain[IR_ADDR] = ref; -#endif -#if 0 - ir_insn *prev_insn, *next_insn; - ir_ref next; - - prev_insn = NULL; - next = ctx->prev_const_chain[IR_ADDR]; - while (next) { - next_insn = &ctx->ir_base[next]; - if (UNEXPECTED(next_insn->val.u64 >= addr)) { - break; - } - prev_insn = next_insn; - next = next_insn->prev_const; - } - - if (prev_insn) { - insn->prev_const = prev_insn->prev_const; - prev_insn->prev_const = ref; - } else { - insn->prev_const = ctx->prev_const_chain[IR_ADDR]; - ctx->prev_const_chain[IR_ADDR] = ref; - } -#endif return ref; } +IR_ALWAYS_INLINE uintptr_t ir_const_hash(ir_val val, uint32_t optx) +{ + return (val.u64 ^ (val.u64 >> 32) ^ optx); +} + +static IR_NEVER_INLINE void ir_const_hash_rehash(ir_ctx *ctx) +{ + ir_insn *insn; + ir_ref ref; + uintptr_t hash; + + if (ctx->const_hash_mask != 
IR_CONST_HASH_SIZE - 1) { + ir_mem_free(ctx->const_hash); + } + ctx->const_hash_mask = (ctx->const_hash_mask + 1) * 2 - 1; + ctx->const_hash = ir_mem_calloc(ctx->const_hash_mask + 1, sizeof(ir_ref)); + for (ref = IR_TRUE - 1; ref > -ctx->consts_count; ref--) { + insn = &ctx->ir_base[ref]; + hash = ir_const_hash(insn->val, insn->optx) & ctx->const_hash_mask; + insn->prev_const = ctx->const_hash[hash]; + ctx->const_hash[hash] = ref; + } +} + ir_ref ir_const_ex(ir_ctx *ctx, ir_val val, uint8_t type, uint32_t optx) { - ir_insn *insn, *prev_insn; + ir_insn *insn; ir_ref ref, prev; + uintptr_t hash; if (type == IR_BOOL) { return val.u64 ? IR_TRUE : IR_FALSE; } else if (type == IR_ADDR && val.u64 == 0) { return IR_NULL; } - prev_insn = NULL; - ref = ctx->prev_const_chain[type]; + + hash = ir_const_hash(val, optx) & ctx->const_hash_mask; + ref = ctx->const_hash[hash]; while (ref) { insn = &ctx->ir_base[ref]; - if (UNEXPECTED(insn->val.u64 >= val.u64)) { - if (insn->val.u64 == val.u64) { - if (insn->optx == optx) { - return ref; - } - } else { - break; - } + if (insn->val.u64 == val.u64 && insn->optx == optx) { + return ref; } - prev_insn = insn; ref = insn->prev_const; } - if (prev_insn) { - prev = prev_insn->prev_const; - prev_insn->prev_const = -ctx->consts_count; - } else { - prev = ctx->prev_const_chain[type]; - ctx->prev_const_chain[type] = -ctx->consts_count; + if ((uintptr_t)ctx->consts_count > ctx->const_hash_mask) { + ir_const_hash_rehash(ctx); + hash = ir_const_hash(val, optx) & ctx->const_hash_mask; } + prev = ctx->const_hash[hash]; + ctx->const_hash[hash] = -ctx->consts_count; + ref = ir_next_const(ctx); insn = &ctx->ir_base[ref]; insn->prev_const = prev; @@ -2092,10 +2095,10 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_typ if (insn->type == type) { return ref; /* load forwarding (L2L) */ } else if (ir_type_size[insn->type] == ir_type_size[type]) { - return ir_fold1(ctx, IR_OPT(IR_BITCAST, type), ref); /* load forwarding with bitcast (L2L) */ + return ref; /* load forwarding with bitcast (L2L) */ } else if (ir_type_size[insn->type] > ir_type_size[type] && IR_IS_TYPE_INT(type) && IR_IS_TYPE_INT(insn->type)) { - return ir_fold1(ctx, IR_OPT(IR_TRUNC, type), ref); /* partial load forwarding (L2L) */ + return ref; /* partial load forwarding (L2L) */ } } } else if (insn->op == IR_VSTORE) { @@ -2105,10 +2108,10 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_typ if (type2 == type) { return insn->op3; /* store forwarding (S2L) */ } else if (ir_type_size[type2] == ir_type_size[type]) { - return ir_fold1(ctx, IR_OPT(IR_BITCAST, type), insn->op3); /* store forwarding with bitcast (S2L) */ + return insn->op3; /* store forwarding with bitcast (S2L) */ } else if (ir_type_size[type2] > ir_type_size[type] && IR_IS_TYPE_INT(type) && IR_IS_TYPE_INT(type2)) { - return ir_fold1(ctx, IR_OPT(IR_TRUNC, type), insn->op3); /* partial store forwarding (S2L) */ + return insn->op3; /* partial store forwarding (S2L) */ } else { break; } @@ -3214,6 +3217,13 @@ ir_ref _ir_VA_ARG(ir_ctx *ctx, ir_type type, ir_ref list) return ctx->control = ir_emit2(ctx, IR_OPT(IR_VA_ARG, type), ctx->control, list); } +ir_ref _ir_VA_ARG_EX(ir_ctx *ctx, ir_type type, ir_ref list, size_t size) +{ + IR_ASSERT(ctx->control); + IR_ASSERT(size <= 0x7fffffff); + return ctx->control = ir_emit3(ctx, IR_OPT(IR_VA_ARG, type), ctx->control, list, (ir_ref)size); +} + ir_ref _ir_BLOCK_BEGIN(ir_ctx *ctx) { IR_ASSERT(ctx->control); diff --git a/ext/opcache/jit/ir/ir.h 
b/ext/opcache/jit/ir/ir.h index 9575348ff5450..52cbc06b153ee 100644 --- a/ext/opcache/jit/ir/ir.h +++ b/ext/opcache/jit/ir/ir.h @@ -310,6 +310,8 @@ typedef enum _ir_type { _(PHI, pN, reg, def, def) /* SSA Phi function */ \ _(COPY, d1X1, def, opt, ___) /* COPY (last foldable op) */ \ _(PI, p2, reg, def, ___) /* e-SSA Pi constraint ??? */ \ + _(ARGVAL, d1X2, def, num, num) /* pass struct arg by value */ \ + /* (op2 - size, op3 - align) */ \ /* (USE, RENAME) */ \ \ /* data ops */ \ @@ -343,7 +345,8 @@ typedef enum _ir_type { _(VA_START, x2, src, def, ___) /* va_start(va_list) */ \ _(VA_END, x2, src, def, ___) /* va_end(va_list) */ \ _(VA_COPY, x3, src, def, def) /* va_copy(dst, stc) */ \ - _(VA_ARG, x2, src, def, ___) /* va_arg(va_list) */ \ + _(VA_ARG, x2X1, src, def, opt) /* va_arg(va_list) */ \ + /* op3 - (size<<3)+log2(align) */ \ \ /* guards */ \ _(GUARD, c3, src, def, def) /* IF without second successor */ \ @@ -583,12 +586,22 @@ typedef struct _ir_code_buffer { void *pos; } ir_code_buffer; +typedef struct { + int size; + int align; + int offset; +} ir_value_param; + +#define IR_CONST_HASH_SIZE 64 + struct _ir_ctx { ir_insn *ir_base; /* two directional array - instructions grow down, constants grow up */ ir_ref insns_count; /* number of instructions stored in instructions buffer */ ir_ref insns_limit; /* size of allocated instructions buffer (it's extended when overflow) */ ir_ref consts_count; /* number of constants stored in constants buffer */ ir_ref consts_limit; /* size of allocated constants buffer (it's extended when overflow) */ + uintptr_t const_hash_mask; + ir_ref *const_hash; uint32_t flags; /* IR context flags (see IR_* defines above) */ uint32_t flags2; /* IR context private flags (see IR_* defines in ir_private.h) */ ir_type ret_type; /* Function return type */ @@ -596,6 +609,7 @@ struct _ir_ctx { int32_t status; /* non-zero error code (see IR_ERROR_... 
macros), app may use negative codes */ ir_ref fold_cse_limit; /* CSE finds identical insns backward from "insn_count" to "fold_cse_limit" */ ir_insn fold_insn; /* temporary storage for folding engine */ + ir_value_param *value_params; /* information about "by-val" struct parameters */ ir_hashtab *binding; ir_use_list *use_lists; /* def->use lists for each instruction */ ir_ref *use_edges; /* the actual uses: use = ctx->use_edges[ctx->use_lists[def].refs + n] */ @@ -655,7 +669,7 @@ struct _ir_ctx { ir_loader *loader; ir_strtab strtab; ir_ref prev_insn_chain[IR_LAST_FOLDABLE_OP + 1]; - ir_ref prev_const_chain[IR_LAST_TYPE]; + ir_ref _const_hash[IR_CONST_HASH_SIZE]; }; /* Basic IR Construction API (implementation in ir.c) */ @@ -896,6 +910,7 @@ int ir_load_llvm_asm(ir_loader *loader, const char *filename); #define IR_SAVE_SAFE_NAMES (1<<5) /* add '@' prefix to symbol names */ void ir_print_proto(const ir_ctx *ctx, ir_ref proto, FILE *f); +void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types, FILE *f); void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f); /* IR debug dump API (implementation in ir_dump.c) */ diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index 3a2c3687042cd..4cdf4d68c2179 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -574,6 +574,10 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; break; + case IR_ARGVAL: + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_DEF_SUB_REF - IR_SUB_REFS_COUNT, IR_USE_SUB_REF); + n = 1; + break; case IR_CALL: insn = &ctx->ir_base[ref]; constraints->def_reg = (IR_IS_TYPE_INT(insn->type)) ? IR_REG_INT_RET1 : IR_REG_FP_RET1; @@ -1103,6 +1107,8 @@ binop_fp: } } return IR_SKIPPED | IR_NOP; + case IR_ARGVAL: + return IR_FUSED | IR_ARGVAL; case IR_NOP: return IR_SKIPPED | IR_NOP; default: @@ -1386,6 +1392,12 @@ static void ir_emit_load_mem(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem) } } +static int32_t ir_local_offset(ir_ctx *ctx, ir_insn *insn) +{ + IR_ASSERT(insn->op == IR_VAR || insn->op == IR_ALLOCA || insn->op == IR_VADDR); + return IR_SPILL_POS_TO_OFFSET(insn->op3); +} + static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src) { ir_backend_data *data = ctx->data; @@ -1399,8 +1411,7 @@ static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src) if (var_insn->op == IR_VADDR) { var_insn = &ctx->ir_base[var_insn->op1]; } - IR_ASSERT(var_insn->op == IR_VAR || var_insn->op == IR_ALLOCA); - offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + offset = ir_local_offset(ctx, var_insn); if (aarch64_may_encode_imm12(offset)) { | add Rx(reg), Rx(base), #offset } else { @@ -3858,7 +3869,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[addr_insn->op2].op)); if (ir_rule(ctx, addr_insn->op1) == IR_STATIC_ALLOCA) { reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[addr_insn->op1].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[addr_insn->op1]); offset += ctx->ir_base[addr_insn->op2].val.i32; return IR_MEM_BO(reg, offset); } else { @@ -3876,7 +3887,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) } else { IR_ASSERT(addr_insn->op == IR_ALLOCA || addr_insn->op == IR_VADDR); reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[ref].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[ref]); return IR_MEM_BO(reg, offset); } } @@ -4265,7 +4276,7 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } if (ctx->flags & IR_USE_FRAME_POINTER) { @@ -4297,7 +4308,7 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } if (ctx->flags & IR_USE_FRAME_POINTER) { @@ -4364,7 +4375,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); + op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } if (op3_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op3_reg)) { @@ -4375,7 +4386,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3); + op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]); } | ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset] | str Rx(tmp_reg), [Rx(op2_reg), #op2_offset] @@ -4397,7 +4408,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); + op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } if (op3_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op3_reg)) { @@ -4408,7 +4419,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3); + op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]); } | ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset] | str Rx(tmp_reg), [Rx(op2_reg), #op2_offset] @@ -4446,7 +4457,7 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } | ldr Rx(tmp_reg), [Rx(op2_reg), #offset] ir_emit_load_mem(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0)); @@ -4478,7 +4489,7 @@ } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } if (IR_IS_TYPE_INT(type)) { | ldr Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))] @@ -4741,7 +4752,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) } } -static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn) +static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, int32_t *copy_stack_ptr) { int j, n; ir_type type; @@ -4749,7 +4760,7 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn) int fp_param = 0; int int_reg_params_count = IR_REG_INT_ARGS; int fp_reg_params_count = IR_REG_FP_ARGS; - int32_t used_stack = 0; + int32_t used_stack = 0, copy_stack = 0; #ifdef __APPLE__ const ir_proto_t *proto = ir_call_proto(ctx, insn); int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count; @@ -4757,7 +4768,16 @@ n = insn->inputs_count; for (j = 3; j <= n; j++) { - type = ctx->ir_base[ir_insn_op(insn, j)].type; + ir_insn *arg = &ctx->ir_base[ir_insn_op(insn, j)]; + type = arg->type; + if (arg->op == IR_ARGVAL) { + int size = arg->op2; + int align = arg->op3; + copy_stack += size; + align = IR_MAX((int)sizeof(void*), align); + copy_stack = IR_ALIGNED_SIZE(copy_stack, align); + type = IR_ADDR; + } #ifdef __APPLE__ if (j > last_named_input) { used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); @@ -4777,7 +4797,9 @@ } } - return used_stack; + copy_stack = IR_ALIGNED_SIZE(copy_stack, 16); + *copy_stack_ptr = copy_stack; + return used_stack + copy_stack; } static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg) @@ -4796,7 +4818,7 @@ int fp_reg_params_count = IR_REG_FP_ARGS; const int8_t *int_reg_params = _ir_int_reg_params; const int8_t *fp_reg_params = _ir_fp_reg_params; - int32_t used_stack, stack_offset = 0; + int32_t used_stack, copy_stack = 0, stack_offset = 0, copy_stack_offset = 0; ir_copy *copies; bool do_pass3 = 0; /* For temporaries we may use any scratch registers except for registers used for parameters */ @@ -4815,7 +4837,7 @@ // TODO: support for preallocated stack used_stack = 0; } else { - used_stack = ir_call_used_stack(ctx, insn); + used_stack = ir_call_used_stack(ctx, insn, &copy_stack); /* Stack must be 16 byte aligned */ used_stack = IR_ALIGNED_SIZE(used_stack, 16); if (ctx->fixed_call_stack_size && used_stack <= ctx->fixed_call_stack_size) { @@ -4838,6 +4860,48 @@ int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ?
proto->params_count + 2 : insn->inputs_count; #endif + if (copy_stack) { + /* Copy struct arguments */ + for (j = 3; j <= n; j++) { + arg = ir_insn_op(insn, j); + src_reg = ir_get_alocated_reg(ctx, def, j); + arg_insn = &ctx->ir_base[arg]; + type = arg_insn->type; + + if (arg_insn->op == IR_ARGVAL) { + /* make a stack copy */ + void *addr = memcpy; + int size = arg_insn->op2; + int align = arg_insn->op3; + + copy_stack_offset += size; + align = IR_MAX((int)sizeof(void*), align); + copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align); + src_reg = ctx->regs[arg][1]; + + | add Rx(IR_REG_INT_ARG1), sp, #(used_stack - copy_stack_offset) + if (src_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(src_reg)) { + src_reg = IR_REG_NUM(src_reg); + ir_emit_load(ctx, IR_ADDR, src_reg, arg_insn->op1); + } + | mov Rx(IR_REG_INT_ARG2), Rx(src_reg) + } else { + ir_emit_load(ctx, IR_ADDR, IR_REG_INT_ARG2, arg_insn->op1); + } + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_ARG3, size); + + if (aarch64_may_use_b(ctx->code_buffer, addr)) { + | bl &addr + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); + | blr Rx(IR_REG_INT_TMP) + } + } + } + copy_stack_offset = 0; + } + /* 1. move all register arguments that should be passed through stack * and collect arguments that should be passed through registers */ copies = ir_mem_malloc((n - 2) * sizeof(ir_copy)); @@ -4846,8 +4910,13 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg src_reg = ir_get_alocated_reg(ctx, def, j); arg_insn = &ctx->ir_base[arg]; type = arg_insn->type; + #ifdef __APPLE__ if (j > last_named_input) { + if (arg_insn->op == IR_ARGVAL) { + do_pass3 = 1; + continue; + } dst_reg = IR_REG_NONE; /* pass argument through stack */ } else #endif @@ -4858,6 +4927,10 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg dst_reg = IR_REG_NONE; /* pass argument through stack */ } int_param++; + if (arg_insn->op == IR_ARGVAL) { + do_pass3 = 1; + continue; + } } else { IR_ASSERT(IR_IS_TYPE_FP(type)); if (fp_param < fp_reg_params_count) { @@ -4914,6 +4987,31 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg src_reg = ir_get_alocated_reg(ctx, def, j); arg_insn = &ctx->ir_base[arg]; type = arg_insn->type; + if (arg_insn->op == IR_ARGVAL) { + /* pass pointer to the copy on stack */ + int size = arg_insn->op2; + int align = arg_insn->op3; + + copy_stack_offset += size; + align = IR_MAX((int)sizeof(void*), align); + copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align); +#ifdef __APPLE__ + if (j > last_named_input) { + | add Rx(tmp_reg), sp, #(used_stack - copy_stack_offset) + ir_emit_store_mem_int(ctx, IR_ADDR, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg); + } else +#endif + if (int_param < int_reg_params_count) { + dst_reg = int_reg_params[int_param]; + | add Rx(dst_reg), sp, #(used_stack - copy_stack_offset) + } else { + | add Rx(tmp_reg), sp, #(used_stack - copy_stack_offset) + ir_emit_store_mem_int(ctx, IR_ADDR, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg); + stack_offset += sizeof(void*); + } + int_param++; + continue; + } #ifdef __APPLE__ if (j > last_named_input) { dst_reg = IR_REG_NONE; /* pass argument through stack */ @@ -5896,7 +5994,8 @@ static void ir_preallocate_call_stack(ir_ctx *ctx) for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) { if (insn->op == IR_CALL) { - call_stack_size = ir_call_used_stack(ctx, insn); + int32_t copy_stack; + call_stack_size = ir_call_used_stack(ctx, insn, 
&copy_stack); if (call_stack_size > peak_call_stack_size) { peak_call_stack_size = call_stack_size; } diff --git a/ext/opcache/jit/ir/ir_builder.h b/ext/opcache/jit/ir/ir_builder.h index 358ae241e2dd4..c1dcffdbaa084 100644 --- a/ext/opcache/jit/ir/ir_builder.h +++ b/ext/opcache/jit/ir/ir_builder.h @@ -586,6 +586,7 @@ extern "C" { #define ir_VA_END(_list) _ir_VA_END(_ir_CTX, _list) #define ir_VA_COPY(_dst, _src) _ir_VA_COPY(_ir_CTX, _dst, _src) #define ir_VA_ARG(_list, _type) _ir_VA_ARG(_ir_CTX, _type, _list) +#define ir_VA_ARG_EX(_list, _type, size) _ir_VA_ARG_EX(_ir_CTX, _type, _list, size) #define ir_START() _ir_START(_ir_CTX) #define ir_ENTRY(_src, _num) _ir_ENTRY(_ir_CTX, (_src), (_num)) @@ -661,6 +662,7 @@ void _ir_VA_START(ir_ctx *ctx, ir_ref list); void _ir_VA_END(ir_ctx *ctx, ir_ref list); void _ir_VA_COPY(ir_ctx *ctx, ir_ref dst, ir_ref src); ir_ref _ir_VA_ARG(ir_ctx *ctx, ir_type type, ir_ref list); +ir_ref _ir_VA_ARG_EX(ir_ctx *ctx, ir_type type, ir_ref list, size_t size); void _ir_START(ir_ctx *ctx); void _ir_ENTRY(ir_ctx *ctx, ir_ref src, ir_ref num); void _ir_BEGIN(ir_ctx *ctx, ir_ref src); diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c index 13d66a7130283..00923387bb21c 100644 --- a/ext/opcache/jit/ir/ir_cfg.c +++ b/ext/opcache/jit/ir/ir_cfg.c @@ -605,7 +605,7 @@ static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b) /* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by * Cooper, Harvey and Kennedy. */ -static int ir_build_dominators_tree_slow(ir_ctx *ctx) +static IR_NEVER_INLINE int ir_build_dominators_tree_slow(ir_ctx *ctx) { uint32_t blocks_count, b, postnum; ir_block *blocks, *bb; @@ -690,28 +690,13 @@ /* Build dominators tree */ blocks[1].idom = 0; blocks[1].dom_depth = 0; - for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { - uint32_t idom = bb->idom; - ir_block *idom_bb = &blocks[idom]; + /* Construct children lists sorted by block number */ + for (b = blocks_count, bb = &blocks[b]; b >= 2; b--, bb--) { + ir_block *idom_bb = &blocks[bb->idom]; bb->dom_depth = 0; - /* Sort by block number to traverse children in pre-order */ - if (idom_bb->dom_child == 0) { - idom_bb->dom_child = b; - } else if (b < idom_bb->dom_child) { - bb->dom_next_child = idom_bb->dom_child; - idom_bb->dom_child = b; - } else { - int child = idom_bb->dom_child; - ir_block *child_bb = &blocks[child]; - - while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) { - child = child_bb->dom_next_child; - child_bb = &blocks[child]; - } - bb->dom_next_child = child_bb->dom_next_child; - child_bb->dom_next_child = b; - } + bb->dom_next_child = idom_bb->dom_child; + idom_bb->dom_child = b; } /* Recalculate dom_depth for all blocks */ @@ -769,6 +754,7 @@ int ir_build_dominators_tree(ir_ctx *ctx) ctx->flags2 &= ~IR_NO_LOOPS; // IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor"); if (UNEXPECTED(k <= 1)) { +slow_case: ir_list_free(&worklist); return ir_build_dominators_tree_slow(ctx); } @@ -780,7 +766,9 @@ if (idom < b) { break; } - IR_ASSERT(k > 0); + if (UNEXPECTED(k == 0)) { + goto slow_case; + } ir_list_push(&worklist, idom); } } @@ -808,25 +796,14 @@ } bb->idom = idom; idom_bb = &blocks[idom]; - bb->dom_depth = idom_bb->dom_depth + 1; - /* Sort by block number to traverse children in pre-order */ - if (idom_bb->dom_child == 0) { - idom_bb->dom_child =
b; - } else if (b < idom_bb->dom_child) { - bb->dom_next_child = idom_bb->dom_child; - idom_bb->dom_child = b; - } else { - int child = idom_bb->dom_child; - ir_block *child_bb = &blocks[child]; + } - while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) { - child = child_bb->dom_next_child; - child_bb = &blocks[child]; - } - bb->dom_next_child = child_bb->dom_next_child; - child_bb->dom_next_child = b; - } + /* Construct children lists sorted by block number */ + for (b = blocks_count, bb = &blocks[b]; b >= 2; b--, bb--) { + ir_block *idom_bb = &blocks[bb->idom]; + bb->dom_next_child = idom_bb->dom_child; + idom_bb->dom_child = b; } blocks[1].idom = 0; @@ -945,23 +922,13 @@ static int ir_build_dominators_tree_iterative(ir_ctx *ctx) ir_block *idom_bb = &blocks[idom]; bb->dom_depth = idom_bb->dom_depth + 1; - /* Sort by block number to traverse children in pre-order */ - if (idom_bb->dom_child == 0) { - idom_bb->dom_child = b; - } else if (b < idom_bb->dom_child) { - bb->dom_next_child = idom_bb->dom_child; - idom_bb->dom_child = b; - } else { - int child = idom_bb->dom_child; - ir_block *child_bb = &blocks[child]; + } - while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) { - child = child_bb->dom_next_child; - child_bb = &blocks[child]; - } - bb->dom_next_child = child_bb->dom_next_child; - child_bb->dom_next_child = b; - } + /* Construct children lists sorted by block number */ + for (b = blocks_count, bb = &blocks[b]; b >= 2; b--, bb--) { + ir_block *idom_bb = &blocks[bb->idom]; + bb->dom_next_child = idom_bb->dom_child; + idom_bb->dom_child = b; } return 1; diff --git a/ext/opcache/jit/ir/ir_dump.c b/ext/opcache/jit/ir/ir_dump.c index 54fddf50ac066..a501d261f30a7 100644 --- a/ext/opcache/jit/ir/ir_dump.c +++ b/ext/opcache/jit/ir/ir_dump.c @@ -660,6 +660,12 @@ void ir_dump_codegen(const ir_ctx *ctx, FILE *f) } if (first) { fprintf(f, ";"); + } else if (ctx->value_params + && insn->op == IR_PARAM + && ctx->value_params[insn->op3 - 1].align) { + fprintf(f, ") ByVal(%d, %d);", + ctx->value_params[insn->op3 - 1].size, + ctx->value_params[insn->op3 - 1].align); } else { fprintf(f, ");"); } diff --git a/ext/opcache/jit/ir/ir_emit.c b/ext/opcache/jit/ir/ir_emit.c index fab9f56228d80..c9e65229c3969 100644 --- a/ext/opcache/jit/ir/ir_emit.c +++ b/ext/opcache/jit/ir/ir_emit.c @@ -167,11 +167,24 @@ static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref) if (insn->op == IR_PARAM) { if (IR_IS_TYPE_INT(insn->type)) { if (use == ref) { +#if defined(IR_TARGET_X64) || defined(IR_TARGET_X86) + if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) { + /* struct passed by value on stack */ + return IR_REG_NONE; + } else +#endif if (int_param < int_reg_params_count) { return int_reg_params[int_param]; } else { return IR_REG_NONE; } +#if defined(IR_TARGET_X64) || defined(IR_TARGET_X86) + } else { + if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) { + /* struct passed by value on stack */ + continue; + } +#endif } int_param++; #ifdef _WIN64 @@ -222,9 +235,12 @@ static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs n = insn->inputs_count; n = IR_MIN(n, IR_MAX_REG_ARGS + 2); for (j = 3; j <= n; j++) { - type = ctx->ir_base[ir_insn_op(insn, j)].type; + ir_insn *arg = &ctx->ir_base[ir_insn_op(insn, j)]; + type = arg->type; if (IR_IS_TYPE_INT(type)) { - if (int_param < int_reg_params_count) { + if (arg->op == IR_ARGVAL) { + continue; + } else if (int_param < int_reg_params_count) { regs[j] = int_reg_params[int_param]; count 
= j + 1; } else { diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h index 2f5be6ca2e00b..7ae6ca539da8d 100644 --- a/ext/opcache/jit/ir/ir_fold.h +++ b/ext/opcache/jit/ir/ir_fold.h @@ -1703,6 +1703,11 @@ IR_FOLD(SUB_OV(_, C_ADDR)) { if (op2_insn->val.u64 == 0) { /* a +/- 0 => a */ + if (op1_insn->type != IR_OPT_TYPE(opt)) { + opt = IR_BITCAST | (opt & IR_OPT_TYPE_MASK); + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } IR_FOLD_COPY(op1); } IR_FOLD_NEXT; @@ -1721,6 +1726,12 @@ IR_FOLD(ADD(C_ADDR, _)) { if (op1_insn->val.u64 == 0) { /* 0 + a => a */ + if (op2_insn->type != IR_OPT_TYPE(opt)) { + opt = IR_BITCAST | (opt & IR_OPT_TYPE_MASK); + op1 = op2; + op2 = IR_UNUSED; + IR_FOLD_RESTART; + } IR_FOLD_COPY(op2); } IR_FOLD_NEXT; @@ -2927,7 +2938,7 @@ IR_FOLD(SUB(C_ADDR, SUB)) /* c1 - (x - c2) => (c1 + c2) - x */ val.u64 = op1_insn->val.u64 + ctx->ir_base[op2_insn->op2].val.u64; op2 = op2_insn->op1; - op1 = ir_const(ctx, val, op1_insn->op1); + op1 = ir_const(ctx, val, op1_insn->type); IR_FOLD_RESTART; } else if (IR_IS_CONST_REF(op2_insn->op1) && !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op1].op)) { /* c1 - (c2 - x) => x + (c1 - c2) */ diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c index 3ea78850249ab..c170fa47476ec 100644 --- a/ext/opcache/jit/ir/ir_gcm.c +++ b/ext/opcache/jit/ir/ir_gcm.c @@ -792,21 +792,26 @@ IR_ALWAYS_INLINE bool ir_is_good_bb_order(ir_ctx *ctx, uint32_t b, ir_block *bb, ir_ref *p = insn->ops + 1; if (n == 1) { - return *p < start; + return ctx->cfg_map[*p] < b; } else { IR_ASSERT(n > 1); for (; n > 0; p++, n--) { ir_ref input = *p; - if (input < start) { - /* ordered */ - } else if ((bb->flags & IR_BB_LOOP_HEADER) - && (ctx->cfg_map[input] == b || ctx->cfg_blocks[ctx->cfg_map[input]].loop_header == b)) { - /* back-edge of reducible loop */ - } else if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP) - && (ctx->cfg_blocks[ctx->cfg_map[input]].loop_header == ctx->cfg_blocks[b].loop_header)) { - /* closing edge of irreducible loop */ - } else { - return 0; + + if (!IR_IS_CONST_REF(input)) { + uint32_t input_b = ctx->cfg_map[input]; + + if (input_b < b) { + /* ordered */ + } else if ((bb->flags & IR_BB_LOOP_HEADER) + && (input_b == b || ctx->cfg_blocks[input_b].loop_header == b)) { + /* back-edge of reducible loop */ + } else if ((bb->flags & IR_BB_IRREDUCIBLE_LOOP) + && (ctx->cfg_blocks[input_b].loop_header == bb->loop_header)) { + /* closing edge of irreducible loop */ + } else { + return 0; + } } } return 1; @@ -925,121 +930,54 @@ int ir_schedule(ir_ctx *ctx) ir_ref *_xlat; ir_ref *edges; ir_ref prev_b_end; - uint32_t b, prev_b; + uint32_t b; uint32_t *_blocks = ctx->cfg_map; ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); - ir_ref _move_down = 0; ir_block *bb; ir_insn *insn, *new_insn; ir_use_list *lists, *use_list, *new_list; bool bad_bb_order = 0; + /* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */ IR_ASSERT(_blocks[1] == 1); - prev_b = 1; - prev_b_end = ctx->cfg_blocks[1].end; + + /* link BB boundaries */ _prev[1] = 0; - _prev[prev_b_end] = 0; - for (i = 2, j = 1; i < ctx->insns_count; i++) { - b = _blocks[i]; - IR_ASSERT((int32_t)b >= 0); - if (b == prev_b && i <= prev_b_end) { - /* add to the end of the list */ - _next[j] = i; - _prev[i] = j; - j = i; - } else if (b > prev_b) { - bb = &ctx->cfg_blocks[b]; - if (i == bb->start) { - if (bb->end > bb->start) { - prev_b = b; - prev_b_end = bb->end; - /* add to the end of the 
list */ - _next[j] = i; - _prev[i] = j; - j = i; - } else { - prev_b = 0; - prev_b_end = 0; - k = bb->end; - while (_blocks[_prev[k]] == b) { - k = _prev[k]; - } - /* insert before "k" */ - _prev[i] = _prev[k]; - _next[i] = k; - _next[_prev[k]] = i; - _prev[k] = i; - } - if (!ir_is_good_bb_order(ctx, b, bb, i)) { - bad_bb_order = 1; - } - } else if (i != bb->end) { - /* move down late (see the following loop) */ - _next[i] = _move_down; - _move_down = i; - } else { - prev_b = 0; - prev_b_end = 0; - if (bb->start > bb->end) { - /* add to the end of the list */ - _next[j] = i; - _prev[i] = j; - j = i; - } else { - k = bb->start; - while (_blocks[_next[k]] == b) { - k = _next[k]; - } - /* insert after "k" */ - _next[i] = _next[k]; - _prev[i] = k; - _prev[_next[k]] = i; - _next[k] = i; - } - } - } else if (b) { - bb = &ctx->cfg_blocks[b]; - IR_ASSERT(i != bb->start); - if (i > bb->end) { - /* move up, insert before the end of the already scheduled BB */ - k = bb->end; - } else { - IR_ASSERT(i > bb->start); - /* move up, insert at the end of the block */ - k = ctx->cfg_blocks[b + 1].start; - } - /* insert before "k" */ - _prev[i] = _prev[k]; - _next[i] = k; - _next[_prev[k]] = i; - _prev[k] = i; + prev_b_end = ctx->cfg_blocks[1].end; + _next[1] = prev_b_end; + _prev[prev_b_end] = 1; + for (b = 2, bb = ctx->cfg_blocks + 2; b <= ctx->cfg_blocks_count; b++, bb++) { + _next[prev_b_end] = bb->start; + _prev[bb->start] = prev_b_end; + _next[bb->start] = bb->end; + _prev[bb->end] = bb->start; + prev_b_end = bb->end; + if (!ir_is_good_bb_order(ctx, b, bb, bb->start)) { + bad_bb_order = 1; } } - _next[j] = 0; + _next[prev_b_end] = 0; - while (_move_down) { - i = _move_down; - _move_down = _next[i]; + /* insert intermediate BB nodes */ + for (i = 2, j = 1; i < ctx->insns_count; i++) { b = _blocks[i]; + if (!b) continue; bb = &ctx->cfg_blocks[b]; - k = _next[bb->start]; - - if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) { - /* insert after the start of the block and all PARAM, VAR, PI, PHI */ - insn = &ctx->ir_base[k]; - while (insn->op == IR_PHI || insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) { - k = _next[k]; - insn = &ctx->ir_base[k]; - } + if (i != bb->start && i != bb->end) { + /* insert before "end" */ + ir_ref n = bb->end; + ir_ref p = _prev[n]; + _prev[i] = p; + _next[i] = n; + _next[p] = i; + _prev[n] = i; } + } - /* insert before "k" */ - _prev[i] = _prev[k]; - _next[i] = k; - _next[_prev[k]] = i; - _prev[k] = i; + if (bad_bb_order) { + ir_fix_bb_order(ctx, _prev, _next); } #ifdef IR_DEBUG @@ -1051,10 +989,6 @@ int ir_schedule(ir_ctx *ctx) } #endif - if (bad_bb_order) { - ir_fix_bb_order(ctx, _prev, _next); - } - _xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref)); _xlat += ctx->consts_count; _xlat[IR_TRUE] = IR_TRUE; @@ -1168,7 +1102,11 @@ int ir_schedule(ir_ctx *ctx) if (end->op == IR_IF) { /* Move condition closer to IF */ input = end->op2; - if (input > 0 && _blocks[input] == b && !_xlat[input] && _prev[j] != input) { + if (input > 0 + && _blocks[input] == b + && !_xlat[input] + && _prev[j] != input + && (!(ir_op_flags[ctx->ir_base[input].op] & IR_OP_FLAG_CONTROL) || end->op1 == input)) { if (input == i) { i = _next[i]; insn = &ctx->ir_base[i]; @@ -1188,6 +1126,7 @@ int ir_schedule(ir_ctx *ctx) ir_ref n, j, *p, input; restart: + IR_ASSERT(_blocks[i] == b); n = insn->inputs_count; for (j = n, p = insn->ops + 1; j > 0; p++, j--) { input = *p; @@ -1221,6 +1160,7 @@ int ir_schedule(ir_ctx *ctx) } _xlat[i] = insns_count; 
insns_count += ir_insn_inputs_to_len(n); + IR_ASSERT(_next[i] != IR_UNUSED); i = _next[i]; insn = &ctx->ir_base[i]; } @@ -1274,6 +1214,7 @@ int ir_schedule(ir_ctx *ctx) new_ctx.insns_count = insns_count; new_ctx.flags2 = ctx->flags2; new_ctx.ret_type = ctx->ret_type; + new_ctx.value_params = ctx->value_params; new_ctx.mflags = ctx->mflags; new_ctx.spill_base = ctx->spill_base; new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone; @@ -1511,6 +1452,7 @@ int ir_schedule(ir_ctx *ctx) new_ctx.cfg_edges = ctx->cfg_edges; ctx->cfg_blocks = NULL; ctx->cfg_edges = NULL; + ctx->value_params = NULL; ir_code_buffer *saved_code_buffer = ctx->code_buffer; ir_free(ctx); diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c index 0c0e8dec3b47a..21c7ee3ac64e5 100644 --- a/ext/opcache/jit/ir/ir_ra.c +++ b/ext/opcache/jit/ir/ir_ra.c @@ -1193,7 +1193,7 @@ static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block n = IR_INPUT_EDGES_COUNT(flags); j = 1; p = insn->ops + j; - if (flags & IR_OP_FLAG_CONTROL) { + if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_PINNED)) { j++; p++; } @@ -1340,7 +1340,7 @@ int ir_compute_live_ranges(ir_ctx *ctx) || (ctx->rules[ref] & IR_RULE_MASK) == IR_ALLOCA) && ctx->use_lists[ref].count > 0) { insn = &ctx->ir_base[ref]; - if (insn->op != IR_VADDR) { + if (insn->op != IR_VADDR && insn->op != IR_PARAM) { insn->op3 = ctx->vars; ctx->vars = ref; } @@ -1630,6 +1630,10 @@ static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2) if (ctx->ir_base[IR_LIVE_POS_TO_REF(ctx->live_intervals[r1]->use_pos->pos)].op != IR_VLOAD) { ctx->live_intervals[r1]->flags &= ~IR_LIVE_INTERVAL_MEM_LOAD; } + if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) { + IR_ASSERT(!(ctx->live_intervals[r1]->flags & IR_LIVE_INTERVAL_MEM_PARAM)); + ctx->live_intervals[r1]->flags |= IR_LIVE_INTERVAL_MEM_PARAM; + } ctx->live_intervals[r2] = NULL; // TODO: remember to reuse ??? 
diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c index ea787f162ec1f..5ba986fadd481 100644 --- a/ext/opcache/jit/ir/ir_save.c +++ b/ext/opcache/jit/ir/ir_save.c @@ -10,31 +10,35 @@ void ir_print_proto(const ir_ctx *ctx, ir_ref func_proto, FILE *f) { - ir_ref j; - if (func_proto) { const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, func_proto); + ir_print_proto_ex(proto->flags, proto->ret_type, proto->params_count, proto->param_types, f); + } else { + fprintf(f, "(): int32_t"); + } +} - fprintf(f, "("); - if (proto->params_count > 0) { - fprintf(f, "%s", ir_type_cname[proto->param_types[0]]); - for (j = 1; j < proto->params_count; j++) { - fprintf(f, ", %s", ir_type_cname[proto->param_types[j]]); - } - if (proto->flags & IR_VARARG_FUNC) { - fprintf(f, ", ..."); - } - } else if (proto->flags & IR_VARARG_FUNC) { - fprintf(f, "..."); +void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types, FILE *f) +{ + uint32_t j; + + fprintf(f, "("); + if (params_count > 0) { + fprintf(f, "%s", ir_type_cname[param_types[0]]); + for (j = 1; j < params_count; j++) { + fprintf(f, ", %s", ir_type_cname[param_types[j]]); } - fprintf(f, "): %s", ir_type_cname[proto->ret_type]); - if (proto->flags & IR_FASTCALL_FUNC) { - fprintf(f, " __fastcall"); - } else if (proto->flags & IR_BUILTIN_FUNC) { - fprintf(f, " __builtin"); + if (flags & IR_VARARG_FUNC) { + fprintf(f, ", ..."); } - } else { - fprintf(f, "(): int32_t"); + } else if (flags & IR_VARARG_FUNC) { + fprintf(f, "..."); + } + fprintf(f, "): %s", ir_type_cname[ret_type]); + if (flags & IR_FASTCALL_FUNC) { + fprintf(f, " __fastcall"); + } else if (flags & IR_BUILTIN_FUNC) { + fprintf(f, " __builtin"); } } @@ -280,6 +284,12 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f) } if (first) { fprintf(f, ";"); + } else if (ctx->value_params + && insn->op == IR_PARAM + && ctx->value_params[insn->op3 - 1].align) { + fprintf(f, ") ByVal(%d, %d);", + ctx->value_params[insn->op3 - 1].size, + ctx->value_params[insn->op3 - 1].align); } else { fprintf(f, ");"); } diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index 58de0d726f781..48659cd4bd71b 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -1875,6 +1875,7 @@ static ir_ref ir_ext_const(ir_ctx *ctx, ir_insn *val_insn, ir_op op, ir_type typ case IR_I8: case IR_U8: case IR_BOOL: + case IR_CHAR: if (op == IR_SEXT) { new_val.i64 = (int64_t)val_insn->val.i8; } else { @@ -1928,7 +1929,7 @@ static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, return ref; } -static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val) +static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val, ir_ref loop) { ir_insn *insn; ir_ref n, *p, input, result, ctrl; @@ -1953,7 +1954,8 @@ static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val) result = 1; for (; n > 0; p++, n--) { input = *p; - ctrl = _ir_estimated_control(ctx, input); + ctrl = _ir_estimated_control(ctx, input, loop); + if (ctrl >= loop) return ctrl; if (ctrl > result) { // TODO: check dominance depth instead of order result = ctrl; } @@ -1963,7 +1965,7 @@ static uint32_t _ir_estimated_control(ir_ctx *ctx, ir_ref val) static bool ir_is_loop_invariant(ir_ctx *ctx, ir_ref ref, ir_ref loop) { - ref = _ir_estimated_control(ctx, ref); + ref = _ir_estimated_control(ctx, ref, loop); return ref < loop; // TODO: check dominance instead of order } diff --git a/ext/opcache/jit/ir/ir_x86.dasc 
b/ext/opcache/jit/ir/ir_x86.dasc index e5c038fce8e5a..781c8e5269c29 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -1387,6 +1387,12 @@ op2_const: constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; break; + case IR_ARGVAL: + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RSI, IR_DEF_SUB_REF - IR_SUB_REFS_COUNT, IR_USE_SUB_REF); + constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RDI, IR_DEF_SUB_REF - IR_SUB_REFS_COUNT, IR_USE_SUB_REF); + constraints->tmp_regs[2] = IR_SCRATCH_REG(IR_REG_RCX, IR_DEF_SUB_REF - IR_SUB_REFS_COUNT, IR_USE_SUB_REF); + n = 3; + break; case IR_CALL: insn = &ctx->ir_base[ref]; if (IR_IS_TYPE_INT(insn->type)) { @@ -2431,6 +2437,11 @@ binop_fp: case IR_VAR: return IR_SKIPPED | IR_VAR; case IR_PARAM: +#ifndef _WIN64 + if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) { + return IR_STATIC_ALLOCA; + } +#endif return ctx->use_lists[ref].count > 0 ? IR_PARAM : IR_SKIPPED | IR_PARAM; case IR_ALLOCA: /* alloca() may be used only in functions */ @@ -2976,6 +2987,8 @@ store_int: } } return IR_SKIPPED | IR_NOP; + case IR_ARGVAL: + return IR_FUSED | IR_ARGVAL; case IR_NOP: return IR_SKIPPED | IR_NOP; default: @@ -3153,6 +3166,17 @@ static void ir_emit_load_mem(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem) } } +static int32_t ir_local_offset(ir_ctx *ctx, ir_insn *insn) +{ + if (insn->op != IR_PARAM) { + IR_ASSERT(insn->op == IR_VAR || insn->op == IR_ALLOCA || insn->op == IR_VADDR); + return IR_SPILL_POS_TO_OFFSET(insn->op3); + } else { + IR_ASSERT(ctx->value_params && ctx->value_params[insn->op3 - 1].align); + return IR_SPILL_POS_TO_OFFSET(ctx->value_params[insn->op3 - 1].offset); + } +} + static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src) { ir_backend_data *data = ctx->data; @@ -3166,8 +3190,7 @@ static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src) if (var_insn->op == IR_VADDR) { var_insn = &ctx->ir_base[var_insn->op1]; } - IR_ASSERT(var_insn->op == IR_VAR || var_insn->op == IR_ALLOCA); - offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); + offset = ir_local_offset(ctx, var_insn); if (offset == 0) { | mov Ra(reg), Ra(base) } else { @@ -3385,7 +3408,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) case IR_LEA_OB: offset_insn = insn; if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) { - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op1]); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; } else { @@ -3407,12 +3430,12 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) break; case IR_LEA_IB: if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) { - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op1]); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; index_reg_ref = ref * sizeof(ir_ref) + 2; } else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; index_reg_ref = ref * sizeof(ir_ref) + 1; @@ -3428,12 +3451,12 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) offset_insn = op1_insn; scale = 1; if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; index_reg_ref = insn->op1 * sizeof(ir_ref) + 1; } else if (ir_rule(ctx, op1_insn->op1) == IR_STATIC_ALLOCA) { - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op1].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[op1_insn->op1]); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; index_reg_ref = ref * sizeof(ir_ref) + 2; @@ -3447,12 +3470,12 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) offset_insn = op2_insn; scale = 1; if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) { - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op1]); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; index_reg_ref = insn->op2 * sizeof(ir_ref) + 1; } else if (ir_rule(ctx, op2_insn->op1) == IR_STATIC_ALLOCA) { - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op2_insn->op1].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[op2_insn->op1]); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; index_reg_ref = ref * sizeof(ir_ref) + 1; @@ -3479,12 +3502,12 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) offset_insn = insn; scale = 1; if (ir_rule(ctx, op1_insn->op2) == IR_STATIC_ALLOCA) { - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op2].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[op1_insn->op2]); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; index_reg_ref = insn->op1 * sizeof(ir_ref) + 1; } else if (ir_rule(ctx, op1_insn->op1) == IR_STATIC_ALLOCA) { - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op1].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[op1_insn->op1]); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; index_reg_ref = insn->op1 * sizeof(ir_ref) + 2; @@ -3500,7 +3523,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) op2_insn = &ctx->ir_base[insn->op2]; scale = ctx->ir_base[op2_insn->op2].val.i32; if (ir_rule(ctx, op1_insn->op1) == IR_STATIC_ALLOCA) { - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op1].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[op1_insn->op1]); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; } else { @@ -3514,7 +3537,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) op2_insn = &ctx->ir_base[insn->op2]; offset_insn = op2_insn; if (ir_rule(ctx, op2_insn->op1) == IR_STATIC_ALLOCA) { - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op2_insn->op1].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[op2_insn->op1]); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; } else { @@ -3523,7 +3546,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) break; case IR_LEA_B_SI: if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) { - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op1]); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; } else { @@ -3537,7 +3560,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) case IR_LEA_SI_B: index_reg_ref = insn->op1 * sizeof(ir_ref) + 1; if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; } else { @@ -3580,7 +3603,7 @@ static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) offset_insn = NULL; break; case IR_ALLOCA: - offset = IR_SPILL_POS_TO_OFFSET(insn->op3); + offset = ir_local_offset(ctx, insn); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = index_reg_ref = IR_UNUSED; scale = 1; @@ -8306,7 +8329,7 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } if (ctx->flags & IR_USE_FRAME_POINTER) { @@ -8340,7 +8363,7 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } if (ctx->flags & IR_USE_FRAME_POINTER) { @@ -8407,7 +8430,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); + op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } if (op3_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op3_reg)) { @@ -8418,7 +8441,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3); + op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]); } | mov Ra(tmp_reg), aword [Ra(op3_reg)+op3_offset] | mov aword [Ra(op2_reg)+op2_offset], Ra(tmp_reg) @@ -8441,7 +8464,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); + op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } if (op3_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op3_reg)) { @@ -8452,7 +8475,7 @@ static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3); + op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]); } | mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, gp_offset))] | mov dword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, gp_offset))], Rd(tmp_reg) @@ -8493,11 +8516,29 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } | mov Ra(tmp_reg), aword [Ra(op2_reg)+offset] +#ifdef _WIN64 ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg)); | add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*)) +#else + if (!insn->op3) { + ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg)); + | add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*)) + } else { + IR_ASSERT(type == IR_ADDR); + int align = 1U << (insn->op3 & 0x7); + int size = (uint32_t)insn->op3 >> 3; + + if (align > (int)sizeof(void*)) { + | add Ra(tmp_reg), (align-1) + | and Ra(tmp_reg), ~(align-1) + } + | mov Ra(def_reg), Ra(tmp_reg) + | add Ra(tmp_reg), IR_ALIGNED_SIZE(size, sizeof(void*)) + } +#endif | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); @@ -8526,9 +8567,23 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
@@ -8493,11 +8516,29 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	} else {
 		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
 		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
+		offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
 	}
 	|	mov Ra(tmp_reg), aword [Ra(op2_reg)+offset]
+#ifdef _WIN64
 	ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg));
 	|	add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*))
+#else
+	if (!insn->op3) {
+		ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg));
+		|	add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*))
+	} else {
+		IR_ASSERT(type == IR_ADDR);
+		int align = 1U << (insn->op3 & 0x7);
+		int size = (uint32_t)insn->op3 >> 3;
+
+		if (align > (int)sizeof(void*)) {
+			|	add Ra(tmp_reg), (align-1)
+			|	and Ra(tmp_reg), ~(align-1)
+		}
+		|	mov Ra(def_reg), Ra(tmp_reg)
+		|	add Ra(tmp_reg), IR_ALIGNED_SIZE(size, sizeof(void*))
+	}
+#endif
 	|	mov aword [Ra(op2_reg)+offset], Ra(tmp_reg)
 	if (IR_REG_SPILLED(ctx->regs[def][0])) {
 		ir_emit_store(ctx, type, def, def_reg);
@@ -8526,9 +8567,23 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	} else {
 		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
 		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
-		offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
+		offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
 	}
-	if (IR_IS_TYPE_INT(type)) {
+	if (insn->op3) {
+		/* long struct argument */
+		IR_ASSERT(type == IR_ADDR);
+		int align = 1U << (insn->op3 & 0x7);
+		int size = (uint32_t)insn->op3 >> 3;
+
+		|	mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))]
+		if (align > (int)sizeof(void*)) {
+			|	add Ra(tmp_reg), (align-1)
+			|	and Ra(tmp_reg), ~(align-1)
+		}
+		|	mov Ra(def_reg), Ra(tmp_reg)
+		|	add Ra(tmp_reg), IR_ALIGNED_SIZE(size, sizeof(void*))
+		|	mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg)
+	} else if (IR_IS_TYPE_INT(type)) {
 		|	mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))]
 		|	cmp Rd(tmp_reg), sizeof(void*)*IR_REG_INT_ARGS
 		|	jge >1
@@ -8847,7 +8902,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
 	}
 }
 
-static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
+static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, int *copy_stack_ptr)
 {
 	int j, n;
 	ir_type type;
@@ -8856,6 +8911,9 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
 	int int_reg_params_count = IR_REG_INT_ARGS;
 	int fp_reg_params_count = IR_REG_FP_ARGS;
 	int32_t used_stack = 0;
+#ifdef _WIN64
+	int32_t copy_stack = 0;
+#endif
 
 #ifdef IR_HAVE_FASTCALL
 	if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
@@ -8866,8 +8924,26 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
 
 	n = insn->inputs_count;
 	for (j = 3; j <= n; j++) {
-		type = ctx->ir_base[ir_insn_op(insn, j)].type;
+		ir_insn *arg = &ctx->ir_base[ir_insn_op(insn, j)];
+		type = arg->type;
 		if (IR_IS_TYPE_INT(type)) {
+			if (arg->op == IR_ARGVAL) {
+				int size = arg->op2;
+				int align = arg->op3;
+
+#ifdef _WIN64
+				copy_stack += size;
+				align = IR_MAX((int)sizeof(void*), align);
+				copy_stack = IR_ALIGNED_SIZE(copy_stack, align);
+				type = IR_ADDR;
+#else
+				align = IR_MAX((int)sizeof(void*), align);
+				used_stack = IR_ALIGNED_SIZE(used_stack, align);
+				used_stack += size;
+				used_stack = IR_ALIGNED_SIZE(used_stack, sizeof(void*));
+				continue;
+#endif
+			}
 			if (int_param >= int_reg_params_count) {
 				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
 			}
@@ -8892,6 +8968,14 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
 	/* Reserved "home space" or "shadow store" for register arguments (used in Windows64 ABI) */
 	used_stack += IR_SHADOW_ARGS;
 
+#ifdef _WIN64
+	copy_stack = IR_ALIGNED_SIZE(copy_stack, 16);
+	used_stack += copy_stack;
+	*copy_stack_ptr = copy_stack;
+#else
+	*copy_stack_ptr = 0;
+#endif
+
 	return used_stack;
 }
 
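(Aside, illustration only: the SysV branch above reserves stack space for a by-value aggregate by aligning the running total to the argument's alignment, adding its size, then re-aligning to the word size. A standalone sketch of that accounting; add_argval is an invented name for this sketch.)

    #include <stdio.h>
    #include <stddef.h>

    #define ALIGNED_SIZE(size, align) \
        (((size) + (align) - 1) & ~((size_t)(align) - 1))

    /* Reserve outgoing-argument bytes for one by-value aggregate. */
    size_t add_argval(size_t used_stack, size_t size, size_t align)
    {
        if (align < sizeof(void*)) {
            align = sizeof(void*);                    /* at least pointer-aligned */
        }
        used_stack = ALIGNED_SIZE(used_stack, align); /* align the slot start */
        used_stack += size;                           /* the aggregate itself */
        return ALIGNED_SIZE(used_stack, sizeof(void*)); /* keep area word-aligned */
    }

    int main(void)
    {
        /* 8 bytes already used, then a 24-byte struct with 16-byte
         * alignment: the slot starts at 16 and ends at 40. */
        printf("%zu\n", add_argval(8, 24, 16)); /* prints 40 */
        return 0;
    }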
@@ -8911,7 +8995,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 	int fp_reg_params_count = IR_REG_FP_ARGS;
 	const int8_t *int_reg_params = _ir_int_reg_params;
 	const int8_t *fp_reg_params = _ir_fp_reg_params;
-	int32_t used_stack, stack_offset = IR_SHADOW_ARGS;
+	int32_t used_stack, copy_stack = 0, stack_offset = IR_SHADOW_ARGS;
 	ir_copy *copies;
 	bool do_pass3 = 0;
 	/* For temporaries we may use any scratch registers except for registers used for parameters */
@@ -8942,9 +9026,13 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 #endif
 	    ) {
 		// TODO: support for preallocated stack
+#ifdef _WIN64
+		used_stack = ir_call_used_stack(ctx, insn, &copy_stack);
+#else
 		used_stack = 0;
+#endif
 	} else {
-		used_stack = ir_call_used_stack(ctx, insn);
+		used_stack = ir_call_used_stack(ctx, insn, &copy_stack);
 		if (IR_SHADOW_ARGS
 		 && insn->op == IR_TAILCALL
 		 && used_stack == IR_SHADOW_ARGS) {
@@ -8967,6 +9055,46 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 		}
 	}
 
+#ifdef _WIN64
+|.if X64
+	if (copy_stack) {
+		/* Copy struct arguments */
+		int copy_stack_offset = 0;
+
+		for (j = 3; j <= n; j++) {
+			arg = ir_insn_op(insn, j);
+			src_reg = ir_get_alocated_reg(ctx, def, j);
+			arg_insn = &ctx->ir_base[arg];
+			type = arg_insn->type;
+
+			if (arg_insn->op == IR_ARGVAL) {
+				/* make a stack copy */
+				int size = arg_insn->op2;
+				int align = arg_insn->op3;
+
+				copy_stack_offset += size;
+				align = IR_MAX((int)sizeof(void*), align);
+				copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align);
+				src_reg = ctx->regs[arg][1];
+
+				|	lea rdi, [rsp + (used_stack - copy_stack_offset)]
+				if (src_reg != IR_REG_NONE) {
+					if (IR_REG_SPILLED(src_reg)) {
+						src_reg = IR_REG_NUM(src_reg);
+						ir_emit_load(ctx, IR_ADDR, src_reg, arg_insn->op1);
+					}
+					|	mov rsi, Ra(src_reg)
+				} else {
+					ir_emit_load(ctx, IR_ADDR, IR_REG_RSI, arg_insn->op1);
+				}
+				ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_RCX, size);
+				|	rep; movsb
+			}
+		}
+	}
+|.endif
+#endif
+
 	/* 1. move all register arguments that should be passed through stack
 	 * and collect arguments that should be passed through registers */
 	copies = ir_mem_malloc((n - 2) * sizeof(ir_copy));
@@ -8976,6 +9104,55 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 		arg_insn = &ctx->ir_base[arg];
 		type = arg_insn->type;
 		if (IR_IS_TYPE_INT(type)) {
+#ifndef _WIN64
+			if (arg_insn->op == IR_ARGVAL) {
+				int size = arg_insn->op2;
+				int align = arg_insn->op3;
+				align = IR_MAX((int)sizeof(void*), align);
+				stack_offset = IR_ALIGNED_SIZE(stack_offset, align);
+				if (size) {
+					src_reg = ctx->regs[arg][1];
+					if (src_reg != IR_REG_NONE) {
+						if (IR_REG_SPILLED(src_reg)) {
+							src_reg = IR_REG_NUM(src_reg);
+							ir_emit_load(ctx, IR_ADDR, src_reg, arg_insn->op1);
+						}
+						if (src_reg != IR_REG_RSI) {
+							|.if X64
+							|	mov rsi, Ra(src_reg)
+							|.else
+							|	mov esi, Ra(src_reg)
+							|.endif
+						}
+					} else {
+						ir_emit_load(ctx, IR_ADDR, IR_REG_RSI, arg_insn->op1);
+					}
+					if (stack_offset == 0) {
+						|.if X64
+						|	mov rdi, rsp
+						|.else
+						|	mov edi, esp
+						|.endif
+					} else {
+						|.if X64
+						|	lea rdi, [rsp+stack_offset]
+						|.else
+						|	lea edi, [esp+stack_offset]
+						|.endif
+					}
+					|.if X64
+					|	mov rcx, size
+					|	rep; movsb
+					|.else
+					|	mov ecx, size
+					|	rep; movsb
+					|.endif
+				}
+				stack_offset += size;
+				stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*));
+				continue;
+			}
+#endif
 			if (int_param < int_reg_params_count) {
 				dst_reg = int_reg_params[int_param];
 			} else {
@@ -8985,6 +9162,10 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 #ifdef _WIN64
 			/* WIN64 calling convention use common couter for int and fp registers */
 			fp_param++;
+			if (arg_insn->op == IR_ARGVAL) {
+				do_pass3 = 3;
+				continue;
+			}
 #endif
 		} else {
 			IR_ASSERT(IR_IS_TYPE_FP(type));
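(Aside, illustration only: the `rep; movsb` sequences emitted above perform a byte-wise copy with RDI as destination, RSI as source and RCX as count. In C terms it is nothing more than the memcpy below; the function name is invented for this sketch.)

    #include <stdint.h>
    #include <string.h>

    /* Copy one by-value aggregate into the outgoing argument area.
     * The destination address matches the `lea rdi, [rsp+stack_offset]`
     * computed by the emitted code. */
    void pass_aggregate(uint8_t *outgoing_area, size_t stack_offset,
                        const void *aggregate, size_t size)
    {
        memcpy(outgoing_area + stack_offset, aggregate, size);
    }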
@@ -9037,6 +9218,10 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 
 	/* 3. move the remaining memory and immediate values */
 	if (do_pass3) {
+#ifdef _WIN64
+		int copy_stack_offset = 0;
+#endif
+
 		stack_offset = IR_SHADOW_ARGS;
 		int_param = 0;
 		fp_param = 0;
@@ -9046,6 +9231,37 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 			arg_insn = &ctx->ir_base[arg];
 			type = arg_insn->type;
 			if (IR_IS_TYPE_INT(type)) {
+				if (arg_insn->op == IR_ARGVAL) {
+					int size = arg_insn->op2;
+					int align = arg_insn->op3;
+
+#ifndef _WIN64
+					align = IR_MAX((int)sizeof(void*), align);
+					stack_offset = IR_ALIGNED_SIZE(stack_offset, align);
+					stack_offset += size;
+					stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*));
+					continue;
+#else
+|.if X64
+					/* pass pointer to the copy on stack */
+					copy_stack_offset += size;
+					align = IR_MAX((int)sizeof(void*), align);
+					copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align);
+					if (int_param < int_reg_params_count) {
+						dst_reg = int_reg_params[int_param];
+						|	lea Ra(dst_reg), [rsp + (used_stack - copy_stack_offset)]
+					} else {
+						|	lea Ra(tmp_reg), [rsp + (used_stack - copy_stack_offset)]
+						ir_emit_store_mem_int(ctx, IR_ADDR, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg);
+						stack_offset += sizeof(void*);
+					}
+					int_param++;
+					/* WIN64 calling convention uses a common counter for int and fp registers */
+					fp_param++;
+					continue;
+|.endif
+#endif
+				}
 			if (int_param < int_reg_params_count) {
 				dst_reg = int_reg_params[int_param];
 			} else {
@@ -9161,6 +9377,9 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg
 			}
 		}
 	}
+	if (insn->op == IR_CALL && (ctx->flags & IR_PREALLOCATED_STACK)) {
+		used_stack = 0;
+	}
 #endif
 #ifdef IR_REG_VARARG_FP_REGS
 	/* set hidden argument to specify the number of vector registers used */
@@ -10230,7 +10449,16 @@ static void ir_emit_load_params(ir_ctx *ctx)
 		insn = &ctx->ir_base[use];
 		if (insn->op == IR_PARAM) {
 			if (IR_IS_TYPE_INT(insn->type)) {
-				if (int_param_num < int_reg_params_count) {
+				if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
+					/* struct passed by value on stack */
+					size_t align = ctx->value_params[insn->op3 - 1].align;
+
+					align = IR_MAX(sizeof(void*), align);
+					stack_offset = IR_ALIGNED_SIZE(stack_offset, align);
+					stack_offset += ctx->value_params[insn->op3 - 1].size;
+					stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*));
+					continue;
+				} else if (int_param_num < int_reg_params_count) {
 					src_reg = int_reg_params[int_param_num];
 				} else {
 					src_reg = IR_REG_NONE;
@@ -10358,6 +10586,19 @@ static void ir_fix_param_spills(ir_ctx *ctx)
 		insn = &ctx->ir_base[use];
 		if (insn->op == IR_PARAM) {
 			if (IR_IS_TYPE_INT(insn->type)) {
+#ifndef _WIN64
+				if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) {
+					/* struct passed by value on stack */
+					size_t align = ctx->value_params[insn->op3 - 1].align;
+
+					align = IR_MAX(sizeof(void*), align);
+					stack_offset = IR_ALIGNED_SIZE(stack_offset, align);
+					ctx->value_params[insn->op3 - 1].offset = stack_start + stack_offset;
+					stack_offset += ctx->value_params[insn->op3 - 1].size;
+					stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*));
+					continue;
+				}
+#endif
 				if (int_param_num < int_reg_params_count) {
 					src_reg = int_reg_params[int_param_num];
 				} else {
@@ -10618,13 +10859,13 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 
 static void ir_preallocate_call_stack(ir_ctx *ctx)
 {
-	int call_stack_size, peak_call_stack_size = 0;
+	int call_stack_size, copy_stack, peak_call_stack_size = 0;
 	ir_ref i, n;
 	ir_insn *insn;
 
 	for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) {
 		if (insn->op == IR_CALL) {
-			call_stack_size = ir_call_used_stack(ctx, insn);
+			call_stack_size = ir_call_used_stack(ctx, insn, &copy_stack);
 			if (call_stack_size > peak_call_stack_size
 #ifdef IR_HAVE_FASTCALL
 			 && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */
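(Aside, illustration only: the load-params and spill-fixup hunks above skip register moves for by-value parameters and instead record where the caller left the stack copy. value_param_model and fix_value_param below are hypothetical stand-ins for the ctx->value_params bookkeeping, written only for this sketch.)

    #include <stddef.h>

    #define ALIGNED_SIZE(size, align) \
        (((size) + (align) - 1) & ~((size_t)(align) - 1))

    /* One by-value parameter; align == 0 is assumed to mean "not an
     * aggregate passed by value on the stack". */
    typedef struct {
        size_t size;
        size_t align;
        size_t offset;  /* filled in by the spill-fixup pass */
    } value_param_model;

    /* Walk one such parameter the way the fixup pass above does:
     * record where the caller placed the copy, then advance. */
    size_t fix_value_param(value_param_model *p, size_t stack_start,
                           size_t stack_offset)
    {
        size_t align = p->align < sizeof(void*) ? sizeof(void*) : p->align;

        stack_offset = ALIGNED_SIZE(stack_offset, align);
        p->offset = stack_start + stack_offset; /* lives in the caller frame */
        stack_offset += p->size;
        return ALIGNED_SIZE(stack_offset, sizeof(void*));
    }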
diff --git a/ext/opcache/jit/zend_jit_helpers.c b/ext/opcache/jit/zend_jit_helpers.c
index d49d5ee3007e4..8e13daf99bc34 100644
--- a/ext/opcache/jit/zend_jit_helpers.c
+++ b/ext/opcache/jit/zend_jit_helpers.c
@@ -2119,8 +2119,13 @@ static zval* ZEND_FASTCALL zend_jit_fetch_obj_r_slow_ex(zend_object *zobj)
 	void **cache_slot = CACHE_ADDR(opline->extended_value & ~ZEND_FETCH_OBJ_FLAGS);
 
 	retval = zobj->handlers->read_property(zobj, name, BP_VAR_R, cache_slot, result);
-	if (retval == result && UNEXPECTED(Z_ISREF_P(retval))) {
-		zend_unwrap_reference(retval);
+	if (UNEXPECTED(Z_ISREF_P(retval))) {
+		if (retval == result) {
+			zend_unwrap_reference(retval);
+		} else {
+			retval = Z_REFVAL_P(retval);
+		}
+		ZEND_ASSERT(!Z_REFCOUNTED_P(retval));
 	}
 	return retval;
 }
diff --git a/ext/opcache/tests/jit/gh19831_001.phpt b/ext/opcache/tests/jit/gh19831_001.phpt
new file mode 100644
index 0000000000000..c83ca6daa50d5
--- /dev/null
+++ b/ext/opcache/tests/jit/gh19831_001.phpt
@@ -0,0 +1,33 @@
+--TEST--
+GH-19831 001: fetch obj slow R REG + reference
+--CREDITS--
+dktapps
+--ENV--
+RT_COND=1
+--INI--
+opcache.jit=1203
+--FILE--
+<?php
+
+class Test {
+    public $layers = 1;
+
+    public function getLayers() {
+        return $this->layers;
+    }
+}
+
+$t = new Test();
+$a = &$t->layers;
+var_dump($t->getLayers());
+
+?>
+--EXPECT--
+int(1)
diff --git a/ext/opcache/tests/jit/gh19831_002.phpt b/ext/opcache/tests/jit/gh19831_002.phpt
new file mode 100644
index 0000000000000..25b596a3decb3
--- /dev/null
+++ b/ext/opcache/tests/jit/gh19831_002.phpt
@@ -0,0 +1,39 @@
+--TEST--
+GH-19831 002: fetch obj slow R REG + __get + reference
+--CREDITS--
+dktapps
+--ENV--
+RT_COND=1
+--INI--
+opcache.jit=1203
+--FILE--
+<?php
+
+class Test {
+    public $layers = 1;
+
+    public function &__get($name) {
+        $v = 1;
+        return $v;
+    }
+
+    public function getLayers() {
+        return $this->layers;
+    }
+}
+
+$t = new Test();
+unset($t->layers);
+var_dump($t->getLayers());
+
+?>
+--EXPECT--
+int(1)
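(Aside, illustration only: the zend_jit_helpers.c fix above handles the case where read_property() returns a pointer to the property's own zval that holds a reference. Previously the reference was only unwrapped when the handler wrote into the caller-provided buffer, so the JIT'ed fast path could receive an unexpected IS_REFERENCE zval. A toy model of the corrected control flow, with simplified stand-in types rather than PHP's real zval:)

    #include <assert.h>

    /* Simplified stand-in for a zval that may hold a reference. */
    typedef struct toy_zval toy_zval;
    struct toy_zval {
        int       is_ref;
        toy_zval *referent; /* valid when is_ref != 0 */
    };

    toy_zval *deref_result(toy_zval *retval, toy_zval *result)
    {
        if (retval->is_ref) {
            if (retval == result) {
                /* our own temporary: unwrap it in place, like
                 * zend_unwrap_reference() */
                *result = *result->referent;
            } else {
                /* a borrowed zval (e.g. the property slot itself):
                 * just follow the reference */
                retval = retval->referent;
            }
            /* references do not nest, so the result is a plain value */
            assert(!retval->is_ref);
        }
        return retval;
    }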