@@ -747,6 +747,9 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
747747 return IR_CMP_FP;
748748 }
749749 break;
750+ case IR_ORDERED:
751+ case IR_UNORDERED:
752+ return IR_CMP_FP;
750753 case IR_ADD:
751754 case IR_SUB:
752755 if (IR_IS_TYPE_INT(insn->type)) {
@@ -1043,7 +1046,7 @@ binop_fp:
10431046 case IR_IF:
10441047 if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
10451048 op2_insn = &ctx->ir_base[insn->op2];
1046- if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT ) {
1049+ if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED ) {
10471050 if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
10481051 ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
10491052 return IR_CMP_AND_BRANCH_INT;
@@ -1066,7 +1069,7 @@ binop_fp:
10661069 case IR_GUARD_NOT:
10671070 if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
10681071 op2_insn = &ctx->ir_base[insn->op2];
1069- if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT
1072+ if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UNORDERED
10701073 // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
10711074 && (insn->op2 == ref - 1 ||
10721075 (insn->op2 == ctx->prev_ref[ref] - 1
@@ -1110,6 +1113,9 @@ binop_fp:
11101113 ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP;
11111114 }
11121115 }
1116+ } else {
1117+ /* va_list may escape */
1118+ ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP;
11131119 }
11141120 return IR_VA_START;
11151121 case IR_VA_END:
@@ -2991,6 +2997,12 @@ static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
29912997 case IR_UGT:
29922998 | cset Rw(def_reg), hi
29932999 break;
3000+ case IR_ORDERED:
3001+ | cset Rw(def_reg), vc
3002+ break;
3003+ case IR_UNORDERED:
3004+ | cset Rw(def_reg), vs
3005+ break;
29943006 }
29953007 if (IR_REG_SPILLED(ctx->regs[def][0])) {
29963008 ir_emit_store(ctx, insn->type, def, def_reg);
@@ -3065,7 +3077,7 @@ static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint
30653077 ir_get_true_false_blocks(ctx, b, &true_block, &false_block);
30663078 if (true_block == next_block) {
30673079 /* swap to avoid unconditional JMP */
3068- if (int_cmp || op == IR_EQ || op == IR_NE) {
3080+ if (int_cmp || op == IR_EQ || op == IR_NE || op == IR_ORDERED || op == IR_UNORDERED ) {
30693081 op ^= 1; // reverse
30703082 } else {
30713083 op ^= 5; // reverse
@@ -3145,6 +3157,11 @@ static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint
31453157 case IR_UGT:
31463158 | bhi =>true_block
31473159 break;
3160+ case IR_ORDERED:
3161+ | bvc =>true_block
3162+ break;
3163+ case IR_UNORDERED:
3164+ | bvs =>true_block
31483165// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;
31493166// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break;
31503167// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break;
@@ -4462,11 +4479,7 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
44624479 ir_reg tmp_reg = ctx->regs[def][3];
44634480 int32_t offset;
44644481
4465- if (ctx->use_lists[def].count == 1) {
4466- /* dead load */
4467- return;
4468- }
4469- IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
4482+ IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE);
44704483 if (op2_reg != IR_REG_NONE) {
44714484 if (IR_REG_SPILLED(op2_reg)) {
44724485 op2_reg = IR_REG_NUM(op2_reg);
@@ -4479,10 +4492,12 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
44794492 offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]);
44804493 }
44814494 | ldr Rx(tmp_reg), [Rx(op2_reg), #offset]
4482- ir_emit_load_mem(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0));
4495+ if (def_reg != IR_REG_NONE) {
4496+ ir_emit_load_mem(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0));
4497+ }
44834498 | add Rx(tmp_reg), Rx(tmp_reg), #IR_MAX(ir_type_size[type], sizeof(void*))
44844499 | str Rx(tmp_reg), [Rx(op2_reg), #offset]
4485- if (IR_REG_SPILLED(ctx->regs[def][0])) {
4500+ if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
44864501 ir_emit_store(ctx, type, def, def_reg);
44874502 }
44884503#else
@@ -4494,11 +4509,7 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
44944509 ir_reg tmp_reg = ctx->regs[def][3];
44954510 int32_t offset;
44964511
4497- if (ctx->use_lists[def].count == 1) {
4498- /* dead load */
4499- return;
4500- }
4501- IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
4512+ IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE);
45024513 if (op2_reg != IR_REG_NONE) {
45034514 if (IR_REG_SPILLED(op2_reg)) {
45044515 op2_reg = IR_REG_NUM(op2_reg);
@@ -4517,13 +4528,17 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
45174528 | ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_top))]
45184529 | sxtw Rx(tmp_reg), Rw(tmp_reg)
45194530 | add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP)
4520- | ldr Rx(def_reg), [Rx(IR_REG_INT_TMP)]
4531+ if (def_reg != IR_REG_NONE) {
4532+ | ldr Rx(def_reg), [Rx(IR_REG_INT_TMP)]
4533+ }
45214534 | add Rw(tmp_reg), Rw(tmp_reg), #sizeof(void*)
45224535 | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))]
45234536 | b >2
45244537 |1:
45254538 | ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))]
4526- | ldr Rx(def_reg), [Rx(tmp_reg)]
4539+ if (def_reg != IR_REG_NONE) {
4540+ | ldr Rx(def_reg), [Rx(tmp_reg)]
4541+ }
45274542 | add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*)
45284543 | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))]
45294544 |2:
@@ -4534,18 +4549,22 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
45344549 | ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_top))]
45354550 | sxtw Rx(tmp_reg), Rw(tmp_reg)
45364551 | add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP)
4537- | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(IR_REG_INT_TMP)]
4552+ if (def_reg != IR_REG_NONE) {
4553+ | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(IR_REG_INT_TMP)]
4554+ }
45384555 | add Rw(tmp_reg), Rw(tmp_reg), #16
45394556 | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))]
45404557 | b >2
45414558 |1:
45424559 | ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))]
4543- | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(tmp_reg)]
4560+ if (def_reg != IR_REG_NONE) {
4561+ | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(tmp_reg)]
4562+ }
45444563 | add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*)
45454564 | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))]
45464565 |2:
45474566 }
4548- if (IR_REG_SPILLED(ctx->regs[def][0])) {
4567+ if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
45494568 ir_emit_store(ctx, type, def, def_reg);
45504569 }
45514570#endif
@@ -5378,6 +5397,11 @@ static void ir_emit_guard_jcc(ir_ctx *ctx, uint8_t op, void *addr, bool int_cmp)
53785397 case IR_GT:
53795398 | bgt &addr
53805399 break;
5400+ case IR_ORDERED:
5401+ | bvc &addr
5402+ break;
5403+ case IR_UNORDERED:
5404+ | bvs &addr
53815405// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;
53825406// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break;
53835407// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break;
0 commit comments