Skip to content

Commit 4e8fdb3

Browse files
author
Zoltan Herczeg
committed
JIT compiler update
1 parent c3529d0 commit 4e8fdb3

16 files changed

+1293
-455
lines changed

src/pcre2_jit_simd_inc.h

Lines changed: 3 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -483,11 +483,7 @@ sljit_s32 cmp2a_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR3);
483483
sljit_s32 cmp1b_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR4);
484484
sljit_s32 cmp2b_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR5);
485485
sljit_s32 tmp1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR6);
486-
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
487-
sljit_s32 tmp2_ind = 0;
488-
#else /* !SLJIT_CONFIG_X86_32 */
489-
sljit_s32 tmp2_ind = 4;
490-
#endif /* SLJIT_CONFIG_X86_32 */
486+
sljit_s32 tmp2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_TMP_FR0);
491487
struct sljit_label *start;
492488
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
493489
struct sljit_label *restart;
@@ -660,19 +656,7 @@ for (i = 0; i < 4; i++)
660656
fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
661657
}
662658

663-
/* PAND xmm1, xmm2/m128 */
664-
if (reg_type == SLJIT_SIMD_REG_256)
665-
{
666-
instruction[0] = 0xc5;
667-
instruction[1] = (sljit_u8)(0xfd ^ (data1_ind << 3));
668-
}
669-
670-
/* instruction[0] = 0x66 / 0xc5; */
671-
/* instruction[1] = 0x0f; */
672-
instruction[2] = 0xdb;
673-
instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
674-
sljit_emit_op_custom(compiler, instruction, 4);
675-
659+
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_FR0, SLJIT_FR0, SLJIT_FR1);
676660
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);
677661

678662
/* Ignore matches before the first STR_PTR. */
@@ -700,16 +684,7 @@ for (i = 0; i < 4; i++)
700684
fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
701685
}
702686

703-
/* PAND xmm1, xmm2/m128 */
704-
if (reg_type == SLJIT_SIMD_REG_256)
705-
instruction[1] = (sljit_u8)(0xfd ^ (data1_ind << 3));
706-
707-
/* instruction[0] = 0x66 / 0xc5; */
708-
/* instruction[1] = 0x0f; */
709-
instruction[2] = 0xdb;
710-
instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
711-
sljit_emit_op_custom(compiler, instruction, 4);
712-
687+
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_FR0, SLJIT_FR0, SLJIT_FR1);
713688
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);
714689

715690
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);

src/sljit/sljitConfigInternal.h

Lines changed: 109 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ extern "C" {
7272
SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers
7373
SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers
7474
SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers
75+
SLJIT_NUMBER_OF_TEMPORARY_REGISTERS : number of available temporary registers
76+
SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS : number of available temporary floating point registers
7577
SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index
7678
SLJIT_F32_SHIFT : the shift required to apply when accessing
7779
a single precision floating point array by index
@@ -81,8 +83,21 @@ extern "C" {
8183
the scratch register index of ecx is stored in this variable
8284
SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET)
8385
SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
86+
SLJIT_CONV_MAX_FLOAT : result when a floating point value is converted to integer
87+
and the floating point value is higher than the maximum integer value
88+
(possible values: SLJIT_CONV_RESULT_MAX_INT or SLJIT_CONV_RESULT_MIN_INT)
89+
SLJIT_CONV_MIN_FLOAT : result when a floating point value is converted to integer
90+
and the floating point value is lower than the minimum integer value
91+
(possible values: SLJIT_CONV_RESULT_MAX_INT or SLJIT_CONV_RESULT_MIN_INT)
92+
SLJIT_CONV_NAN_FLOAT : result when a NaN floating point value is converted to integer
93+
(possible values: SLJIT_CONV_RESULT_MAX_INT, SLJIT_CONV_RESULT_MIN_INT,
94+
or SLJIT_CONV_RESULT_ZERO)
8495
8596
Other macros:
97+
SLJIT_TMP_R0 .. R9 : accessing temporary registers
98+
SLJIT_TMP_R(i) : accessing temporary registers
99+
SLJIT_TMP_FR0 .. FR9 : accessing temporary floating point registers
100+
SLJIT_TMP_FR(i) : accessing temporary floating point registers
86101
SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT
87102
SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (platform independent helper)
88103
SLJIT_F64_SECOND(reg) : provides the register index of the second 32 bit part of a 64 bit
@@ -356,6 +371,38 @@ typedef double sljit_f64;
356371
#define SLJIT_F32_SHIFT 2
357372
#define SLJIT_F64_SHIFT 3
358373

374+
#define SLJIT_CONV_RESULT_MAX_INT 0
375+
#define SLJIT_CONV_RESULT_MIN_INT 1
376+
#define SLJIT_CONV_RESULT_ZERO 2
377+
378+
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
379+
#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MIN_INT
380+
#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
381+
#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT
382+
#elif (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM)
383+
#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
384+
#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
385+
#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_ZERO
386+
#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
387+
#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
388+
#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MAX_INT
389+
#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MAX_INT
390+
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
391+
#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
392+
#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
393+
#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT
394+
#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV)
395+
#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
396+
#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
397+
#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MAX_INT
398+
#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
399+
#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
400+
#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
401+
#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT
402+
#else
403+
#error "Result for float to integer conversion is not defined"
404+
#endif
405+
359406
#ifndef SLJIT_W
360407

361408
/* Defining long constants. */
@@ -528,8 +575,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
528575

529576
#define SLJIT_NUMBER_OF_REGISTERS 12
530577
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
578+
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 1
531579
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7
532580
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
581+
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1
533582
#define SLJIT_LOCALS_OFFSET_BASE (8 * SSIZE_OF(sw))
534583
#define SLJIT_PREF_SHIFT_REG SLJIT_R2
535584
#define SLJIT_MASKED_SHIFT 1
@@ -538,7 +587,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
538587
#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
539588

540589
#define SLJIT_NUMBER_OF_REGISTERS 13
590+
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 2
541591
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15
592+
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1
542593
#ifndef _WIN64
543594
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6
544595
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
@@ -556,16 +607,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
556607

557608
#define SLJIT_NUMBER_OF_REGISTERS 12
558609
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
610+
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 2
559611
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14
560612
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
613+
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
561614
#define SLJIT_LOCALS_OFFSET_BASE 0
562615

563616
#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
564617

565618
#define SLJIT_NUMBER_OF_REGISTERS 26
566619
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
620+
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3
567621
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
568622
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
623+
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
569624
#define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw))
570625
#define SLJIT_MASKED_SHIFT 1
571626
#define SLJIT_MASKED_SHIFT32 1
@@ -574,8 +629,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
574629

575630
#define SLJIT_NUMBER_OF_REGISTERS 23
576631
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17
632+
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3
577633
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
578634
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18
635+
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
579636
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX)
580637
#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * (sljit_s32)sizeof(sljit_sw))
581638
#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
@@ -598,16 +655,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
598655
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 29
599656
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
600657
#endif
658+
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5
659+
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 3
601660
#define SLJIT_MASKED_SHIFT 1
602661
#define SLJIT_MASKED_SHIFT32 1
603662

604663
#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV)
605664

606665
#define SLJIT_NUMBER_OF_REGISTERS 23
607666
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 12
608-
#define SLJIT_LOCALS_OFFSET_BASE 0
667+
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5
609668
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
610669
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12
670+
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
671+
#define SLJIT_LOCALS_OFFSET_BASE 0
611672
#define SLJIT_MASKED_SHIFT 1
612673
#define SLJIT_MASKED_SHIFT32 1
613674

@@ -636,18 +697,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
636697

637698
#define SLJIT_NUMBER_OF_REGISTERS 12
638699
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
700+
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3
639701
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15
640702
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
703+
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1
641704
#define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE
642705
#define SLJIT_MASKED_SHIFT 1
643706

644707
#elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH)
645708

646709
#define SLJIT_NUMBER_OF_REGISTERS 23
647710
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
648-
#define SLJIT_LOCALS_OFFSET_BASE 0
711+
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5
649712
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
650713
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12
714+
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
715+
#define SLJIT_LOCALS_OFFSET_BASE 0
651716
#define SLJIT_MASKED_SHIFT 1
652717
#define SLJIT_MASKED_SHIFT32 1
653718

@@ -656,8 +721,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
656721
/* Just to have something. */
657722
#define SLJIT_NUMBER_OF_REGISTERS 0
658723
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0
724+
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 0
659725
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 0
660726
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
727+
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 0
661728
#define SLJIT_LOCALS_OFFSET_BASE 0
662729

663730
#endif
@@ -670,6 +737,45 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
670737
#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \
671738
(SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS)
672739

740+
/**********************************/
741+
/* Temporary register management. */
742+
/**********************************/
743+
744+
#define SLJIT_TMP_REGISTER_BASE (SLJIT_NUMBER_OF_REGISTERS + 2)
745+
#define SLJIT_TMP_FREGISTER_BASE (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
746+
747+
/* WARNING: Accessing temporary registers is not recommended, because they
748+
are also used by the JIT compiler for various computations. Using them
749+
may have arbitrary side effects, including incorrect operations and crashes,
750+
so use them at your own risk. The machine registers themselves might have
751+
limitations, e.g. the r0 register on s390x / ppc cannot be used as
752+
a base address for memory operations. */
753+
754+
/* Temporary registers */
755+
#define SLJIT_TMP_R0 (SLJIT_TMP_REGISTER_BASE + 0)
756+
#define SLJIT_TMP_R1 (SLJIT_TMP_REGISTER_BASE + 1)
757+
#define SLJIT_TMP_R2 (SLJIT_TMP_REGISTER_BASE + 2)
758+
#define SLJIT_TMP_R3 (SLJIT_TMP_REGISTER_BASE + 3)
759+
#define SLJIT_TMP_R4 (SLJIT_TMP_REGISTER_BASE + 4)
760+
#define SLJIT_TMP_R5 (SLJIT_TMP_REGISTER_BASE + 5)
761+
#define SLJIT_TMP_R6 (SLJIT_TMP_REGISTER_BASE + 6)
762+
#define SLJIT_TMP_R7 (SLJIT_TMP_REGISTER_BASE + 7)
763+
#define SLJIT_TMP_R8 (SLJIT_TMP_REGISTER_BASE + 8)
764+
#define SLJIT_TMP_R9 (SLJIT_TMP_REGISTER_BASE + 9)
765+
#define SLJIT_TMP_R(i) (SLJIT_TMP_REGISTER_BASE + (i))
766+
767+
#define SLJIT_TMP_FR0 (SLJIT_TMP_FREGISTER_BASE + 0)
768+
#define SLJIT_TMP_FR1 (SLJIT_TMP_FREGISTER_BASE + 1)
769+
#define SLJIT_TMP_FR2 (SLJIT_TMP_FREGISTER_BASE + 2)
770+
#define SLJIT_TMP_FR3 (SLJIT_TMP_FREGISTER_BASE + 3)
771+
#define SLJIT_TMP_FR4 (SLJIT_TMP_FREGISTER_BASE + 4)
772+
#define SLJIT_TMP_FR5 (SLJIT_TMP_FREGISTER_BASE + 5)
773+
#define SLJIT_TMP_FR6 (SLJIT_TMP_FREGISTER_BASE + 6)
774+
#define SLJIT_TMP_FR7 (SLJIT_TMP_FREGISTER_BASE + 7)
775+
#define SLJIT_TMP_FR8 (SLJIT_TMP_FREGISTER_BASE + 8)
776+
#define SLJIT_TMP_FR9 (SLJIT_TMP_FREGISTER_BASE + 9)
777+
#define SLJIT_TMP_FR(i) (SLJIT_TMP_FREGISTER_BASE + (i))
778+
673779
/********************************/
674780
/* CPU status flags management. */
675781
/********************************/
@@ -690,7 +796,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
690796
#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \
691797
|| (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
692798
#define SLJIT_F64_SECOND(reg) \
693-
((reg) + SLJIT_FS0)
799+
((reg) + SLJIT_FS0 + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)
694800
#else /* !SLJIT_CONFIG_ARM_32 && !SLJIT_CONFIG_MIPS_32 */
695801
#define SLJIT_F64_SECOND(reg) \
696802
(reg)

0 commit comments

Comments
 (0)