@@ -3764,7 +3764,7 @@ Use x0-7 for I-addresses, x8-15 for O-addresses - by the time we need x15 for ou
37643764 "xorq %%r8,%%r8 \n\t leaq %c[i1](%%r8),%%r8 \n\t"/* movq|movslq of literal %c[i1] both segfaulted, workaround via LEA */\
37653765 /* The twid_ptrs[] array holds ptrs to 14 complex twiddles in-order: (c,s)[1,2,3,4,5,6,7]: */\
37663766 "movq %[twid_ptrs],%%r14 \n\t"\
3767- /* Block 0/1 has just one twiddle-CMUL: /* Blocks 2/3 use separate register subset, can be done overlapped with 0/1: */\
3767+ /* Block 0/1 has just one twiddle-CMUL: Blocks 2/3 use separate register subset, can be done overlapped with 0/1: */\
37683768 "movq %[in0],%%rax \n\t"\
37693769 "leaq (%%rax,%%r8 ),%%rbx \n\t"\
37703770 "leaq (%%rax,%%r8,2),%%rcx \n\t movq 0x10(%%r14),%%r10 \n\t movq 0x20(%%r14),%%r12 \n\t"/* c2,c3 */\
@@ -9091,7 +9091,7 @@ Use x0-7 for I-addresses, x8-15 for O-addresses - by the time we need x15 for ou
90919091 "xorq %%r8,%%r8 \n\t leaq %c[i1](%%r8),%%r8 \n\t"/* movq|movslq of literal %c[i1] both segfaulted, workaround via LEA */\
90929092 /* The twid_ptrs[] array holds ptrs to 14 complex twiddles in-order: (c,s)[1,2,3,4,5,6,7]: */\
90939093 "movq %[twid_ptrs],%%r14 \n\t"\
9094- /* Block 0/1 has just one twiddle-CMUL: /* Blocks 2/3 use separate register subset, can be done overlapped with 0/1: */\
9094+ /* Block 0/1 has just one twiddle-CMUL: Blocks 2/3 use separate register subset, can be done overlapped with 0/1: */\
90959095 "movq %[in0],%%rax \n\t"\
90969096 "leaq (%%rax,%%r8 ),%%rbx \n\t"\
90979097 "leaq (%%rax,%%r8,2),%%rcx \n\t movq 0x10(%%r14),%%r10 \n\t movq 0x20(%%r14),%%r12 \n\t"/* c2,c3 */\
@@ -11209,7 +11209,7 @@ Use x0-7 for I-addresses, x8-15 for O-addresses - by the time we need x15 for ou
1120911209 "xorq %%r8,%%r8 \n\t leaq %c[i1](%%r8),%%r8 \n\t"/* movq|movslq of literal %c[i1] both segfaulted, workaround via LEA */\
1121011210 /* The twid_ptrs[] array holds ptrs to 14 complex twiddles in-order: (c,s)[1,2,3,4,5,6,7]: */\
1121111211 "movq %[twid_ptrs],%%r14 \n\t"\
11212- /* Block 0/1 has just one twiddle-CMUL: /* Blocks 2/3 use separate register subset, can be done overlapped with 0/1: */\
11212+ /* Block 0/1 has just one twiddle-CMUL: Blocks 2/3 use separate register subset, can be done overlapped with 0/1: */\
1121311213 "movq %[in0],%%rax \n\t"\
1121411214 "leaq (%%rax,%%r8 ),%%rbx \n\t"\
1121511215 "leaq (%%rax,%%r8,2),%%rcx \n\t"\
0 commit comments