Skip to content

Commit 0e642ce

Browse files
committed
Remove warnings from radix64_ditN_cy_dif1.c.
1 parent 780790a commit 0e642ce

File tree

1 file changed

+74
-66
lines changed

1 file changed

+74
-66
lines changed

src/radix64_ditN_cy_dif1.c

Lines changed: 74 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -172,7 +172,9 @@ int radix64_ditN_cy_dif1(double a[], int n, int nwt, int nwt_bits, double wt0[],
172172
! storage scheme, and radix16_ditN_cy_dif1 for details on the reduced-length weights array scheme.
173173
*/
174174
const char func[] = "radix64_ditN_cy_dif1";
175+
#if !USE_SCALAR_DFT_MACRO && !defined(USE_SSE2)
175176
static int thr_id = 0; // Master thread gets this special id
177+
#endif
176178
#if USE_SCALAR_DFT_MACRO
177179
static int dft_offsets[RADIX], c_offsets[RADIX];
178180
#endif
@@ -267,11 +269,13 @@ int radix64_ditN_cy_dif1(double a[], int n, int nwt, int nwt_bits, double wt0[],
267269
*r20,*r22,*r24,*r26,*r28,*r2A,*r2C,*r2E,*r30,*r32,*r34,*r36,*r38,*r3A,*r3C,*r3E,
268270
*r40,*r42,*r44,*r46,*r48,*r4A,*r4C,*r4E,*r50,*r52,*r54,*r56,*r58,*r5A,*r5C,*r5E,
269271
*r60,*r62,*r64,*r66,*r68,*r6A,*r6C,*r6E,*r70,*r72,*r74,*r76,*r78,*r7A,*r7C,*r7E,
272+
#ifndef MULTITHREAD
270273
// ...and s's as pointers-to-complex-SIMD; thus the r-indices run 2x faster than the s-ones:
271-
*s1p00,*s1p01,*s1p02,*s1p03,*s1p04,*s1p05,*s1p06,*s1p07,*s1p08,*s1p09,*s1p0a,*s1p0b,*s1p0c,*s1p0d,*s1p0e,*s1p0f,
272-
*s1p10,*s1p11,*s1p12,*s1p13,*s1p14,*s1p15,*s1p16,*s1p17,*s1p18,*s1p19,*s1p1a,*s1p1b,*s1p1c,*s1p1d,*s1p1e,*s1p1f,
273-
*s1p20,*s1p21,*s1p22,*s1p23,*s1p24,*s1p25,*s1p26,*s1p27,*s1p28,*s1p29,*s1p2a,*s1p2b,*s1p2c,*s1p2d,*s1p2e,*s1p2f,
274-
*s1p30,*s1p31,*s1p32,*s1p33,*s1p34,*s1p35,*s1p36,*s1p37,*s1p38,*s1p39,*s1p3a,*s1p3b,*s1p3c,*s1p3d,*s1p3e,*s1p3f,
274+
*s1p00,*s1p01,*s1p02,*s1p03,*s1p04,*s1p05,*s1p06,*s1p07,*s1p08,/* *s1p09,*s1p0a,*s1p0b,*s1p0c,*s1p0d,*s1p0e,*s1p0f, */
275+
*s1p10,/* *s1p11,*s1p12,*s1p13,*s1p14,*s1p15,*s1p16,*s1p17, */*s1p18,/* *s1p19,*s1p1a,*s1p1b,*s1p1c,*s1p1d,*s1p1e,*s1p1f, */
276+
*s1p20,/* *s1p21,*s1p22,*s1p23,*s1p24,*s1p25,*s1p26,*s1p27, */*s1p28,/* *s1p29,*s1p2a,*s1p2b,*s1p2c,*s1p2d,*s1p2e,*s1p2f, */
277+
*s1p30,/* *s1p31,*s1p32,*s1p33,*s1p34,*s1p35,*s1p36,*s1p37, */*s1p38,/* *s1p39,*s1p3a,*s1p3b,*s1p3c,*s1p3d,*s1p3e,*s1p3f, */
278+
#endif
275279
*cy_r,*cy_i; // Need RADIX slots for sse2 carries, RADIX/2 for avx
276280
#ifdef USE_AVX
277281
static vec_dbl *base_negacyclic_root;
@@ -287,7 +291,10 @@ int radix64_ditN_cy_dif1(double a[], int n, int nwt, int nwt_bits, double wt0[],
287291

288292
static struct cy_thread_data_t *tdat = 0x0;
289293
// Threadpool-based dispatch stuff:
290-
static int main_work_units = 0, pool_work_units = 0;
294+
#if 0//def OS_TYPE_MACOSX
295+
static int main_work_units = 0;
296+
#endif
297+
static int pool_work_units = 0;
291298
static struct threadpool *tpool = 0x0;
292299
static int task_is_blocking = TRUE;
293300
static thread_control_t thread_control = {0,0,0};
@@ -556,38 +563,40 @@ int radix64_ditN_cy_dif1(double a[], int n, int nwt, int nwt_bits, double wt0[],
556563
r3C = tmp + 0x3c; r7C = tmp + 0x7c;
557564
r3E = tmp + 0x3e; r7E = tmp + 0x7e;
558565
tmp += 0x80;
566+
#ifndef MULTITHREAD
559567
s1p00 = tmp + 0x00; s1p20 = tmp + 0x40;
560-
s1p01 = tmp + 0x02; s1p21 = tmp + 0x42;
561-
s1p02 = tmp + 0x04; s1p22 = tmp + 0x44;
562-
s1p03 = tmp + 0x06; s1p23 = tmp + 0x46;
563-
s1p04 = tmp + 0x08; s1p24 = tmp + 0x48;
564-
s1p05 = tmp + 0x0a; s1p25 = tmp + 0x4a;
565-
s1p06 = tmp + 0x0c; s1p26 = tmp + 0x4c;
566-
s1p07 = tmp + 0x0e; s1p27 = tmp + 0x4e;
568+
s1p01 = tmp + 0x02; //s1p21 = tmp + 0x42;
569+
s1p02 = tmp + 0x04; //s1p22 = tmp + 0x44;
570+
s1p03 = tmp + 0x06; //s1p23 = tmp + 0x46;
571+
s1p04 = tmp + 0x08; //s1p24 = tmp + 0x48;
572+
s1p05 = tmp + 0x0a; //s1p25 = tmp + 0x4a;
573+
s1p06 = tmp + 0x0c; //s1p26 = tmp + 0x4c;
574+
s1p07 = tmp + 0x0e; //s1p27 = tmp + 0x4e;
567575
s1p08 = tmp + 0x10; s1p28 = tmp + 0x50;
568-
s1p09 = tmp + 0x12; s1p29 = tmp + 0x52;
569-
s1p0a = tmp + 0x14; s1p2a = tmp + 0x54;
570-
s1p0b = tmp + 0x16; s1p2b = tmp + 0x56;
571-
s1p0c = tmp + 0x18; s1p2c = tmp + 0x58;
572-
s1p0d = tmp + 0x1a; s1p2d = tmp + 0x5a;
573-
s1p0e = tmp + 0x1c; s1p2e = tmp + 0x5c;
574-
s1p0f = tmp + 0x1e; s1p2f = tmp + 0x5e;
576+
//s1p09 = tmp + 0x12; s1p29 = tmp + 0x52;
577+
//s1p0a = tmp + 0x14; s1p2a = tmp + 0x54;
578+
//s1p0b = tmp + 0x16; s1p2b = tmp + 0x56;
579+
//s1p0c = tmp + 0x18; s1p2c = tmp + 0x58;
580+
//s1p0d = tmp + 0x1a; s1p2d = tmp + 0x5a;
581+
//s1p0e = tmp + 0x1c; s1p2e = tmp + 0x5c;
582+
//s1p0f = tmp + 0x1e; s1p2f = tmp + 0x5e;
575583
s1p10 = tmp + 0x20; s1p30 = tmp + 0x60;
576-
s1p11 = tmp + 0x22; s1p31 = tmp + 0x62;
577-
s1p12 = tmp + 0x24; s1p32 = tmp + 0x64;
578-
s1p13 = tmp + 0x26; s1p33 = tmp + 0x66;
579-
s1p14 = tmp + 0x28; s1p34 = tmp + 0x68;
580-
s1p15 = tmp + 0x2a; s1p35 = tmp + 0x6a;
581-
s1p16 = tmp + 0x2c; s1p36 = tmp + 0x6c;
582-
s1p17 = tmp + 0x2e; s1p37 = tmp + 0x6e;
584+
//s1p11 = tmp + 0x22; s1p31 = tmp + 0x62;
585+
//s1p12 = tmp + 0x24; s1p32 = tmp + 0x64;
586+
//s1p13 = tmp + 0x26; s1p33 = tmp + 0x66;
587+
//s1p14 = tmp + 0x28; s1p34 = tmp + 0x68;
588+
//s1p15 = tmp + 0x2a; s1p35 = tmp + 0x6a;
589+
//s1p16 = tmp + 0x2c; s1p36 = tmp + 0x6c;
590+
//s1p17 = tmp + 0x2e; s1p37 = tmp + 0x6e;
583591
s1p18 = tmp + 0x30; s1p38 = tmp + 0x70;
584-
s1p19 = tmp + 0x32; s1p39 = tmp + 0x72;
585-
s1p1a = tmp + 0x34; s1p3a = tmp + 0x74;
586-
s1p1b = tmp + 0x36; s1p3b = tmp + 0x76;
587-
s1p1c = tmp + 0x38; s1p3c = tmp + 0x78;
588-
s1p1d = tmp + 0x3a; s1p3d = tmp + 0x7a;
589-
s1p1e = tmp + 0x3c; s1p3e = tmp + 0x7c;
590-
s1p1f = tmp + 0x3e; s1p3f = tmp + 0x7e;
592+
//s1p19 = tmp + 0x32; s1p39 = tmp + 0x72;
593+
//s1p1a = tmp + 0x34; s1p3a = tmp + 0x74;
594+
//s1p1b = tmp + 0x36; s1p3b = tmp + 0x76;
595+
//s1p1c = tmp + 0x38; s1p3c = tmp + 0x78;
596+
//s1p1d = tmp + 0x3a; s1p3d = tmp + 0x7a;
597+
//s1p1e = tmp + 0x3c; s1p3e = tmp + 0x7c;
598+
//s1p1f = tmp + 0x3e; s1p3f = tmp + 0x7e;
599+
#endif
591600
tmp += 0x80;
592601
#if !USE_SCALAR_DFT_MACRO
593602
two = tmp + 0; // AVX+ versions of various DFT macros need consts [2,1,sqrt2,isrt2] quartet laid out thusly
@@ -2463,10 +2472,10 @@ void radix64_dit_pass1(double a[], int n)
24632472
*r40,*r42,*r44,*r46,*r48,*r4A,*r4C,*r4E,*r50,*r52,*r54,*r56,*r58,*r5A,*r5C,*r5E,
24642473
*r60,*r62,*r64,*r66,*r68,*r6A,*r6C,*r6E,*r70,*r72,*r74,*r76,*r78,*r7A,*r7C,*r7E,
24652474
// ...and s's as pointers-to-complex-SIMD; thus the r-indices run 2x faster than the s-ones:
2466-
*s1p00,*s1p01,*s1p02,*s1p03,*s1p04,*s1p05,*s1p06,*s1p07,*s1p08,*s1p09,*s1p0a,*s1p0b,*s1p0c,*s1p0d,*s1p0e,*s1p0f,
2467-
*s1p10,*s1p11,*s1p12,*s1p13,*s1p14,*s1p15,*s1p16,*s1p17,*s1p18,*s1p19,*s1p1a,*s1p1b,*s1p1c,*s1p1d,*s1p1e,*s1p1f,
2468-
*s1p20,*s1p21,*s1p22,*s1p23,*s1p24,*s1p25,*s1p26,*s1p27,*s1p28,*s1p29,*s1p2a,*s1p2b,*s1p2c,*s1p2d,*s1p2e,*s1p2f,
2469-
*s1p30,*s1p31,*s1p32,*s1p33,*s1p34,*s1p35,*s1p36,*s1p37,*s1p38,*s1p39,*s1p3a,*s1p3b,*s1p3c,*s1p3d,*s1p3e,*s1p3f,
2475+
*s1p00,*s1p01,*s1p02,*s1p03,*s1p04,*s1p05,*s1p06,*s1p07,*s1p08,/* *s1p09,*s1p0a,*s1p0b,*s1p0c,*s1p0d,*s1p0e,*s1p0f, */
2476+
*s1p10,/* *s1p11,*s1p12,*s1p13,*s1p14,*s1p15,*s1p16,*s1p17,*/ *s1p18,/* *s1p19,*s1p1a,*s1p1b,*s1p1c,*s1p1d,*s1p1e,*s1p1f, */
2477+
*s1p20,/* *s1p21,*s1p22,*s1p23,*s1p24,*s1p25,*s1p26,*s1p27,*/ *s1p28,/* *s1p29,*s1p2a,*s1p2b,*s1p2c,*s1p2d,*s1p2e,*s1p2f, */
2478+
*s1p30,/* *s1p31,*s1p32,*s1p33,*s1p34,*s1p35,*s1p36,*s1p37,*/ *s1p38,/* *s1p39,*s1p3a,*s1p3b,*s1p3c,*s1p3d,*s1p3e,*s1p3f, */
24702479
*cy_r,*cy_i; // Need RADIX slots for sse2 carries, RADIX/2 for avx
24712480
#ifdef USE_AVX
24722481
vec_dbl *base_negacyclic_root;
@@ -2496,7 +2505,6 @@ void radix64_dit_pass1(double a[], int n)
24962505
#endif
24972506

24982507
// int data:
2499-
int thr_id = thread_arg->tid;
25002508
int iter = thread_arg->iter;
25012509
int NDIVR = thread_arg->ndivr;
25022510
int n = NDIVR*RADIX, nm1 = n-1;
@@ -2648,37 +2656,37 @@ void radix64_dit_pass1(double a[], int n)
26482656
r3E = tmp + 0x3e; r7E = tmp + 0x7e;
26492657
tmp += 0x80;
26502658
s1p00 = tmp + 0x00; s1p20 = tmp + 0x40;
2651-
s1p01 = tmp + 0x02; s1p21 = tmp + 0x42;
2652-
s1p02 = tmp + 0x04; s1p22 = tmp + 0x44;
2653-
s1p03 = tmp + 0x06; s1p23 = tmp + 0x46;
2654-
s1p04 = tmp + 0x08; s1p24 = tmp + 0x48;
2655-
s1p05 = tmp + 0x0a; s1p25 = tmp + 0x4a;
2656-
s1p06 = tmp + 0x0c; s1p26 = tmp + 0x4c;
2657-
s1p07 = tmp + 0x0e; s1p27 = tmp + 0x4e;
2659+
s1p01 = tmp + 0x02; //s1p21 = tmp + 0x42;
2660+
s1p02 = tmp + 0x04; //s1p22 = tmp + 0x44;
2661+
s1p03 = tmp + 0x06; //s1p23 = tmp + 0x46;
2662+
s1p04 = tmp + 0x08; //s1p24 = tmp + 0x48;
2663+
s1p05 = tmp + 0x0a; //s1p25 = tmp + 0x4a;
2664+
s1p06 = tmp + 0x0c; //s1p26 = tmp + 0x4c;
2665+
s1p07 = tmp + 0x0e; //s1p27 = tmp + 0x4e;
26582666
s1p08 = tmp + 0x10; s1p28 = tmp + 0x50;
2659-
s1p09 = tmp + 0x12; s1p29 = tmp + 0x52;
2660-
s1p0a = tmp + 0x14; s1p2a = tmp + 0x54;
2661-
s1p0b = tmp + 0x16; s1p2b = tmp + 0x56;
2662-
s1p0c = tmp + 0x18; s1p2c = tmp + 0x58;
2663-
s1p0d = tmp + 0x1a; s1p2d = tmp + 0x5a;
2664-
s1p0e = tmp + 0x1c; s1p2e = tmp + 0x5c;
2665-
s1p0f = tmp + 0x1e; s1p2f = tmp + 0x5e;
2667+
//s1p09 = tmp + 0x12; s1p29 = tmp + 0x52;
2668+
//s1p0a = tmp + 0x14; s1p2a = tmp + 0x54;
2669+
//s1p0b = tmp + 0x16; s1p2b = tmp + 0x56;
2670+
//s1p0c = tmp + 0x18; s1p2c = tmp + 0x58;
2671+
//s1p0d = tmp + 0x1a; s1p2d = tmp + 0x5a;
2672+
//s1p0e = tmp + 0x1c; s1p2e = tmp + 0x5c;
2673+
//s1p0f = tmp + 0x1e; s1p2f = tmp + 0x5e;
26662674
s1p10 = tmp + 0x20; s1p30 = tmp + 0x60;
2667-
s1p11 = tmp + 0x22; s1p31 = tmp + 0x62;
2668-
s1p12 = tmp + 0x24; s1p32 = tmp + 0x64;
2669-
s1p13 = tmp + 0x26; s1p33 = tmp + 0x66;
2670-
s1p14 = tmp + 0x28; s1p34 = tmp + 0x68;
2671-
s1p15 = tmp + 0x2a; s1p35 = tmp + 0x6a;
2672-
s1p16 = tmp + 0x2c; s1p36 = tmp + 0x6c;
2673-
s1p17 = tmp + 0x2e; s1p37 = tmp + 0x6e;
2675+
//s1p11 = tmp + 0x22; s1p31 = tmp + 0x62;
2676+
//s1p12 = tmp + 0x24; s1p32 = tmp + 0x64;
2677+
//s1p13 = tmp + 0x26; s1p33 = tmp + 0x66;
2678+
//s1p14 = tmp + 0x28; s1p34 = tmp + 0x68;
2679+
//s1p15 = tmp + 0x2a; s1p35 = tmp + 0x6a;
2680+
//s1p16 = tmp + 0x2c; s1p36 = tmp + 0x6c;
2681+
//s1p17 = tmp + 0x2e; s1p37 = tmp + 0x6e;
26742682
s1p18 = tmp + 0x30; s1p38 = tmp + 0x70;
2675-
s1p19 = tmp + 0x32; s1p39 = tmp + 0x72;
2676-
s1p1a = tmp + 0x34; s1p3a = tmp + 0x74;
2677-
s1p1b = tmp + 0x36; s1p3b = tmp + 0x76;
2678-
s1p1c = tmp + 0x38; s1p3c = tmp + 0x78;
2679-
s1p1d = tmp + 0x3a; s1p3d = tmp + 0x7a;
2680-
s1p1e = tmp + 0x3c; s1p3e = tmp + 0x7c;
2681-
s1p1f = tmp + 0x3e; s1p3f = tmp + 0x7e;
2683+
//s1p19 = tmp + 0x32; s1p39 = tmp + 0x72;
2684+
//s1p1a = tmp + 0x34; s1p3a = tmp + 0x74;
2685+
//s1p1b = tmp + 0x36; s1p3b = tmp + 0x76;
2686+
//s1p1c = tmp + 0x38; s1p3c = tmp + 0x78;
2687+
//s1p1d = tmp + 0x3a; s1p3d = tmp + 0x7a;
2688+
//s1p1e = tmp + 0x3c; s1p3e = tmp + 0x7c;
2689+
//s1p1f = tmp + 0x3e; s1p3f = tmp + 0x7e;
26822690
tmp += 0x80;
26832691
#if !USE_SCALAR_DFT_MACRO
26842692
// To support FMA versions of the radix-8 macros used to build radix-64 we insert a standalone copy of the [2,1,sqrt2,isrt2] quartet:

0 commit comments

Comments
 (0)