Skip to content

Commit 6e4075e

Browse files
author
synap
committed
2006-02-13 Dennis Smit <[email protected]>
* libvisual/lv_math.c: Added a bit more stuff. * libvisual/lv_audio.c: Use macro names instead of const string entries. 2006-02-09 Dennis Smit <[email protected]> * libvisual/lv_fourier.c, libvisual/lv_rectangle.c, libvisual/lv_math.c, libvisual/lv_video.c: Fixed doxygen errors. * libvisual/lv_rectangle.c: Include lv_math.h, erroneous normalisation was because of down cast to int because the prototypes were missing.
1 parent fbf0f17 commit 6e4075e

File tree

11 files changed

+237
-51
lines changed

11 files changed

+237
-51
lines changed

libvisual/ChangeLog

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,18 @@
1+
2006-02-13 Dennis Smit <[email protected]>
2+
3+
* libvisual/lv_math.c: Added a bit more stuff.
4+
5+
* libvisual/lv_audio.c: Use macro names instead of const string
6+
entries.
7+
8+
2006-02-09 Dennis Smit <[email protected]>
9+
10+
* libvisual/lv_fourier.c, libvisual/lv_rectangle.c,
11+
libvisual/lv_math.c, libvisual/lv_video.c: Fixed doxygen errors.
12+
13+
* libvisual/lv_rectangle.c: Include lv_math.h, erroneous normalisation
14+
was because of down cast to int because the prototypes were missing.
15+
116
2006-02-05 Dennis Smit <[email protected]>
217

318
* configure.ac: Added x86_64 detection.

libvisual/libvisual/lv_audio.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
*
55
* Authors: Dennis Smit <[email protected]>
66
*
7-
* $Id: lv_audio.c,v 1.41 2006-01-22 20:07:56 synap Exp $
7+
* $Id: lv_audio.c,v 1.42 2006-02-13 20:54:08 synap Exp $
88
*
99
* This program is free software; you can redistribute it and/or modify
1010
* it under the terms of the GNU Lesser General Public License as
@@ -243,7 +243,7 @@ int visual_audio_analyze (VisAudio *audio)
243243

244244
visual_audio_samplepool_flush_old (audio->samplepool);
245245

246-
channel = visual_audio_samplepool_get_channel (audio->samplepool, "front left 1");
246+
channel = visual_audio_samplepool_get_channel (audio->samplepool, VISUAL_AUDIO_CHANNEL_LEFT);
247247

248248
if (channel != 0) {
249249
visual_buffer_init (&buffer, pcm[0], 1024, NULL);
@@ -254,7 +254,7 @@ int visual_audio_analyze (VisAudio *audio)
254254
visual_object_unref (VISUAL_OBJECT (&buffer));
255255
}
256256

257-
channel = visual_audio_samplepool_get_channel (audio->samplepool, "front right 1");
257+
channel = visual_audio_samplepool_get_channel (audio->samplepool, VISUAL_AUDIO_CHANNEL_RIGHT);
258258

259259
if (channel != 0) {
260260
visual_buffer_init (&buffer, pcm[1], 1024, NULL);
@@ -1343,10 +1343,10 @@ static int input_interleaved_stereo (VisAudioSamplePool *samplepool, VisBuffer *
13431343
visual_buffer_set_destroyer (chan2, visual_buffer_destroyer_free);
13441344

13451345
sample = visual_audio_sample_new (chan1, &timestamp, format, rate);
1346-
visual_audio_samplepool_add (samplepool, sample, "front left 1");
1346+
visual_audio_samplepool_add (samplepool, sample, VISUAL_AUDIO_CHANNEL_LEFT);
13471347

13481348
sample = visual_audio_sample_new (chan2, &timestamp, format, rate);
1349-
visual_audio_samplepool_add (samplepool, sample, "front right 1");
1349+
visual_audio_samplepool_add (samplepool, sample, VISUAL_AUDIO_CHANNEL_RIGHT);
13501350

13511351
return VISUAL_OK;
13521352
}

libvisual/libvisual/lv_fourier.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Authors: Dennis Smit <[email protected]>
99
* Chong Kai Xiong <[email protected]>
1010
*
11-
* $Id: lv_fourier.c,v 1.14 2006-01-22 20:07:56 synap Exp $
11+
* $Id: lv_fourier.c,v 1.15 2006-02-13 20:54:08 synap Exp $
1212
*
1313
* This program is free software; you can redistribute it and/or modify
1414
* it under the terms of the GNU Lesser General Public License as
@@ -468,7 +468,7 @@ static void perform_fft_radix2_dit (VisDFT *dft, float *output, float *input)
468468
* \note Output samples are normalised to [0.0, 1.0] by dividing with the
469469
* spectrum size.
470470
*
471-
* @param fourier Pointer to the VisDFT context for this transform.
471+
* @param dft Pointer to the VisDFT context for this transform.
472472
* @param output Array of output samples
473473
* @param input Array of input samples with values in [-1.0, 1.0]
474474
*
@@ -499,9 +499,9 @@ int visual_dft_perform (VisDFT *dft, float *output, float *input)
499499
*
500500
* \note Scaled values are guaranteed to be in [0.0, 1.0].
501501
*
502-
* @param dft Pointer to VisDFT context
503502
* @param output Array of output samples
504503
* @param input Array of input samples with values in [0.0, 1.0]
504+
* @param size Array size.
505505
*
506506
* @return VISUAL_OK on success, VISUAL_ERROR_NULL on failure.
507507
*/

libvisual/libvisual/lv_keysym.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
*
55
* Authors: Dennis Smit <[email protected]>
66
*
7-
* $Id: lv_keysym.h,v 1.5 2006-01-22 13:23:37 synap Exp $
7+
* $Id: lv_keysym.h,v 1.6 2006-02-13 20:54:08 synap Exp $
88
*
99
* This program is free software; you can redistribute it and/or modify
1010
* it under the terms of the GNU Lesser General Public License as
@@ -79,7 +79,7 @@ typedef enum {
7979
VKEY_GREATER = 62,
8080
VKEY_QUESTION = 63,
8181
VKEY_AT = 64,
82-
82+
8383
/* Skip uppercase here because it's done via the VisKeyMod */
8484
VKEY_LEFTBRACKET = 91,
8585
VKEY_BACKSLASH = 92,

libvisual/libvisual/lv_math.c

Lines changed: 186 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
*
55
* Authors: Dennis Smit <[email protected]>
66
*
7-
* $Id: lv_math.c,v 1.12 2006-02-05 18:45:57 synap Exp $
7+
* $Id: lv_math.c,v 1.13 2006-02-13 20:54:08 synap Exp $
88
*
99
* This program is free software; you can redistribute it and/or modify
1010
* it under the terms of the GNU Lesser General Public License as
@@ -299,7 +299,7 @@ int visual_math_vectorized_add_floats_const_float (float *dest, float *src, visu
299299
* @param dest Pointer to the destination float array.
300300
* @param src Pointer to the source float array.
301301
* @param n The number of items in the array.
302-
* @param adder The constant substracter that is substracter from every entry in the source array.
302+
* @param substracter The constant substracter that is subtracted from every entry in the source array.
303303
*
304304
* @return VISUAL_OK on success or -VISUAL_ERROR_NULL on failure.
305305
*/
@@ -341,10 +341,10 @@ int visual_math_vectorized_substract_floats_const_float (float *dest, float *src
341341
"\n\t movups 16(%0), %%xmm1"
342342
"\n\t movups 32(%0), %%xmm2"
343343
"\n\t movups 48(%0), %%xmm3"
344-
"\n\t addps %%xmm7, %%xmm0"
345-
"\n\t addps %%xmm7, %%xmm1"
346-
"\n\t addps %%xmm7, %%xmm2"
347-
"\n\t addps %%xmm7, %%xmm3"
344+
"\n\t subps %%xmm7, %%xmm0"
345+
"\n\t subps %%xmm7, %%xmm1"
346+
"\n\t subps %%xmm7, %%xmm2"
347+
"\n\t subps %%xmm7, %%xmm3"
348348
"\n\t movntps %%xmm0, (%1)"
349349
"\n\t movntps %%xmm1, 16(%1)"
350350
"\n\t movntps %%xmm2, 32(%1)"
@@ -416,6 +416,104 @@ int visual_math_vectorized_substract_floats_const_float (float *dest, float *src
416416
return VISUAL_OK;
417417
}
418418

419+
/**
 * Vectorized element-wise multiplication of two float arrays: every entry of dest becomes the
 * product of the matching entries of src1 and src2. With the right cpu features in place this
 * function is very optimized.
 *
 * @param dest Pointer to the destination float array.
 * @param src1 Pointer to the first source float array.
 * @param src2 Pointer to the second source float array.
 * @param n The number of items in each of the arrays.
 *
 * @return VISUAL_OK on success or -VISUAL_ERROR_NULL on failure.
 */
int visual_math_vectorized_multiplier_floats_floats (float *dest, float *src1, float *src2, visual_size_t n)
{
	float *d = dest;
	float *s1 = src1;
	float *s2 = src2;

	visual_log_return_val_if_fail (dest != NULL, -VISUAL_ERROR_NULL);
	visual_log_return_val_if_fail (src1 != NULL, -VISUAL_ERROR_NULL);
	visual_log_return_val_if_fail (src2 != NULL, -VISUAL_ERROR_NULL);

	if (visual_cpu_get_sse () && n >= 16) {
#if defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64)
		/* movntps needs a 16 byte aligned destination, so handle the leading
		 * entries one by one until d is aligned. */
		while (!VISUAL_ALIGNED(d, 16)) {
			(*d) = (*s1) * (*s2);

			d++;
			s1++;
			s2++;

			n--;
		}

		/* 16 floats (four xmm registers) per iteration, whatever is left over
		 * is finished by the scalar loop at the end of the function. */
		while (n > 16) {
			__asm __volatile
				("\n\t prefetchnta 256(%0)"
				 "\n\t prefetchnta 256(%1)"
				 "\n\t movups (%0), %%xmm0"
				 "\n\t movups 16(%0), %%xmm1"
				 "\n\t movups 32(%0), %%xmm2"
				 "\n\t movups 48(%0), %%xmm3"
				 "\n\t movups (%1), %%xmm4"
				 "\n\t movups 16(%1), %%xmm5"
				 "\n\t movups 32(%1), %%xmm6"
				 "\n\t movups 48(%1), %%xmm7"
				 "\n\t mulps %%xmm4, %%xmm0"
				 "\n\t mulps %%xmm5, %%xmm1"
				 "\n\t mulps %%xmm6, %%xmm2"
				 "\n\t mulps %%xmm7, %%xmm3"
				 "\n\t movntps %%xmm0, (%2)"
				 "\n\t movntps %%xmm1, 16(%2)"
				 "\n\t movntps %%xmm2, 32(%2)"
				 "\n\t movntps %%xmm3, 48(%2)"
				 /* Every register written by the block is listed, so the compiler
				  * never keeps a live value in one of them across the asm. */
				 :: "r" (s1), "r" (s2), "r" (d)
				 : "memory", "xmm0", "xmm1", "xmm2", "xmm3",
				   "xmm4", "xmm5", "xmm6", "xmm7");

			d += 16;
			s1 += 16;
			s2 += 16;

			n -= 16;
		}
#endif /* VISUAL_ARCH_X86 */
	} else if (visual_cpu_get_3dnow ()) {
#if defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64)
		/* 8 floats (four mmx registers, two floats each) per iteration. */
		while (n > 8) {
			__asm __volatile
				("\n\t prefetch 256(%0)"
				 "\n\t movq (%0), %%mm0"
				 "\n\t movq 8(%0), %%mm1"
				 "\n\t movq 16(%0), %%mm2"
				 "\n\t movq 24(%0), %%mm3"
				 "\n\t movq (%1), %%mm4"
				 "\n\t movq 8(%1), %%mm5"
				 "\n\t movq 16(%1), %%mm6"
				 "\n\t movq 24(%1), %%mm7"
				 "\n\t pfmul %%mm4, %%mm0"
				 "\n\t pfmul %%mm5, %%mm1"
				 "\n\t pfmul %%mm6, %%mm2"
				 "\n\t pfmul %%mm7, %%mm3"
				 "\n\t movq %%mm0, (%2)"
				 "\n\t movq %%mm1, 8(%2)"
				 "\n\t movq %%mm2, 16(%2)"
				 "\n\t movq %%mm3, 24(%2)"
				 :: "r" (s1), "r" (s2), "r" (d)
				 : "memory", "mm0", "mm1", "mm2", "mm3",
				   "mm4", "mm5", "mm6", "mm7");

			d += 8;
			s1 += 8;
			s2 += 8;

			n -= 8;
		}

		/* Leave MMX state so that the FPU can be used again. */
		__asm __volatile
			("\n\t emms");
#endif /* VISUAL_ARCH_X86 */

	}

	/* Scalar path: the complete array when no SIMD path ran, else the remaining tail. */
	while (n--) {
		(*d) = (*s1) * (*s2);

		d++;
		s1++;
		s2++;
	}

	return VISUAL_OK;
}
516+
419517
/**
420518
* Converts an array of floats to integers. With the right cpu features in place this function
421519
* is very optimized.
@@ -780,7 +878,8 @@ int visual_math_vectorized_floats_to_int32s_multiply_denormalise (int32_t *ints,
780878
* Vectorized square root for single precision floats. This function works best with data
781879
* sizes larger than 16 or equal to 16.
782880
*
783-
* @param vector The vector of floats of which the square roots will be calculated.
881+
* @param dest The destination vector of floats in which the results are placed.
882+
* @param src The source vector of floats of which the square roots will be calculated.
784883
* @param n The number of floats in the vector.
785884
*
786885
* @return VISUAL_OK on success or -VISUAL_ERROR_NULL on failure.
@@ -805,10 +904,10 @@ int visual_math_vectorized_sqrt_floats (float *dest, float *src, visual_size_t n
805904
while (n > 16) {
806905
__asm __volatile
807906
("\n\t prefetchnta 256(%0)"
808-
"\n\t movaps (%0), %%xmm0"
809-
"\n\t movaps 16(%0), %%xmm1"
810-
"\n\t movaps 32(%0), %%xmm2"
811-
"\n\t movaps 48(%0), %%xmm3"
907+
"\n\t movups (%0), %%xmm0"
908+
"\n\t movups 16(%0), %%xmm1"
909+
"\n\t movups 32(%0), %%xmm2"
910+
"\n\t movups 48(%0), %%xmm3"
812911
"\n\t sqrtps %%xmm0, %%xmm4"
813912
"\n\t sqrtps %%xmm1, %%xmm5"
814913
"\n\t sqrtps %%xmm2, %%xmm6"
@@ -837,6 +936,80 @@ int visual_math_vectorized_sqrt_floats (float *dest, float *src, visual_size_t n
837936
return VISUAL_OK;
838937
}
839938

939+
/**
940+
* Vectorized complex to norm conversion. Will make norm values from a real and imaginary
941+
* array.
942+
*
943+
* @param dest Pointer to the destination float array.
944+
* @param real Pointer to the real part float array.
945+
* @param imag pointer to the imaginary part float array.
946+
* @param n The number of elements to be converted.
947+
*
948+
* @return VISUAL_OK on succes or -VISUAL_ERROR_NULL on failure.
949+
*/
950+
int visual_math_vectorized_complex_to_norm (float *dest, float *real, float *imag, visual_size_t n)
951+
{
952+
float *d = dest;
953+
float *r = real;
954+
float *i = imag;
955+
956+
visual_log_return_val_if_fail (dest != NULL, -VISUAL_ERROR_NULL);
957+
visual_log_return_val_if_fail (real != NULL, -VISUAL_ERROR_NULL);
958+
visual_log_return_val_if_fail (imag != NULL, -VISUAL_ERROR_NULL);
959+
960+
if (visual_cpu_get_sse () && n >= 16) {
961+
962+
#if defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64)
963+
while (!VISUAL_ALIGNED(d, 16)) {
964+
*d = sqrtf (((*r) * (*r)) + ((*i) * (*i)));
965+
966+
d++;
967+
r++;
968+
i++;
969+
970+
n--;
971+
}
972+
973+
while (n > 8) {
974+
__asm __volatile
975+
("\n\t prefetchnta 256(%0)"
976+
"\n\t prefetchnta 256(%1)"
977+
"\n\t movups (%0), %%xmm0"
978+
"\n\t movups 16(%0), %%xmm2"
979+
"\n\t movups (%1), %%xmm1"
980+
"\n\t movups 16(%1), %%xmm3"
981+
"\n\t mulps %%xmm0, %%xmm0"
982+
"\n\t mulps %%xmm1, %%xmm1"
983+
"\n\t mulps %%xmm2, %%xmm2"
984+
"\n\t mulps %%xmm3, %%xmm3"
985+
"\n\t addps %%xmm0, %%xmm1"
986+
"\n\t addps %%xmm2, %%xmm3"
987+
"\n\t sqrtps %%xmm1, %%xmm0"
988+
"\n\t sqrtps %%xmm3, %%xmm2"
989+
"\n\t movntps %%xmm0, (%2)"
990+
"\n\t movntps %%xmm2, 16(%2)"
991+
:: "r" (r), "r" (i), "r" (d) : "memory");
992+
993+
d += 8;
994+
i += 8;
995+
r += 8;
996+
997+
n -= 8;
998+
}
999+
#endif /* VISUAL_ARCH_X86 */
1000+
}
1001+
1002+
while (n--) {
1003+
*d = sqrtf (((*r) * (*r)) + ((*i) * (*i)));
1004+
1005+
d++;
1006+
r++;
1007+
i++;
1008+
}
1009+
1010+
return VISUAL_OK;
1011+
}
1012+
8401013
/**
8411014
* Vectorized complex to norm conversion and result value scaler. Will make norm values from a real and imaginary
8421015
* array, after the conversion has been made it will be multiplied by the scaler.
@@ -882,35 +1055,26 @@ int visual_math_vectorized_complex_to_norm_scale (float *dest, float *real, floa
8821055
("\n\t movups (%0), %%xmm7"
8831056
:: "r" (packed_scaler) : "memory");
8841057

885-
/* FIXME optimize more, look into how we can get it atleast partially aligned, right */
8861058
while (n > 8) {
8871059
__asm __volatile
8881060
("\n\t prefetchnta 256(%0)"
8891061
"\n\t prefetchnta 256(%1)"
890-
8911062
"\n\t movups (%0), %%xmm0"
8921063
"\n\t movups 16(%0), %%xmm2"
893-
8941064
"\n\t movups (%1), %%xmm1"
8951065
"\n\t movups 16(%1), %%xmm3"
896-
8971066
"\n\t mulps %%xmm0, %%xmm0"
8981067
"\n\t mulps %%xmm1, %%xmm1"
899-
9001068
"\n\t mulps %%xmm2, %%xmm2"
9011069
"\n\t mulps %%xmm3, %%xmm3"
902-
9031070
"\n\t addps %%xmm0, %%xmm1"
9041071
"\n\t addps %%xmm2, %%xmm3"
905-
9061072
"\n\t sqrtps %%xmm1, %%xmm0"
9071073
"\n\t sqrtps %%xmm3, %%xmm2"
908-
9091074
"\n\t mulps %%xmm7, %%xmm0"
9101075
"\n\t mulps %%xmm7, %%xmm2"
911-
912-
"\n\t movups %%xmm0, (%2)"
913-
"\n\t movups %%xmm2, 16(%2)"
1076+
"\n\t movntps %%xmm0, (%2)"
1077+
"\n\t movntps %%xmm2, 16(%2)"
9141078
:: "r" (r), "r" (i), "r" (d) : "memory");
9151079

9161080
d += 8;

0 commit comments

Comments
 (0)