Skip to content

Commit f21fd9f

Browse files
committed
Add explicit attribute target
1 parent 9b43e7e commit f21fd9f

File tree

5 files changed

+70
-40
lines changed

5 files changed

+70
-40
lines changed

SPOUTSDK/SpoutDirectX/SpoutDX/Tutorial04_Lib/include/SpoutCopy.h

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,27 @@
3737
#include <windows.h>
3838
#include <stdio.h> // for debug printf
3939
#include <GL/gl.h> // For OpenGL definitions
40-
#include <intrin.h> // for cpuid to test for SSE2
4140

42-
#ifdef _M_ARM64
43-
#include <sse2neon.h> // for NEON
44-
#else
45-
#include <emmintrin.h> // for SSE2
46-
#include <tmmintrin.h> // for SSSE3
41+
#if defined(_M_IX86) || defined(_M_X64)
42+
#include <intrin.h> // for cpuid to test for SSE2
43+
#include <emmintrin.h> // for SSE2
44+
#include <tmmintrin.h> // for SSSE3
45+
#if defined(__GNUC__) || (defined(__clang__) && defined(_MSC_VER))
46+
#define SPOUT_TARGET_SSE2 __attribute__((__target__("sse2")))
47+
#define SPOUT_TARGET_SSSE3 __attribute__((__target__("ssse3")))
48+
#endif
49+
#elif defined(_M_ARM64)
50+
#include <sse2neon.h> // for NEON
4751
#endif
52+
53+
#ifndef SPOUT_TARGET_SSE2
54+
#define SPOUT_TARGET_SSE2
55+
#endif
56+
57+
#ifndef SPOUT_TARGET_SSSE3
58+
#define SPOUT_TARGET_SSSE3
59+
#endif
60+
4861
#include <cmath> // For compatibility with Clang. PR#81
4962
#include <stdint.h> // for _uint32 etc
5063

@@ -152,11 +165,11 @@ class SPOUT_DLLEXP spoutCopy {
152165
unsigned int destWidth, unsigned int destHeight, bool bInvert = false) const;
153166

154167
//
155-
// SSE3 function
168+
// SSSE3 function
156169
//
157170
// RGBA to RGB/BGR with source line pitch
158171
//
159-
void rgba_to_rgb_sse3(const void* rgba_source, void* rgb_dest,
172+
void rgba_to_rgb_ssse3(const void* rgba_source, void* rgb_dest,
160173
unsigned int width, unsigned int height,
161174
unsigned int rgba_pitch, // line byte pitch
162175
bool bInvert = false, // Flip image
@@ -198,7 +211,7 @@ class SPOUT_DLLEXP spoutCopy {
198211
// Single line
199212
void rgb_to_bgrx_sse(unsigned int npixels, const void* rgb_source, void* bgrx_out) const;
200213
// Full height
201-
void rgb_to_bgra_sse3(void* rgb_source, void* rgba_dest, unsigned int width, unsigned int height) const;
214+
void rgb_to_bgra_ssse3(void* rgb_source, void* rgba_dest, unsigned int width, unsigned int height) const;
202215

203216
// Copy BGR to BGRA
204217
void bgr2bgra (const void* bgr_source, void *bgra_dest, unsigned int width, unsigned int height, bool bInvert = false) const;

SPOUTSDK/SpoutDirectX/SpoutDX/Tutorial04_Lib/include/SpoutSenderNames.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@
4646
#include <string>
4747
#include <vector>
4848
#include <unordered_map>
49+
#if defined(_M_IX86) || defined(_M_X64)
4950
#include <intrin.h> // for __movsd
51+
#endif
5052
#include <stdint.h> // for _uint32
5153
#include <assert.h>
5254
#ifdef _M_ARM64

SPOUTSDK/SpoutGL/SpoutCopy.cpp

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ void spoutCopy::ClearAlpha(unsigned char* src, unsigned int width, unsigned int
288288
//---------------------------------------------------------
289289
// Function: memcpy_sse2
290290
// SSE2 version of memcpy
291-
void spoutCopy::memcpy_sse2(void* dst, const void* src, size_t Size) const
291+
SPOUT_TARGET_SSE2 void spoutCopy::memcpy_sse2(void* dst, const void* src, size_t Size) const
292292
{
293293

294294
if (!dst || !src)
@@ -482,8 +482,8 @@ void spoutCopy::rgba2bgra(const void* rgba_source, void* bgra_dest,
482482
return;
483483

484484
if ((width % 16) == 0) { // 16 byte aligned width
485-
if (m_bSSE2 && m_bSSSE3) // SSE3 available
486-
rgba_bgra_sse3(rgba_source, bgra_dest, width, height, bInvert);
485+
if (m_bSSE2 && m_bSSSE3) // SSSE3 available
486+
rgba_bgra_ssse3(rgba_source, bgra_dest, width, height, bInvert);
487487
else if (m_bSSE2) // SSE2 available
488488
rgba_bgra_sse2(rgba_source, bgra_dest, width, height, bInvert);
489489
}
@@ -528,8 +528,8 @@ void spoutCopy::rgba2bgra(const void *rgba_source, void *bgra_dest,
528528

529529
// Copy the line
530530
if ((width % 16) == 0) { // 16 byte aligned width
531-
if (m_bSSE2 && m_bSSSE3) // SSE3 available
532-
rgba_bgra_sse3(source, dest, width, 1); // invert flag false
531+
if (m_bSSE2 && m_bSSSE3) // SSSE3 available
532+
rgba_bgra_ssse3(source, dest, width, 1); // invert flag false
533533
else if (m_bSSE2) // SSE2 available
534534
rgba_bgra_sse2(source, dest, width, 1);
535535
}
@@ -573,8 +573,8 @@ void spoutCopy::rgba2bgra(const void* rgba_source, void* bgra_dest,
573573
}
574574
// Copy the line
575575
if ((width % 16) == 0) { // 16 byte aligned width
576-
if (m_bSSE2 && m_bSSSE3) // SSE3 available
577-
rgba_bgra_sse3(source, dest, width, 1); // invert flag false
576+
if (m_bSSE2 && m_bSSSE3) // SSSE3 available
577+
rgba_bgra_ssse3(source, dest, width, 1); // invert flag false
578578
else if (m_bSSE2) // SSE2 available
579579
rgba_bgra_sse2(source, dest, width, 1);
580580
}
@@ -613,8 +613,8 @@ void spoutCopy::rgba2rgb(const void* rgba_source, void* rgb_dest,
613613
return;
614614

615615
//
616-
// SSE3 copy
617-
// No mirror option, image size 16 bit byte aligned, SSE3 intrinsics support
616+
// SSSE3 copy
617+
// No mirror option, image width a multiple of 16, SSSE3 intrinsics support
618618
//
619619
// Timing tests show more than twice as fast
620620
// (Intel(R) Core(TM) i7-3770K CPU @ 3.50GHz)
@@ -630,8 +630,8 @@ void spoutCopy::rgba2rgb(const void* rgba_source, void* rgb_dest,
630630
//
631631
unsigned int pitch = rgba_pitch;
632632
if(pitch == 0) pitch = width*4;
633-
if (!bMirror && width >= 320 && (width % 16) == 0 && m_bSSE3) {
634-
rgba_to_rgb_sse3(rgba_source, rgb_dest, width, height, pitch, bInvert, bSwapRB);
633+
if (!bMirror && width >= 320 && (width % 16) == 0 && m_bSSSE3) {
634+
rgba_to_rgb_ssse3(rgba_source, rgb_dest, width, height, pitch, bInvert, bSwapRB);
635635
return;
636636
}
637637

@@ -932,7 +932,7 @@ void spoutCopy::rgb2bgra(const void *rgb_source, void *bgra_dest,
932932
// Function: rgb_to_bgrx_sse
933933
// Experimental pending testing
934934
// Single line function
935-
void spoutCopy::rgb_to_bgrx_sse(unsigned int npixels, const void* rgb_source, void* bgrx_dest) const
935+
SPOUT_TARGET_SSSE3 void spoutCopy::rgb_to_bgrx_sse(unsigned int npixels, const void* rgb_source, void* bgrx_dest) const
936936
{
937937
const __m128i* in_vec = static_cast<const __m128i*>(rgb_source);
938938
__m128i* out_vec = static_cast<__m128i*>(bgrx_dest);
@@ -1006,7 +1006,7 @@ void spoutCopy::rgb_to_bgrx_sse(unsigned int npixels, const void* rgb_source, vo
10061006
// Function: rgb_to_bgra_ssse3
10071007
// Experimental pending testing
10081008
// Full image height
1009-
void spoutCopy::rgb_to_bgra_sse3 (
1009+
void spoutCopy::rgb_to_bgra_ssse3 (
10101010
void* rgb_source,
10111011
void* rgba_dest,
10121012
unsigned int width,
@@ -1026,16 +1026,16 @@ void spoutCopy::rgb_to_bgra_sse3 (
10261026
rgba += width*4;
10271027
}
10281028

1029-
} // end rgb_to_bgra_sse3
1029+
} // end rgb_to_bgra_ssse3
10301030

10311031
//
10321032
// =====================================================================================
10331033

10341034

10351035
//---------------------------------------------------------
1036-
// Function: rgba_to_rgb_sse3
1036+
// Function: rgba_to_rgb_ssse3
10371037
//
1038-
void spoutCopy::rgba_to_rgb_sse3(const void* rgba_source, void* rgb_dest,
1038+
SPOUT_TARGET_SSSE3 void spoutCopy::rgba_to_rgb_ssse3(const void* rgba_source, void* rgb_dest,
10391039
unsigned int width, unsigned int height, unsigned int rgba_pitch,
10401040
bool bInvert, bool bSwapRB) const
10411041
{
@@ -1643,7 +1643,7 @@ void spoutCopy::rgba_bgra(const void* rgba_source, void* bgra_dest,
16431643
//
16441644
// All instructions SSE2.
16451645
//
1646-
void spoutCopy::rgba_bgra_sse2(const void* rgba_source, void* bgra_dest, unsigned int width, unsigned int height, bool bInvert) const
1646+
SPOUT_TARGET_SSE2 void spoutCopy::rgba_bgra_sse2(const void* rgba_source, void* bgra_dest, unsigned int width, unsigned int height, bool bInvert) const
16471647
{
16481648

16491649
if (!rgba_source)
@@ -1705,13 +1705,13 @@ void spoutCopy::rgba_bgra_sse2(const void* rgba_source, void* bgra_dest, unsigne
17051705
} // end rgba_bgra_sse2
17061706

17071707
//
1708-
// Adapted from a Gist snippet by Aurélien Vallée (NewbiZ) http://newbiz.github.io/
1708+
// Adapted from a Gist snippet by Aurélien Vallée (NewbiZ) http://newbiz.github.io/
17091709
//
17101710
// https://gist.github.com/NewbiZ/5541524
17111711
//
17121712
// Approximately 15% faster than SSE2 function
17131713
//
1714-
void spoutCopy::rgba_bgra_sse3(const void* rgba_source, void* bgra_dest, unsigned int width, unsigned int height, bool bInvert) const
1714+
SPOUT_TARGET_SSSE3 void spoutCopy::rgba_bgra_ssse3(const void* rgba_source, void* bgra_dest, unsigned int width, unsigned int height, bool bInvert) const
17151715
{
17161716
// Shuffling mask (RGBA -> BGRA) x 4, in reverse byte order
17171717
static const __m128i m = _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2);
@@ -1760,11 +1760,11 @@ void spoutCopy::rgba_bgra_sse3(const void* rgba_source, void* bgra_dest, unsigne
17601760
}
17611761
}
17621762

1763-
} // end rgba_bgra_sse3
1763+
} // end rgba_bgra_ssse3
17641764

17651765

17661766
// Swap red and blue components in place
1767-
void spoutCopy::rgba_swap_ssse3(void* __restrict rgba_source, unsigned int width, unsigned int height)
1767+
SPOUT_TARGET_SSSE3 void spoutCopy::rgba_swap_ssse3(void* __restrict rgba_source, unsigned int width, unsigned int height)
17681768
{
17691769
// Shuffling mask (RGBA -> BGRA) x 4, in reverse byte order (requires SSSE3)
17701770
static const __m128i mask = _mm_set_epi8(

SPOUTSDK/SpoutGL/SpoutCopy.h

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,27 @@
3737
#include <windows.h>
3838
#include <stdio.h> // for debug printf
3939
#include <GL/gl.h> // For OpenGL definitions
40-
#include <intrin.h> // for cpuid to test for SSE2
4140

42-
#ifdef _M_ARM64
43-
#include <sse2neon.h> // for NEON
44-
#else
45-
#include <emmintrin.h> // for SSE2
46-
#include <tmmintrin.h> // for SSSE3
41+
#if defined(_M_IX86) || defined(_M_X64)
42+
#include <intrin.h> // for cpuid to test for SSE2
43+
#include <emmintrin.h> // for SSE2
44+
#include <tmmintrin.h> // for SSSE3
45+
#if defined(__GNUC__) || (defined(__clang__) && defined(_MSC_VER))
46+
#define SPOUT_TARGET_SSE2 __attribute__((__target__("sse2")))
47+
#define SPOUT_TARGET_SSSE3 __attribute__((__target__("ssse3")))
48+
#endif
49+
#elif defined(_M_ARM64)
50+
#include <sse2neon.h> // for NEON
4751
#endif
52+
53+
#ifndef SPOUT_TARGET_SSE2
54+
#define SPOUT_TARGET_SSE2
55+
#endif
56+
57+
#ifndef SPOUT_TARGET_SSSE3
58+
#define SPOUT_TARGET_SSSE3
59+
#endif
60+
4861
#include <cmath> // For compatibility with Clang. PR#81
4962
#include <stdint.h> // for _uint32 etc
5063

@@ -152,11 +165,11 @@ class SPOUT_DLLEXP spoutCopy {
152165
unsigned int destWidth, unsigned int destHeight, bool bInvert = false) const;
153166

154167
//
155-
// SSE3 function
168+
// SSSE3 function
156169
//
157170
// RGBA to RGB/BGR with source line pitch
158171
//
159-
void rgba_to_rgb_sse3(const void* rgba_source, void* rgb_dest,
172+
void rgba_to_rgb_ssse3(const void* rgba_source, void* rgb_dest,
160173
unsigned int width, unsigned int height,
161174
unsigned int rgba_pitch, // line byte pitch
162175
bool bInvert = false, // Flip image
@@ -198,7 +211,7 @@ class SPOUT_DLLEXP spoutCopy {
198211
// Single line
199212
void rgb_to_bgrx_sse(unsigned int npixels, const void* rgb_source, void* bgrx_out) const;
200213
// Full height
201-
void rgb_to_bgra_sse3(void* rgb_source, void* rgba_dest, unsigned int width, unsigned int height) const;
214+
void rgb_to_bgra_ssse3(void* rgb_source, void* rgba_dest, unsigned int width, unsigned int height) const;
202215

203216
// Copy BGR to BGRA
204217
void bgr2bgra (const void* bgr_source, void *bgra_dest, unsigned int width, unsigned int height, bool bInvert = false) const;
@@ -230,7 +243,7 @@ class SPOUT_DLLEXP spoutCopy {
230243

231244
void rgba_bgra(const void *rgba_source, void *bgra_dest, unsigned int width, unsigned int height, bool bInvert = false) const;
232245
void rgba_bgra_sse2(const void *rgba_source, void *bgra_dest, unsigned int width, unsigned int height, bool bInvert = false) const;
233-
void rgba_bgra_sse3(const void *rgba_source, void *bgra_dest, unsigned int width, unsigned int height, bool bInvert = false) const;
246+
void rgba_bgra_ssse3(const void *rgba_source, void *bgra_dest, unsigned int width, unsigned int height, bool bInvert = false) const;
234247
// Swap red and blue components in place
235248
void rgba_swap_ssse3(void* __restrict rgbasource, unsigned int width, unsigned int height);
236249

SPOUTSDK/SpoutGL/SpoutSenderNames.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@
4646
#include <string>
4747
#include <vector>
4848
#include <unordered_map>
49+
#if defined(_M_IX86) || defined(_M_X64)
4950
#include <intrin.h> // for __movsd
51+
#endif
5052
#include <stdint.h> // for _uint32
5153
#include <assert.h>
5254
#ifdef _M_ARM64

0 commit comments

Comments
 (0)