Skip to content

Commit 255eef5

Browse files
author
Dogancan Ozturk
committed
adjustments
1 parent 308f2fe commit 255eef5

File tree

9 files changed

+69
-65
lines changed

9 files changed

+69
-65
lines changed

H264Sharp/NativeBindings.cs

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,9 @@ public class NativeBindings
4646
private delegate void SetConverterConfigd(ConverterConfig config);
4747
private delegate void GetConverterConfigd(ref ConverterConfig p);
4848

49-
5049
private delegate IntPtr AllocAllignedNatived(int size);
5150
private delegate void FreeAllignedNatived(IntPtr p);
5251

53-
5452
//---------------------------------------Declaration-----------------------------------------------
5553
// Encoder
5654
private EnableDebugLogsd encoderEnableDebugLogs;
@@ -111,8 +109,6 @@ public NativeBindings()
111109
}
112110
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
113111
{
114-
115-
116112
switch (RuntimeInformation.ProcessArchitecture)
117113
{
118114
case Architecture.X86:
@@ -129,9 +125,7 @@ public NativeBindings()
129125
break;
130126
default:
131127
throw new PlatformNotSupportedException("Unsupported architecture.");
132-
}
133-
134-
128+
}
135129
}
136130
else if (Defines.IsRunningOnAndroid())
137131
{
@@ -570,7 +564,6 @@ internal void SetTargetFps(IntPtr encoder, float target)
570564
=> setTargetFps(encoder, target);
571565
internal void FreeEncoder(IntPtr encoder)
572566
=> freeEncoder(encoder);
573-
574567
internal int GetOptionEncoder(IntPtr encoder, ENCODER_OPTION option, IntPtr value)
575568
=> getOptionEncoder(encoder, option, value);
576569
internal int SetOptionEncoder(IntPtr encoder, ENCODER_OPTION option, IntPtr value)
@@ -585,44 +578,35 @@ internal int InitializeDecoderDefault(IntPtr dec)
585578
=> initializeDecoderDefault(dec);
586579
internal int InitializeDecoder(IntPtr dec, TagSVCDecodingParam param)
587580
=> initializeDecoder(dec, param);
588-
589581
internal int DecodeAsYUV(IntPtr decoder, ref byte frame, int lenght, bool noDelay, ref int state, ref YUVImagePointer decoded)
590582
=> decodeAsYUV(decoder, ref frame, lenght, noDelay, ref state, ref decoded);
591-
592583
internal int DecodeAsYUVExt(IntPtr decoder, ref byte frame, int lenght, bool noDelay, ref int state, ref YUVImagePointer decoded)
593584
=> decodeAsYUVext(decoder, ref frame, lenght, noDelay, ref state, ref decoded);
594585
internal unsafe bool DecodeRgbInto(IntPtr decoder, ref byte frame, int lenght, bool noDelay, ref int state, IntPtr buffer)
595586
=> decodeRgbInto(decoder, ref frame, lenght, noDelay, ref state, buffer);
596587
internal void FreeDecoder(IntPtr decoder)
597588
=> freeDecoder(decoder);
598-
599589
internal int GetOptionDecoder(IntPtr decoder, DECODER_OPTION option, IntPtr value)
600590
=> getOptionDecoder(decoder, option, value);
601591
internal int SetOptionDecoder(IntPtr decoder, DECODER_OPTION option, IntPtr value)
602592
=> setOptionDecoder(decoder, option, value);
603593
internal void DecoderEnableDebugLogs(int val)
604594
=> decoderEnableDebugLogs(val);
605-
606595
// Converter
607-
608596
internal void RGBXtoYUV(ref UnsafeGenericRgbImage rgb, ref YUVImagePointer yuv)
609597
=> rGBXtoYUV(ref rgb, ref yuv);
610598
internal void YUV2RGB(ref YUVImagePointer yuv, ref UnsafeGenericRgbImage rgb)
611599
=> yUV2RGB(ref yuv, ref rgb);
612-
613600
internal void YUVNV12ToRGB(ref YUVNV12ImagePointer nv12, ref UnsafeGenericRgbImage yv12)
614601
=> YuvNV12ToRGB(ref nv12, ref yv12);
615602
internal void YUVNV12ToYV12(ref YUVNV12ImagePointer nv12, ref YUVImagePointer yv12)
616603
=> YuvNV12ToYV12(ref nv12, ref yv12);
617-
618604
internal void DownscaleImg(ref UnsafeGenericRgbImage from, ref UnsafeGenericRgbImage to, int mul)
619605
=> downscaleImg(ref from, ref to, mul);
620-
621606
internal void ConverterGetConfig(ref ConverterConfig c)
622607
=> getConfig(ref c);
623608
internal void ConverterSetConfig(ConverterConfig val)
624609
=> setConfig(val);
625-
626610
internal IntPtr AllocAllignedNative( int size)
627611
=> allocAllognedNative(size);
628612
internal void FreeAllignedNative(IntPtr p)

H264SharpNative/AVX2Common.h

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,24 @@ constexpr bool hasFlag(AlignmentFlags allFlags, AlignmentFlags flag) {
3030
}
3131

3232
__attribute__((target("avx2")))
33-
inline bool isAligned32(void* ptr) {
33+
inline bool isAligned32(void* ptr)
34+
{
3435
return (reinterpret_cast<std::uintptr_t>(ptr) & 31) == 0;
3536
}
37+
3638
__attribute__((target("avx2")))
37-
inline __m256i loadAligned(const void* ptr) {
39+
inline __m256i loadAligned(const void* ptr)
40+
{
3841
const __m256i* aligned_ptr = (const __m256i*)__builtin_assume_aligned(ptr, 32);
3942
return _mm256_load_si256(aligned_ptr);
4043
}
44+
4145
__attribute__((target("avx2")))
42-
inline __m256i loadUnaligned(const void* ptr) {
46+
inline __m256i loadUnaligned(const void* ptr)
47+
{
4348
return _mm256_loadu_si256((const __m256i*)ptr);
4449
}
50+
4551
template <bool alligned>
4652
__attribute__((target("avx2")))
4753
inline __m256i Load(const void* ptr)
@@ -59,15 +65,19 @@ inline void GetChannels3_16x16_2(uint8_t* ptr, __m256i& rl, __m256i& gl, __m256i
5965
const __m256i blendMask0 = _mm256_setr_epi8(
6066
0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
6167
0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
68+
6269
const __m256i blendMask1 = _mm256_setr_epi8(
6370
0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
6471
-1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1);
72+
6573
const __m256i shuffleMaskR = _mm256_setr_epi8(
6674
0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13,
6775
0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13);
76+
6877
const __m256i shuffleMaskG = _mm256_setr_epi8(
6978
1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14,
7079
1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14);
80+
7181
const __m256i shuffleMaskB = _mm256_setr_epi8(
7282
2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15,
7383
2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15);
@@ -103,8 +113,10 @@ inline void GetChannels3_16x16_2(uint8_t* ptr, __m256i& rl, __m256i& gl, __m256i
103113
__attribute__((target("avx2")))
104114
inline void GetChannels4_16x16_2(const uint8_t* ptr, __m256i& rl, __m256i& gl, __m256i& bl, __m256i& rh, __m256i& gh, __m256i& bh)
105115
{
106-
const __m256i rgbaShuffleMask = _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
116+
const __m256i rgbaShuffleMask = _mm256_setr_epi8(
117+
0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
107118
0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
119+
108120
__m256i rgb1 = _mm256_loadu_si256((const __m256i*)ptr);
109121
__m256i rgb2 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
110122
__m256i rgb3 = _mm256_loadu_si256((const __m256i*)(ptr + 64));
@@ -295,7 +307,6 @@ inline void GetChannels3_16x16(uint8_t* RESTRICT input, __m256i& rl, __m256i& gl
295307
8, 11, 14, 1, 4, 7, 10, 13
296308
);
297309

298-
// Define blend mask directly in AVX registers
299310
const __m256i blendMask = _mm256_setr_epi8(
300311
-1, -1, -1, -1, -1, -1, -1, -1,
301312
-1, -1, -1, 0, 0, 0, 0, 0,
@@ -358,12 +369,15 @@ inline void GetChannels4_16x16(uint8_t* RESTRICT src, __m256i& rl, __m256i& gl,
358369
const __m256i rmask = _mm256_setr_epi8(
359370
0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1, 16,
360371
-1, -1, -1, 20, -1, -1, -1, 24, -1, -1, -1, 28, -1, -1, -1);
372+
361373
const __m256i gmask = _mm256_setr_epi8(
362374
1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1, 17,
363375
-1, -1, -1, 21, -1, -1, -1, 25, -1, -1, -1, 29, -1, -1, -1);
376+
364377
const __m256i bmask = _mm256_setr_epi8(
365378
2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1, 18,
366379
-1, -1, -1, 22, -1, -1, -1, 26, -1, -1, -1, 30, -1, -1, -1);
380+
367381
__m256i rgb1 = _mm256_loadu_si256((__m256i*)src);
368382
__m256i rgb2 = _mm256_loadu_si256((__m256i*)(src + 32));
369383
__m256i rgb3 = _mm256_loadu_si256((__m256i*)(src + 64));

H264SharpNative/Converter.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,9 @@ namespace H264Sharp
7575
if (config.EnableDebugPrints > 0)
7676
{
7777
logger << (hasSSE41() ? "SSE4 is supported!" : "SSE4 is NOT supported!") << "\n";
78-
std::cout << (hasAVX2() ? "AVX2 is supported!" : "AVX2 is NOT supported!") << "\n";
79-
std::cout << (hasAVX512() ? "AVX-512 is supported!" : "AVX-512 is NOT supported!") << "\n";
80-
std::cout << (hasNEON() ? "NEON is supported!" : "NEON is NOT supported!") << "\n";
78+
logger << (hasAVX2() ? "AVX2 is supported!" : "AVX2 is NOT supported!") << "\n";
79+
logger << (hasAVX512() ? "AVX-512 is supported!" : "AVX-512 is NOT supported!") << "\n";
80+
logger << (hasNEON() ? "NEON is supported!" : "NEON is NOT supported!") << "\n";
8181

8282
}
8383

H264SharpNative/Decoder.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ namespace H264Sharp
6464

6565

6666
private:
67+
6768
unsigned char* innerBuffer = nullptr;
6869
int innerBufLen=0;
6970
ISVCDecoder* decoder= nullptr;

H264SharpNative/Encoder.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313

1414
namespace H264Sharp {
1515

16-
enum class ConfigType { CameraBasic, ScreenCaptureBasic, CameraCaptureAdvanced, ScreenCaptureAdvanced, CameraCaptureAdvancedHP, ScreenCaptureAdvancedHP};
16+
enum class ConfigType { CameraBasic, ScreenCaptureBasic, CameraCaptureAdvanced, ScreenCaptureAdvanced,
17+
CameraCaptureAdvancedHP, ScreenCaptureAdvancedHP};
1718

1819
class Encoder
1920
{

H264SharpNative/Logger.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,22 @@
55
#define PLATFORM_DESKTOP
66
#endif
77

8-
class Logger {
8+
class Logger
9+
{
910
public:
1011
Logger(const char* = nullptr) {} // Tag is ignored
1112

1213
template<typename T>
13-
Logger& operator<<(const T& value) {
14+
Logger& operator<<(const T& value)
15+
{
1416
#ifdef PLATFORM_DESKTOP
1517
std::cout << value;
1618
#endif
1719
return *this;
1820
}
1921

20-
Logger& operator<<(std::ostream& (*manip)(std::ostream&)) {
22+
Logger& operator<<(std::ostream& (*manip)(std::ostream&))
23+
{
2124
#ifdef PLATFORM_DESKTOP
2225
std::cout << manip;
2326
#endif

H264SharpNative/Rgb2YuvNEON.cpp

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,28 +7,7 @@
77

88
namespace H264Sharp
99
{
10-
const uint16x8_t kB_Y = vdupq_n_u16(25);
11-
const uint16x8_t kG_Y = vdupq_n_u16(129);
12-
const uint16x8_t kR_Y = vdupq_n_u16(66);
13-
14-
const uint8x8_t kB_Y8 = vdup_n_u8(25);
15-
const uint8x8_t kG_Y8 = vdup_n_u8(129);
16-
const uint8x8_t kR_Y8 = vdup_n_u8(66);
17-
18-
const uint8x16_t offset_Y = vdupq_n_u8(16);
19-
20-
const int16x8_t kR_U = vdupq_n_s16(112 / 2);
21-
const int16x8_t kG_U = vdupq_n_s16(-94 / 2);
22-
const int16x8_t kB_U = vdupq_n_s16(-18 / 2);
23-
24-
const int16x8_t kR_V = vdupq_n_s16(-38 / 2);
25-
const int16x8_t kG_V = vdupq_n_s16(-74 / 2);
26-
const int16x8_t kB_V = vdupq_n_s16(112 / 2);
27-
28-
const int16x8_t offset_UV = vdupq_n_s16(128);
29-
30-
const uint8x16_t dropMask = { 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00,
31-
0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00 };//keep drop keep drop
10+
3211

3312
// Look how simple NEON is compared to AVX and its sadistic shuffle permutes for data alignment
3413
template <int NUM_CH, bool RGB>
@@ -42,6 +21,30 @@ namespace H264Sharp
4221
const int32_t end
4322
) {
4423

24+
const uint16x8_t kB_Y = vdupq_n_u16(25);
25+
const uint16x8_t kG_Y = vdupq_n_u16(129);
26+
const uint16x8_t kR_Y = vdupq_n_u16(66);
27+
28+
const uint8x8_t kB_Y8 = vdup_n_u8(25);
29+
const uint8x8_t kG_Y8 = vdup_n_u8(129);
30+
const uint8x8_t kR_Y8 = vdup_n_u8(66);
31+
32+
const uint8x16_t offset_Y = vdupq_n_u8(16);
33+
34+
const int16x8_t kR_U = vdupq_n_s16(112 / 2);
35+
const int16x8_t kG_U = vdupq_n_s16(-94 / 2);
36+
const int16x8_t kB_U = vdupq_n_s16(-18 / 2);
37+
38+
const int16x8_t kR_V = vdupq_n_s16(-38 / 2);
39+
const int16x8_t kG_V = vdupq_n_s16(-74 / 2);
40+
const int16x8_t kB_V = vdupq_n_s16(112 / 2);
41+
42+
const int16x8_t offset_UV = vdupq_n_s16(128);
43+
44+
const uint8x16_t dropMask = { 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00,
45+
0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00 };//keep drop keep drop
46+
47+
4548
int R_INDEX, G_INDEX, B_INDEX;
4649
if constexpr (RGB) {
4750
R_INDEX = 0; G_INDEX = 1; B_INDEX = 2;

H264SharpNative/Yuv2RgbNEON.cpp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,7 @@ namespace H264Sharp
1515
B = CLAMP((Y-16)*1.164 + 2.018*U )
1616
*/
1717
// BT.601-7 studio range constants
18-
const int8x8_t alpha = vdup_n_u8(255);
19-
const int16x8_t const_16 = vdupq_n_s16(16);
20-
const uint8x16_t const_16_8 = vdupq_n_u8(16);
21-
const int16x8_t const_128 = vdupq_n_s16(128);
2218

23-
const auto y_factor = vdupq_n_u16(149); // 1.164 * 64
24-
const auto v_to_r_coeff = vdupq_n_s16(102); // 1.596 * 64
25-
const auto u_to_g_coeff = vdupq_n_s16(25); // 0.391 * 64
26-
const auto v_to_g_coeff = vdupq_n_s16(52); // 0.813 * 64
27-
const auto u_to_b_coeff = vdupq_n_s16(129); // 2.018 * 64
28-
2919
inline void Convert(uint8x16_t y_vals1, uint8x16_t y_vals2, int16x8_t u_valsl, int16x8_t u_valsh, int16x8_t v_valsl, int16x8_t v_valsh,
3020
uint8x16_t& r1l, uint8x16_t& g1l, uint8x16_t& b1l, uint8x16_t& r1h, uint8x16_t& g1h, uint8x16_t& b1h);
3121

@@ -41,6 +31,9 @@ namespace H264Sharp
4131
int32_t begin,
4232
int32_t end)
4333
{
34+
const uint8x16_t const_16_8 = vdupq_n_u8(16);
35+
const int16x8_t const_128 = vdupq_n_s16(128);
36+
4437
int ridx, gidx, bidx;
4538
if constexpr (RGB)
4639
{
@@ -130,6 +123,9 @@ namespace H264Sharp
130123
int32_t begin,
131124
int32_t end)
132125
{
126+
const uint8x16_t const_16_8 = vdupq_n_u8(16);
127+
const int16x8_t const_128 = vdupq_n_s16(128);
128+
133129
int ridx, gidx, bidx;
134130
if constexpr (RGB)
135131
{
@@ -211,6 +207,12 @@ namespace H264Sharp
211207
inline void Convert(uint8x16_t y_vals1, uint8x16_t y_vals2, int16x8_t u_valsl, int16x8_t u_valsh, int16x8_t v_valsl, int16x8_t v_valsh,
212208
uint8x16_t& r1l, uint8x16_t& g1l, uint8x16_t& b1l, uint8x16_t& r1h, uint8x16_t& g1h, uint8x16_t& b1h)
213209
{
210+
const auto y_factor = vdupq_n_u16(149); // 1.164 * 64
211+
const auto v_to_r_coeff = vdupq_n_s16(102); // 1.596 * 64
212+
const auto u_to_g_coeff = vdupq_n_s16(25); // 0.391 * 64
213+
const auto v_to_g_coeff = vdupq_n_s16(52); // 0.813 * 64
214+
const auto u_to_b_coeff = vdupq_n_s16(129); // 2.018 * 64
215+
214216
// multiply UV with the scaling
215217
int16x8_t u_vals_ugl = vshrq_n_s16(vmulq_s16(u_valsl, u_to_g_coeff), 6);
216218
int16x8_t u_vals_ubl = vshrq_n_s16(vmulq_s16(u_valsl, u_to_b_coeff), 6);

H264SharpNative/dllmain.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,10 @@ extern "C" void __attribute__((destructor)) dll_unload(void);
2626

2727
void dll_load()
2828
{
29-
// std::cout << "Library loaded.\n";
30-
// Perform initialization tasks here
3129
}
3230

3331
void dll_unload()
3432
{
35-
//std::cout << "Library unloaded.\n";
36-
// Perform cleanup tasks here
3733
}
3834

3935
#endif // _WIN32

0 commit comments

Comments
 (0)