Skip to content
This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit d0004f1

Browse files
1480c1 authored and tianjunwork committed
macOS support (#135)
If you get the error "error at sem_open: too many open files" while encoding, refer to the SVT-AV1 README for the fix. Signed-off-by: Christopher Degawa <[email protected]>
1 parent 251cb8b commit d0004f1

File tree

10 files changed

+565
-551
lines changed

10 files changed

+565
-551
lines changed

.travis.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,14 @@ stages:
3636
- name: Coveralls And Valgrind
3737
if: type != pull_request
3838
before_install:
39+
- |
40+
if [ "$(uname -s)" == "Darwin" ]; then
41+
sysctl -n machdep.cpu.brand_string
42+
sysctl machdep.cpu.features
43+
sysctl machdep.cpu.leaf7_features
44+
elif [ -f "/proc/cpuinfo" ]; then
45+
grep -Ei " sse*| ssse*| avx|model name" /proc/cpuinfo | sort -u
46+
fi
3947
- "sudo chown -R travis: $HOME/.ccache"
4048
- export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig PATH="/usr/local/opt/ccache/libexec:$PATH"
4149
- wget -nc https://raw.githubusercontent.com/OpenVisualCloud/SVT-AV1-Resources/master/video.tar.gz || wget -nc http://randomderp.com/video.tar.gz
@@ -52,7 +60,7 @@ script:
5260
cmake $TRAVIS_BUILD_DIR -G"Unix Makefiles" -DCMAKE_BUILD_TYPE=$build_type ${CMAKE_EFLAGS[@]}
5361
cmake -j $(if [ $TRAVIS_OS_NAME = osx ]; then sysctl -n hw.ncpu; else nproc; fi) --build . &&
5462
sudo cmake --build . --target install && cd $TRAVIS_BUILD_DIR
55-
- travis_wait SvtHevcEncApp -encMode 9 -i akiyo_cif.y4m -b test1.h265
63+
- SvtHevcEncApp -encMode 9 -i akiyo_cif.y4m -b test1.h265 $(if [ "$(uname -s)" == "Darwin" ]; then echo "-asm 0"; fi)
5664
before_cache:
5765
- "sudo chown -R travis: $HOME/.ccache"
5866
matrix:

CMakeLists.txt

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,12 +82,6 @@ endif()
8282
set(CAN_USE_ASSEMBLER TRUE)
8383
set(CMAKE_INCLUDE_CURRENT_DIR ON)
8484

85-
if(WIN32)
86-
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DWIN64")
87-
else()
88-
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DUNIX64")
89-
endif()
90-
9185
if(UNIX)
9286
if(APPLE)
9387
set(CMAKE_MACOSX_RPATH 1)

Source/App/EbTime.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
#include <sys/time.h>
1313
#endif
1414

15+
#include "EbTime.h"
16+
1517
void EbAppStartTime(
1618
uint64_t *Startseconds,
1719
uint64_t *Startuseconds)

Source/Lib/ASM_SSE2/EbMcp_Intrinsic_SSE2.c

Lines changed: 12 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,34 +6,7 @@
66
#include "EbDefinitions.h"
77
#include "EbMcp_SSE2.h"
88

9-
#ifdef __linux__
10-
#ifndef __cplusplus
11-
__attribute__((visibility("hidden")))
12-
#endif
13-
#endif
14-
15-
16-
#ifdef __linux__
17-
#ifndef __cplusplus
18-
__attribute__((visibility("hidden")))
19-
#endif
20-
#endif
21-
22-
#ifdef __linux__
23-
#ifndef __cplusplus
24-
__attribute__((visibility("hidden")))
25-
#endif
26-
#endif
27-
28-
29-
#ifdef __linux__
30-
#ifndef __cplusplus
31-
__attribute__((visibility("hidden")))
32-
#endif
33-
#endif
34-
35-
36-
#ifdef __linux__
9+
#ifdef __GNUC__
3710
#ifndef __cplusplus
3811
__attribute__((visibility("hidden")))
3912
#endif
@@ -84,6 +57,11 @@ EB_ALIGN(16) const EB_S16 IntraPredictionConst_SSE2[344]= {
8457
32, 32, 32, 32, 32, 32, 32, 32,
8558
};
8659

60+
#ifdef __GNUC__
61+
#ifndef __cplusplus
62+
__attribute__((visibility("hidden")))
63+
#endif
64+
#endif
8765
void LumaInterpolationCopy16bit_SSE2(
8866
EB_U16 *refPic,
8967
EB_U32 srcStride,
@@ -97,8 +75,12 @@ void LumaInterpolationCopy16bit_SSE2(
9775
PictureCopyKernel_SSE2((EB_BYTE)refPic, srcStride*sizeof(EB_U16), (EB_BYTE)dst, dstStride*sizeof(EB_U16), puWidth*sizeof(EB_U16), puHeight);
9876
}
9977

100-
101-
void ChromaInterpolationCopy16bit_SSE2(
78+
#ifdef __GNUC__
79+
#ifndef __cplusplus
80+
__attribute__((visibility("hidden")))
81+
#endif
82+
#endif
83+
void ChromaInterpolationCopy16bit_SSE2(
10284
EB_U16* refPic,
10385
EB_U32 srcStride,
10486
EB_U16* dst,
@@ -114,7 +96,3 @@ void LumaInterpolationCopy16bit_SSE2(
11496
(void)fracPosy;
11597
PictureCopyKernel_SSE2((EB_BYTE)refPic, srcStride*sizeof(EB_U16), (EB_BYTE)dst, dstStride*sizeof(EB_U16), puWidth*sizeof(EB_U16), puHeight);
11698
}
117-
118-
119-
120-

Source/Lib/ASM_SSE2/EbTransforms_Intrinsic_SSE2.c

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@
100100
sum = _mm_packs_epi32(sum1, sum1);\
101101
INSTR((__m128i *)(DST + OFST5), sum);
102102

103-
#ifdef __linux__
103+
#ifdef __GNUC__
104104
#ifndef __cplusplus
105105
__attribute__((visibility("hidden")))
106106
#endif
@@ -121,7 +121,7 @@ EB_ALIGN(16) const EB_S16 DstTransformAsmConst_SSE2[] = {
121121
//55, -29, 55, -29, 55, -29, 55, -29,
122122
};
123123

124-
#ifdef __linux__
124+
#ifdef __GNUC__
125125
#ifndef __cplusplus
126126
__attribute__((visibility("hidden")))
127127
#endif
@@ -318,7 +318,7 @@ EB_ALIGN(16) const EB_S16 InvTransformAsmConst_SSE2[] = {
318318
54, 90, 54, 90, 54, 90, 54, 90
319319
};
320320

321-
#ifdef __linux__
321+
#ifdef __GNUC__
322322
#ifndef __cplusplus
323323
__attribute__((visibility("hidden")))
324324
#endif
@@ -365,7 +365,7 @@ EB_EXTERN const EB_S16 coeff_tbl2[48 * 8] =
365365
54, 67, -31, -73, 4, 78, 22, -82, -46, 85, 67, -88, -82, 90, 90, -90
366366
};
367367

368-
#ifdef __linux__
368+
#ifdef __GNUC__
369369
#ifndef __cplusplus
370370
__attribute__((visibility("hidden")))
371371
#endif
@@ -2535,12 +2535,12 @@ void Transform4x4_SSE2_INTRIN(
25352535
xmm3 = _mm_loadl_epi64((__m128i *)(residual + 3 * srcStride));
25362536
xmm0 = _mm_unpacklo_epi16(xmm0, xmm1);
25372537
xmm2 = _mm_unpacklo_epi16(xmm2, xmm3);
2538-
2538+
25392539
xmm1 = _mm_unpackhi_epi32(xmm0, xmm2);
25402540
xmm0 = _mm_unpacklo_epi32(xmm0, xmm2);
25412541
xmm1 = _mm_unpacklo_epi64(_mm_srli_si128(xmm1, 8), xmm1);
25422542
xmm3 = _mm_sub_epi16(xmm0, xmm1);
2543-
xmm0 = _mm_add_epi16(xmm0, xmm1);
2543+
xmm0 = _mm_add_epi16(xmm0, xmm1);
25442544

25452545
xmm4 = xmm2 = xmm0;
25462546
xmm0 = _mm_srli_si128(xmm0, 8);
@@ -2551,7 +2551,7 @@ void Transform4x4_SSE2_INTRIN(
25512551
xmm_shift = _mm_cvtsi32_si128(bitIncrement + 1);
25522552

25532553
xmm1 = _mm_unpacklo_epi16(xmm3, _mm_srli_si128(xmm3, 8));
2554-
2554+
25552555
xmm3 = _mm_madd_epi16(xmm1, _mm_load_si128((__m128i *)(transformIntrinConst_SSE2 + OFFSET_36_N83)));
25562556
xmm1 = _mm_madd_epi16(xmm1, _mm_load_si128((__m128i *)(transformIntrinConst_SSE2 + OFFSET_83_36)));
25572557
xmm1 = _mm_add_epi32(xmm1, xmm_offset);
@@ -2577,14 +2577,14 @@ void Transform4x4_SSE2_INTRIN(
25772577
(void)transformCoefficients;
25782578
(void)transformInnerArrayPtr;
25792579

2580-
#undef OFFSET_128
2581-
#undef OFFSET_64_64
2582-
#undef OFFSET_83_36
2580+
#undef OFFSET_128
2581+
#undef OFFSET_64_64
2582+
#undef OFFSET_83_36
25832583
#undef OFFSET_N36_N83
2584-
#undef OFFSET_64_N64
2585-
#undef OFFSET_N64_64
2586-
#undef OFFSET_36_N83
2587-
#undef OFFSET_83_N36
2584+
#undef OFFSET_64_N64
2585+
#undef OFFSET_N64_64
2586+
#undef OFFSET_36_N83
2587+
#undef OFFSET_83_N36
25882588
}
25892589

25902590
void DstTransform4x4_SSE2_INTRIN(
@@ -2619,7 +2619,7 @@ void DstTransform4x4_SSE2_INTRIN(
26192619
xmm_res2 = _mm_loadl_epi64((__m128i *)(residual + 2 * srcStride));
26202620
xmm_res3 = _mm_loadl_epi64((__m128i *)(residual + 3 * srcStride));
26212621
xmm_offset = _mm_srli_epi32(_mm_slli_epi32(_mm_load_si128((__m128i *)(DstTransformAsmConst_SSE2 + OFFSET_DST_1)), shift), 1);
2622-
2622+
26232623
xmm_res0_1 = _mm_unpacklo_epi32(xmm_res0, xmm_res1); // |res01 |res-S1-01|res23 |res-S1-23|
26242624
xmm_res2_3 = _mm_unpacklo_epi32(xmm_res2, xmm_res3); // |res-S2-01|res-S3-01|res-S2-23|res-S3-23|
26252625
xmm_res_hi = _mm_unpackhi_epi64(xmm_res0_1, xmm_res2_3); // |res23 |res-S1-23|res-S2-23|res-S3-23|
@@ -2629,7 +2629,7 @@ void DstTransform4x4_SSE2_INTRIN(
26292629
MACRO_TRANS_2MAC_NO_SAVE(xmm_res_lo, xmm_res_hi, xmm_trans1, xmm_temp, xmm_offset, OFFSET_DST_74_74, OFFSET_DST_0_N74, shift)
26302630
MACRO_TRANS_2MAC_NO_SAVE(xmm_res_lo, xmm_res_hi, xmm_trans2, xmm_temp, xmm_offset, OFFSET_DST_84_N29, OFFSET_DST_N74_55, shift)
26312631
MACRO_TRANS_2MAC_NO_SAVE(xmm_res_lo, xmm_res_hi, xmm_trans3, xmm_temp, xmm_offset, OFFSET_DST_55_N84, OFFSET_DST_74_N29, shift)
2632-
2632+
26332633
// Second Partial Butterfly
26342634
xmm_offset = _mm_set1_epi32(0x00000080); // 128
26352635
xmm_trans0_1 = _mm_unpacklo_epi32(xmm_trans0, xmm_trans1);
@@ -2641,7 +2641,7 @@ void DstTransform4x4_SSE2_INTRIN(
26412641
MACRO_TRANS_2MAC(xmm_trans_lo, xmm_trans_hi, xmm_trans1, xmm_temp, xmm_offset, OFFSET_DST_74_74, OFFSET_DST_0_N74, 8, dstStride)
26422642
MACRO_TRANS_2MAC(xmm_trans_lo, xmm_trans_hi, xmm_trans2, xmm_temp, xmm_offset, OFFSET_DST_84_N29, OFFSET_DST_N74_55, 8, (2 * dstStride))
26432643
MACRO_TRANS_2MAC(xmm_trans_lo, xmm_trans_hi, xmm_trans3, xmm_temp, xmm_offset, OFFSET_DST_55_N84, OFFSET_DST_74_N29, 8, (3 * dstStride))
2644-
2644+
26452645
(void)transformInnerArrayPtr;
26462646
}
26472647

@@ -2656,7 +2656,7 @@ void Transform8x8_SSE2_INTRIN(
26562656
// Transform8x8 has its own table because the larger table's offset macros exceed 256 (which is the maximum macro expansion depth).
26572657
// Use a smaller table with values just for Transform8x8.
26582658

2659-
EB_ALIGN(16) EB_S16 transformIntrinConst_8x8[] = {
2659+
EB_ALIGN(16) EB_S16 transformIntrinConst_8x8[] = {
26602660
83, 36, 83, 36, 83, 36, 83, 36,
26612661
36, -83, 36, -83, 36, -83, 36, -83,
26622662
89, 75, 89, 75, 89, 75, 89, 75,
@@ -2676,15 +2676,15 @@ void Transform8x8_SSE2_INTRIN(
26762676
36, 83, 36, 83, 36, 83, 36, 83,
26772677
50, 89, 50, 89, 50, 89, 50, 89,
26782678
18, -75, 18, -75, 18, -75, 18, -75,
2679-
-64, 64, -64, 64, -64, 64, -64, 64,
2679+
-64, 64, -64, 64, -64, 64, -64, 64,
26802680
64, -64, 64, -64, 64, -64, 64, -64,
26812681
-75, -18, -75, -18, -75, -18, -75, -18,
26822682
89, -50, 89, -50, 89, -50, 89, -50,
2683-
83, -36, 83, -36, 83, -36, 83, -36,
2684-
-36, 83, -36, 83, -36, 83, -36, 83,
2685-
-83, 36, -83, 36, -83, 36, -83, 36,
2683+
83, -36, 83, -36, 83, -36, 83, -36,
2684+
-36, 83, -36, 83, -36, 83, -36, 83,
2685+
-83, 36, -83, 36, -83, 36, -83, 36,
26862686
89, -75, 89, -75, 89, -75, 89, -75,
2687-
50, -18, 50, -18, 50, -18, 50, -18,
2687+
50, -18, 50, -18, 50, -18, 50, -18,
26882688
};
26892689
__m128i sum, sum1, sum2, sum3, sum4;
26902690
__m128i res0, res1, res2, res3, res4, res5, res6, res7;
@@ -2711,15 +2711,15 @@ void Transform8x8_SSE2_INTRIN(
27112711
MACRO_UNPACK(32, res0, res2, res01, res23, res4, res6, res45, res67, res02, res0123, res46, res4567)
27122712
MACRO_UNPACK(64, res0, res4, res02, res46, res01, res45, res0123, res4567, res04, res0246, res0145, res0_to_7)
27132713
MACRO_CALC_EVEN_ODD(res0, res04, res02, res0246, res01, res0145, res0123, res0_to_7)
2714-
2714+
27152715
evenEven0 = _mm_add_epi16(even0, even3);
27162716
evenEven1 = _mm_add_epi16(even1, even2);
27172717
evenOdd0 = _mm_sub_epi16(even0, even3);
27182718
evenOdd1 = _mm_sub_epi16(even1, even2);
27192719

27202720
shift = 4 - bitIncrement;
27212721
trans0 = _mm_slli_epi16(_mm_add_epi16(evenEven0, evenEven1), shift);
2722-
trans4 = _mm_slli_epi16(_mm_sub_epi16(evenEven0, evenEven1), shift);
2722+
trans4 = _mm_slli_epi16(_mm_sub_epi16(evenEven0, evenEven1), shift);
27232723

27242724
xmm_offset = _mm_slli_epi32(_mm_set1_epi32(0x00000002), bitIncrement);
27252725
shift = bitIncrement + 2;
@@ -2729,13 +2729,13 @@ void Transform8x8_SSE2_INTRIN(
27292729

27302730
trans6 = _mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(_mm_madd_epi16(_mm_load_si128((__m128i *)(TransformIntrinConst + TRANS8x8_OFFSET_36_N83)),_mm_unpacklo_epi16(evenOdd0, evenOdd1)), xmm_offset), shift),
27312731
_mm_srai_epi32(_mm_add_epi32(_mm_madd_epi16(_mm_load_si128((__m128i *)(TransformIntrinConst + TRANS8x8_OFFSET_36_N83)),_mm_unpackhi_epi16(evenOdd0, evenOdd1)), xmm_offset), shift));
2732-
2732+
27332733
// TransformCoefficients 1, 3, 5, 7
27342734
odd01_lo = _mm_unpacklo_epi16(odd0, odd1);
27352735
odd01_hi = _mm_unpackhi_epi16(odd0, odd1);
27362736
odd23_lo = _mm_unpacklo_epi16(odd2, odd3);
27372737
odd23_hi = _mm_unpackhi_epi16(odd2, odd3);
2738-
2738+
27392739
MACRO_TRANS_4MAC_NO_SAVE(odd01_lo, odd01_hi, odd23_lo, odd23_hi, trans1, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_89_75, TRANS8x8_OFFSET_50_18, shift)
27402740
MACRO_TRANS_4MAC_NO_SAVE(odd01_lo, odd01_hi, odd23_lo, odd23_hi, trans3, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_75_N18, TRANS8x8_OFFSET_N89_N50, shift)
27412741
MACRO_TRANS_4MAC_NO_SAVE(odd01_lo, odd01_hi, odd23_lo, odd23_hi, trans5, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_50_N89, TRANS8x8_OFFSET_18_75, shift)
@@ -2745,17 +2745,17 @@ void Transform8x8_SSE2_INTRIN(
27452745
MACRO_UNPACK(64, trans0, trans2, trans01, trans23, trans4, trans6, trans45, trans67, trans02, trans0123, trans46, trans4567)
27462746

27472747
xmm_offset = _mm_loadu_si128((__m128i *)(TransformIntrinConst + TRANS8x8_OFFSET_256));
2748-
2748+
27492749
MACRO_TRANS_8MAC(trans0, trans02, trans01, trans0123, trans4, trans46, trans45, trans4567, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_64_64, TRANS8x8_OFFSET_64_64, TRANS8x8_OFFSET_64_64, TRANS8x8_OFFSET_64_64, 9, _mm_storeu_si128, transformCoefficients, 0)
27502750
MACRO_TRANS_8MAC(trans0, trans02, trans01, trans0123, trans4, trans46, trans45, trans4567, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_89_75, TRANS8x8_OFFSET_50_18, TRANS8x8_OFFSET_N18_N50, TRANS8x8_OFFSET_N75_N89, 9, _mm_storeu_si128, transformCoefficients, (dstStride))
27512751
MACRO_TRANS_8MAC(trans0, trans02, trans01, trans0123, trans4, trans46, trans45, trans4567, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_83_36, TRANS8x8_OFFSET_N36_N83, TRANS8x8_OFFSET_N83_N36, TRANS8x8_OFFSET_36_83, 9, _mm_storeu_si128, transformCoefficients, (2 * dstStride))
27522752
MACRO_TRANS_8MAC(trans0, trans02, trans01, trans0123, trans4, trans46, trans45, trans4567, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_75_N18, TRANS8x8_OFFSET_N89_N50, TRANS8x8_OFFSET_50_89, TRANS8x8_OFFSET_18_N75, 9, _mm_storeu_si128, transformCoefficients, (3 * dstStride))
2753-
transformCoefficients += 4 * dstStride;
2753+
transformCoefficients += 4 * dstStride;
27542754
MACRO_TRANS_8MAC(trans0, trans02, trans01, trans0123, trans4, trans46, trans45, trans4567, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_64_N64, TRANS8x8_OFFSET_N64_64, TRANS8x8_OFFSET_64_N64, TRANS8x8_OFFSET_N64_64, 9, _mm_storeu_si128, transformCoefficients, 0)
27552755
MACRO_TRANS_8MAC(trans0, trans02, trans01, trans0123, trans4, trans46, trans45, trans4567, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_50_N89, TRANS8x8_OFFSET_18_75, TRANS8x8_OFFSET_N75_N18, TRANS8x8_OFFSET_89_N50, 9, _mm_storeu_si128, transformCoefficients, (dstStride))
27562756
MACRO_TRANS_8MAC(trans0, trans02, trans01, trans0123, trans4, trans46, trans45, trans4567, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_36_N83, TRANS8x8_OFFSET_83_N36, TRANS8x8_OFFSET_N36_83, TRANS8x8_OFFSET_N83_36, 9, _mm_storeu_si128, transformCoefficients, (2 * dstStride))
27572757
MACRO_TRANS_8MAC(trans0, trans02, trans01, trans0123, trans4, trans46, trans45, trans4567, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_18_N50, TRANS8x8_OFFSET_75_N89, TRANS8x8_OFFSET_89_N75, TRANS8x8_OFFSET_50_N18, 9, _mm_storeu_si128, transformCoefficients, (3 * dstStride))
2758-
2758+
27592759
(void)transformInnerArrayPtr;
27602760
}
27612761

@@ -2855,10 +2855,10 @@ void PfreqTransform8x8_SSE2_INTRIN(
28552855
MACRO_TRANS_4MAC_NO_SAVE(odd01_lo, odd01_hi, odd23_lo, odd23_hi, trans3, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_75_N18, TRANS8x8_OFFSET_N89_N50, shift)
28562856
//MACRO_TRANS_4MAC_NO_SAVE(odd01_lo, odd01_hi, odd23_lo, odd23_hi, trans5, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_50_N89, TRANS8x8_OFFSET_18_75, shift)
28572857
//MACRO_TRANS_4MAC_NO_SAVE(odd01_lo, odd01_hi, odd23_lo, odd23_hi, trans7, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_18_N50, TRANS8x8_OFFSET_75_N89, shift)
2858-
2858+
28592859
MACRO_UNPACK(32, trans0, trans1, trans2, trans3, trans4/*, trans5, trans6, trans7*/, trans1, trans1, trans1, trans01, trans23, trans45, trans67)
28602860
MACRO_UNPACK_V2(64, trans0, trans2, trans01, trans23, trans4, trans0, /*trans6,*/ trans45, trans67, trans02, trans0123)
2861-
2861+
28622862
xmm_offset = _mm_loadu_si128((__m128i *)(TransformIntrinConst + TRANS8x8_OFFSET_256));
28632863

28642864
MACRO_TRANS_8MAC_PF_N2(trans0, trans02, trans01, trans0123, trans4, trans45, trans45, trans45, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_64_64, TRANS8x8_OFFSET_64_64, TRANS8x8_OFFSET_64_64, TRANS8x8_OFFSET_64_64, 9, _mm_storeu_si128, transformCoefficients, 0)

Source/Lib/ASM_SSE2/x64inc.asm

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,30 @@
1-
;
1+
;
22
; Copyright(c) 2018 Intel Corporation
33
; SPDX - License - Identifier: BSD - 2 - Clause - Patent
4-
;
4+
;
5+
6+
%undef WIN64
7+
%undef UNIX64
8+
9+
%ifidn __OUTPUT_FORMAT__,win32
10+
%define WIN64
11+
%elifidn __OUTPUT_FORMAT__,win64
12+
%define WIN64
13+
%elifidn __OUTPUT_FORMAT__,x64
14+
%define WIN64
15+
%else
16+
%define UNIX64
17+
%endif
18+
19+
%ifidn __OUTPUT_FORMAT__,macho32
20+
%define FORMAT_MACHO
21+
%elifidn __OUTPUT_FORMAT__,macho64
22+
%define FORMAT_MACHO
23+
%endif
24+
25+
%ifdef FORMAT_MACHO
26+
%define PREFIX
27+
%endif
528

629
%ifdef PREFIX
730
%define mangle(x) _ %+ x
@@ -310,7 +333,7 @@ bits 64
310333
ADD_RSP 16
311334
movdqu xmm11, [rsp]
312335
ADD_RSP 16
313-
movdqu xmm10, [rsp]
336+
movdqu xmm10, [rsp]
314337
ADD_RSP 16
315338
movdqu xmm9, [rsp]
316339
ADD_RSP 16

0 commit comments

Comments
 (0)