diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fdeffc1..06396fa 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,6 +5,7 @@ on: branches: [ "main" ] pull_request: branches: [ "main" ] + workflow_dispatch: jobs: build-linux-amd64: @@ -68,6 +69,25 @@ jobs: cmake -S ${{github.workspace}} -B ${{github.workspace}}/build-arm -DCMAKE_TOOLCHAIN_FILE=${{github.workspace}}/arm64_toolchain.cmake -DCMAKE_BUILD_TYPE=Release cmake --build ${{github.workspace}}/build-arm + build-linux-s390x: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Set up CMake + uses: jwlawson/actions-setup-cmake@v2 + + - name: Install s390x toolchain + run: | + sudo apt-get update + sudo apt-get install -y gcc-s390x-linux-gnu g++-s390x-linux-gnu binutils-s390x-linux-gnu libc6-s390x-cross + + - name: Build (crosscompile) PC Linux s390x + run: | + cmake -S ${{github.workspace}} -B ${{github.workspace}}/build-s390x -DCMAKE_SYSTEM_NAME=Linux -DCMAKE_SYSTEM_PROCESSOR=s390x -DCMAKE_C_COMPILER=s390x-linux-gnu-gcc -DCMAKE_CXX_COMPILER=s390x-linux-gnu-g++ -DCMAKE_BUILD_TYPE=Release + cmake --build ${{github.workspace}}/build-s390x + build-darwin-arm64: runs-on: macos-latest steps: @@ -124,3 +144,29 @@ jobs: with: name: LastTest.log path: ${{github.workspace}}/build/Testing/Temporary/LastTest.log + + test-linux-s390x: + runs-on: ubuntu-latest + env: + QEMU_LD_PREFIX: /usr/s390x-linux-gnu + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Set up CMake + uses: jwlawson/actions-setup-cmake@v2 + + - name: Install s390x toolchain and emulator + run: | + sudo apt-get update + sudo apt-get install -y gcc-s390x-linux-gnu g++-s390x-linux-gnu binutils-s390x-linux-gnu libc6-s390x-cross qemu-user qemu-user-static + + - name: Build (crosscompile) PC Linux s390x for tests + run: | + cmake -S ${{github.workspace}} -B ${{github.workspace}}/build-s390x -DCMAKE_SYSTEM_NAME=Linux 
-DCMAKE_SYSTEM_PROCESSOR=s390x -DCMAKE_C_COMPILER=s390x-linux-gnu-gcc -DCMAKE_CXX_COMPILER=s390x-linux-gnu-g++ -DCMAKE_BUILD_TYPE=Release -DCMAKE_CROSSCOMPILING_EMULATOR="qemu-s390x;-L;/usr/s390x-linux-gnu" + cmake --build ${{github.workspace}}/build-s390x + + - name: Run tests for s390x + run: | + cd build-s390x + ctest --output-on-failure --timeout 15 diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c568e1..a6315ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -272,7 +272,7 @@ set_tests_properties(encode PROPERTIES # Test - decode add_test(NAME decode COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/oapv_app_dec -i out.oapv) set_tests_properties(decode PROPERTIES - TIMEOUT 10 + TIMEOUT 20 DEPENDS encode FAIL_REGULAR_EXPRESSION "Decoded frame count = 0" PASS_REGULAR_EXPRESSION "Decoded frame count = 125" @@ -282,7 +282,7 @@ set_tests_properties(decode PROPERTIES # Test - decode qp_A.apv add_test(NAME decode_qp_A COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/oapv_app_dec -i ${CMAKE_CURRENT_SOURCE_DIR}/test/bitstream/qp_A.apv -v 3 --hash) set_tests_properties(decode_qp_A PROPERTIES - TIMEOUT 10 + TIMEOUT 20 FAIL_REGULAR_EXPRESSION "Decoded frame count = 0" PASS_REGULAR_EXPRESSION "Decoded frame count = 3" PASS_REGULAR_EXPRESSION ".*hash:match.*" @@ -291,7 +291,7 @@ set_tests_properties(decode_qp_A PROPERTIES # Test - decode qp_B.apv add_test(NAME decode_qp_B COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/oapv_app_dec -i ${CMAKE_CURRENT_SOURCE_DIR}/test/bitstream/qp_B.apv -v 3 --hash) set_tests_properties(decode_qp_B PROPERTIES - TIMEOUT 10 + TIMEOUT 20 FAIL_REGULAR_EXPRESSION "Decoded frame count = 0" PASS_REGULAR_EXPRESSION "Decoded frame count = 3" PASS_REGULAR_EXPRESSION ".*hash:match.*" @@ -300,7 +300,7 @@ set_tests_properties(decode_qp_B PROPERTIES # Test - decode qp_C.apv add_test(NAME decode_qp_C COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/oapv_app_dec -i ${CMAKE_CURRENT_SOURCE_DIR}/test/bitstream/qp_C.apv -v 3 --hash) set_tests_properties(decode_qp_C PROPERTIES - TIMEOUT 10 
+ TIMEOUT 20 FAIL_REGULAR_EXPRESSION "Decoded frame count = 0" PASS_REGULAR_EXPRESSION "Decoded frame count = 3" PASS_REGULAR_EXPRESSION ".*hash:match.*" @@ -309,7 +309,7 @@ set_tests_properties(decode_qp_C PROPERTIES # Test - decode qp_D.apv add_test(NAME decode_qp_D COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/oapv_app_dec -i ${CMAKE_CURRENT_SOURCE_DIR}/test/bitstream/qp_D.apv -v 3 --hash) set_tests_properties(decode_qp_D PROPERTIES - TIMEOUT 10 + TIMEOUT 20 FAIL_REGULAR_EXPRESSION "Decoded frame count = 0" PASS_REGULAR_EXPRESSION "Decoded frame count = 3" PASS_REGULAR_EXPRESSION ".*hash:match.*" @@ -318,7 +318,7 @@ set_tests_properties(decode_qp_D PROPERTIES # Test - decode qp_E.apv add_test(NAME decode_qp_E COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/oapv_app_dec -i ${CMAKE_CURRENT_SOURCE_DIR}/test/bitstream/qp_E.apv -v 3 --hash) set_tests_properties(decode_qp_E PROPERTIES - TIMEOUT 10 + TIMEOUT 20 FAIL_REGULAR_EXPRESSION "Decoded frame count = 0" PASS_REGULAR_EXPRESSION "Decoded frame count = 3" PASS_REGULAR_EXPRESSION ".*hash:match.*" @@ -327,7 +327,7 @@ set_tests_properties(decode_qp_E PROPERTIES # Test - decode syn_A.apv add_test(NAME decode_syn_A COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/oapv_app_dec -i ${CMAKE_CURRENT_SOURCE_DIR}/test/bitstream/syn_A.apv -v 3 --hash) set_tests_properties(decode_syn_A PROPERTIES - TIMEOUT 10 + TIMEOUT 20 FAIL_REGULAR_EXPRESSION "Decoded frame count = 0" PASS_REGULAR_EXPRESSION "Decoded frame count = 3" PASS_REGULAR_EXPRESSION ".*hash:match.*" @@ -336,7 +336,7 @@ set_tests_properties(decode_syn_A PROPERTIES # Test - decode syn_B.apv add_test(NAME decode_syn_B COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/oapv_app_dec -i ${CMAKE_CURRENT_SOURCE_DIR}/test/bitstream/syn_B.apv -v 3 --hash) set_tests_properties(decode_syn_B PROPERTIES - TIMEOUT 10 + TIMEOUT 20 FAIL_REGULAR_EXPRESSION "Decoded frame count = 0" PASS_REGULAR_EXPRESSION "Decoded frame count = 3" PASS_REGULAR_EXPRESSION ".*hash:match.*" @@ -345,7 +345,7 @@ 
set_tests_properties(decode_syn_B PROPERTIES # Test - decode tile_A.apv add_test(NAME decode_tile_A COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/oapv_app_dec -i ${CMAKE_CURRENT_SOURCE_DIR}/test/bitstream/tile_A.apv -v 3 --hash) set_tests_properties(decode_tile_A PROPERTIES - TIMEOUT 10 + TIMEOUT 20 FAIL_REGULAR_EXPRESSION "Decoded frame count = 0" PASS_REGULAR_EXPRESSION "Decoded frame count = 3" PASS_REGULAR_EXPRESSION ".*hash:match.*" @@ -354,7 +354,7 @@ set_tests_properties(decode_tile_A PROPERTIES # Test - decode tile_B.apv add_test(NAME decode_tile_B COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/oapv_app_dec -i ${CMAKE_CURRENT_SOURCE_DIR}/test/bitstream/tile_B.apv -v 3 --hash) set_tests_properties(decode_tile_B PROPERTIES - TIMEOUT 10 + TIMEOUT 20 FAIL_REGULAR_EXPRESSION "Decoded frame count = 0" PASS_REGULAR_EXPRESSION "Decoded frame count = 3" PASS_REGULAR_EXPRESSION ".*hash:match.*" @@ -363,7 +363,7 @@ set_tests_properties(decode_tile_B PROPERTIES # Test - decode tile_C.apv add_test(NAME decode_tile_C COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/oapv_app_dec -i ${CMAKE_CURRENT_SOURCE_DIR}/test/bitstream/tile_C.apv -v 3 --hash) set_tests_properties(decode_tile_C PROPERTIES - TIMEOUT 10 + TIMEOUT 20 FAIL_REGULAR_EXPRESSION "Decoded frame count = 0" PASS_REGULAR_EXPRESSION "Decoded frame count = 3" PASS_REGULAR_EXPRESSION ".*hash:match.*" @@ -372,7 +372,7 @@ set_tests_properties(decode_tile_C PROPERTIES # Test - decode tile_D.apv add_test(NAME decode_tile_D COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/oapv_app_dec -i ${CMAKE_CURRENT_SOURCE_DIR}/test/bitstream/tile_D.apv -v 3 --hash) set_tests_properties(decode_tile_D PROPERTIES - TIMEOUT 10 + TIMEOUT 20 FAIL_REGULAR_EXPRESSION "Decoded frame count = 0" PASS_REGULAR_EXPRESSION "Decoded frame count = 3" PASS_REGULAR_EXPRESSION ".*hash:match.*" @@ -381,7 +381,7 @@ set_tests_properties(decode_tile_D PROPERTIES # Test - decode tile_E.apv add_test(NAME decode_tile_E COMMAND ${CMAKE_CURRENT_BINARY_DIR}/bin/oapv_app_dec -i 
${CMAKE_CURRENT_SOURCE_DIR}/test/bitstream/tile_E.apv -v 3 --hash) set_tests_properties(decode_tile_E PROPERTIES - TIMEOUT 10 + TIMEOUT 20 FAIL_REGULAR_EXPRESSION "Decoded frame count = 0" PASS_REGULAR_EXPRESSION "Decoded frame count = 3" PASS_REGULAR_EXPRESSION ".*hash:match.*" diff --git a/src/oapv_port.h b/src/oapv_port.h index a084bc1..3edeb2c 100644 --- a/src/oapv_port.h +++ b/src/oapv_port.h @@ -203,5 +203,51 @@ static __inline void oapv_mset_16b(s16 *dst, s16 v, int cnt) /* CPU information */ int oapv_get_num_cpu_cores(void); +/* Endianness handling */ +#if defined(__GNUC__) || defined(__clang__) +#define OAPV_SWAP16(x) __builtin_bswap16(x) +#define OAPV_SWAP32(x) __builtin_bswap32(x) +#define OAPV_SWAP64(x) __builtin_bswap64(x) +#elif defined(_MSC_VER) +#define OAPV_SWAP16(x) _byteswap_ushort(x) +#define OAPV_SWAP32(x) _byteswap_ulong(x) +#define OAPV_SWAP64(x) _byteswap_uint64(x) +#else +#define OAPV_SWAP16(x) ((((x) & 0xff00) >> 8) | (((x) & 0x00ff) << 8)) +#define OAPV_SWAP32(x) ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \ + (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24)) +#define OAPV_SWAP64(x) ((((x) & 0xff00000000000000ull) >> 56) | \ + (((x) & 0x00ff000000000000ull) >> 40) | \ + (((x) & 0x0000ff0000000000ull) >> 24) | \ + (((x) & 0x000000ff00000000ull) >> 8) | \ + (((x) & 0x00000000ff000000ull) << 8) | \ + (((x) & 0x0000000000ff0000ull) << 24) | \ + (((x) & 0x000000000000ff00ull) << 40) | \ + (((x) & 0x00000000000000ffull) << 56)) +#endif + +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#define OAPV_BIG_ENDIAN 1 +#define OAPV_CPU_TO_LE32(x) OAPV_SWAP32(x) +#define OAPV_CPU_TO_LE64(x) OAPV_SWAP64(x) +#define OAPV_LE32_TO_CPU(x) OAPV_SWAP32(x) +#define OAPV_LE64_TO_CPU(x) OAPV_SWAP64(x) +#define OAPV_CPU_TO_BE32(x) (x) +#define OAPV_CPU_TO_BE64(x) (x) +#define OAPV_BE32_TO_CPU(x) (x) +#define OAPV_BE64_TO_CPU(x) (x) +#else +// Little Endian (Default for x86/ARM) +#define OAPV_LITTLE_ENDIAN 1 +#define 
OAPV_CPU_TO_LE32(x) (x) +#define OAPV_CPU_TO_LE64(x) (x) +#define OAPV_LE32_TO_CPU(x) (x) +#define OAPV_LE64_TO_CPU(x) (x) +#define OAPV_CPU_TO_BE32(x) OAPV_SWAP32(x) +#define OAPV_CPU_TO_BE64(x) OAPV_SWAP64(x) +#define OAPV_BE32_TO_CPU(x) OAPV_SWAP32(x) +#define OAPV_BE64_TO_CPU(x) OAPV_SWAP64(x) +#endif + #endif /* _OAPV_PORT_H_ */ diff --git a/src/oapv_util.c b/src/oapv_util.c index f6a0cec..b369112 100644 --- a/src/oapv_util.c +++ b/src/oapv_util.c @@ -40,97 +40,109 @@ #define HH(x, y, z) (x ^ y ^ z) #define II(x, y, z) (y ^ (x | ~z)) -static void md5_trans(u32 *buf, u32 *msg) +static void md5_trans(u32 *buf, const u8 *msg) { register u32 a, b, c, d; +#if OAPV_BIG_ENDIAN + u32 x[16]; /* 64-byte block decoded into host-order words */ + int i; + const u8 *p = msg; /* read byte-wise: msg may point at any byte offset, so no u32 cast */ + for (i = 0; i < 16; i++, p += 4) { + x[i] = (u32)p[0] | ((u32)p[1] << 8) | ((u32)p[2] << 16) | ((u32)p[3] << 24); /* little-endian word */ + } + const u32 *blk = x; +#else // Little Endian + const u32 *blk = (const u32 *)msg; +#endif + a = buf[0]; b = buf[1]; c = buf[2]; d = buf[3]; - MD5FUNC(FF, a, b, c, d, msg[0], 7, 0xd76aa478); /* 1 */ - MD5FUNC(FF, d, a, b, c, msg[1], 12, 0xe8c7b756); /* 2 */ - MD5FUNC(FF, c, d, a, b, msg[2], 17, 0x242070db); /* 3 */ - MD5FUNC(FF, b, c, d, a, msg[3], 22, 0xc1bdceee); /* 4 */ + MD5FUNC(FF, a, b, c, d, blk[0], 7, 0xd76aa478); /* 1 */ + MD5FUNC(FF, d, a, b, c, blk[1], 12, 0xe8c7b756); /* 2 */ + MD5FUNC(FF, c, d, a, b, blk[2], 17, 0x242070db); /* 3 */ + MD5FUNC(FF, b, c, d, a, blk[3], 22, 0xc1bdceee); /* 4 */ - MD5FUNC(FF, a, b, c, d, msg[4], 7, 0xf57c0faf); /* 5 */ - MD5FUNC(FF, d, a, b, c, msg[5], 12, 0x4787c62a); /* 6 */ - MD5FUNC(FF, c, d, a, b, msg[6], 17, 0xa8304613); /* 7 */ - MD5FUNC(FF, b, c, d, a, msg[7], 22, 0xfd469501); /* 8 */ + MD5FUNC(FF, a, b, c, d, blk[4], 7, 0xf57c0faf); /* 5 */ + MD5FUNC(FF, d, a, b, c, blk[5], 12, 0x4787c62a); /* 6 */ + MD5FUNC(FF, c, d, a, b, blk[6], 17, 0xa8304613); /* 7 */ + MD5FUNC(FF, b, c, d, a, blk[7], 22, 0xfd469501); /* 8 */ - MD5FUNC(FF, a, b, c, d, msg[8], 7, 0x698098d8); /* 9 */ - MD5FUNC(FF, d, a, b, c, msg[9], 12, 
0x8b44f7af); /* 10 */ - MD5FUNC(FF, c, d, a, b, msg[10], 17, 0xffff5bb1); /* 11 */ - MD5FUNC(FF, b, c, d, a, msg[11], 22, 0x895cd7be); /* 12 */ + MD5FUNC(FF, a, b, c, d, blk[8], 7, 0x698098d8); /* 9 */ + MD5FUNC(FF, d, a, b, c, blk[9], 12, 0x8b44f7af); /* 10 */ + MD5FUNC(FF, c, d, a, b, blk[10], 17, 0xffff5bb1); /* 11 */ + MD5FUNC(FF, b, c, d, a, blk[11], 22, 0x895cd7be); /* 12 */ - MD5FUNC(FF, a, b, c, d, msg[12], 7, 0x6b901122); /* 13 */ - MD5FUNC(FF, d, a, b, c, msg[13], 12, 0xfd987193); /* 14 */ - MD5FUNC(FF, c, d, a, b, msg[14], 17, 0xa679438e); /* 15 */ - MD5FUNC(FF, b, c, d, a, msg[15], 22, 0x49b40821); /* 16 */ + MD5FUNC(FF, a, b, c, d, blk[12], 7, 0x6b901122); /* 13 */ + MD5FUNC(FF, d, a, b, c, blk[13], 12, 0xfd987193); /* 14 */ + MD5FUNC(FF, c, d, a, b, blk[14], 17, 0xa679438e); /* 15 */ + MD5FUNC(FF, b, c, d, a, blk[15], 22, 0x49b40821); /* 16 */ /* Round 2 */ - MD5FUNC(GG, a, b, c, d, msg[1], 5, 0xf61e2562); /* 17 */ - MD5FUNC(GG, d, a, b, c, msg[6], 9, 0xc040b340); /* 18 */ - MD5FUNC(GG, c, d, a, b, msg[11], 14, 0x265e5a51); /* 19 */ - MD5FUNC(GG, b, c, d, a, msg[0], 20, 0xe9b6c7aa); /* 20 */ - - MD5FUNC(GG, a, b, c, d, msg[5], 5, 0xd62f105d); /* 21 */ - MD5FUNC(GG, d, a, b, c, msg[10], 9, 0x2441453); /* 22 */ - MD5FUNC(GG, c, d, a, b, msg[15], 14, 0xd8a1e681); /* 23 */ - MD5FUNC(GG, b, c, d, a, msg[4], 20, 0xe7d3fbc8); /* 24 */ - - MD5FUNC(GG, a, b, c, d, msg[9], 5, 0x21e1cde6); /* 25 */ - MD5FUNC(GG, d, a, b, c, msg[14], 9, 0xc33707d6); /* 26 */ - MD5FUNC(GG, c, d, a, b, msg[3], 14, 0xf4d50d87); /* 27 */ - MD5FUNC(GG, b, c, d, a, msg[8], 20, 0x455a14ed); /* 28 */ - - MD5FUNC(GG, a, b, c, d, msg[13], 5, 0xa9e3e905); /* 29 */ - MD5FUNC(GG, d, a, b, c, msg[2], 9, 0xfcefa3f8); /* 30 */ - MD5FUNC(GG, c, d, a, b, msg[7], 14, 0x676f02d9); /* 31 */ - MD5FUNC(GG, b, c, d, a, msg[12], 20, 0x8d2a4c8a); /* 32 */ + MD5FUNC(GG, a, b, c, d, blk[1], 5, 0xf61e2562); /* 17 */ + MD5FUNC(GG, d, a, b, c, blk[6], 9, 0xc040b340); /* 18 */ + MD5FUNC(GG, c, d, a, b, blk[11], 
14, 0x265e5a51); /* 19 */ + MD5FUNC(GG, b, c, d, a, blk[0], 20, 0xe9b6c7aa); /* 20 */ + + MD5FUNC(GG, a, b, c, d, blk[5], 5, 0xd62f105d); /* 21 */ + MD5FUNC(GG, d, a, b, c, blk[10], 9, 0x2441453); /* 22 */ + MD5FUNC(GG, c, d, a, b, blk[15], 14, 0xd8a1e681); /* 23 */ + MD5FUNC(GG, b, c, d, a, blk[4], 20, 0xe7d3fbc8); /* 24 */ + + MD5FUNC(GG, a, b, c, d, blk[9], 5, 0x21e1cde6); /* 25 */ + MD5FUNC(GG, d, a, b, c, blk[14], 9, 0xc33707d6); /* 26 */ + MD5FUNC(GG, c, d, a, b, blk[3], 14, 0xf4d50d87); /* 27 */ + MD5FUNC(GG, b, c, d, a, blk[8], 20, 0x455a14ed); /* 28 */ + + MD5FUNC(GG, a, b, c, d, blk[13], 5, 0xa9e3e905); /* 29 */ + MD5FUNC(GG, d, a, b, c, blk[2], 9, 0xfcefa3f8); /* 30 */ + MD5FUNC(GG, c, d, a, b, blk[7], 14, 0x676f02d9); /* 31 */ + MD5FUNC(GG, b, c, d, a, blk[12], 20, 0x8d2a4c8a); /* 32 */ /* Round 3 */ - MD5FUNC(HH, a, b, c, d, msg[5], 4, 0xfffa3942); /* 33 */ - MD5FUNC(HH, d, a, b, c, msg[8], 11, 0x8771f681); /* 34 */ - MD5FUNC(HH, c, d, a, b, msg[11], 16, 0x6d9d6122); /* 35 */ - MD5FUNC(HH, b, c, d, a, msg[14], 23, 0xfde5380c); /* 36 */ - - MD5FUNC(HH, a, b, c, d, msg[1], 4, 0xa4beea44); /* 37 */ - MD5FUNC(HH, d, a, b, c, msg[4], 11, 0x4bdecfa9); /* 38 */ - MD5FUNC(HH, c, d, a, b, msg[7], 16, 0xf6bb4b60); /* 39 */ - MD5FUNC(HH, b, c, d, a, msg[10], 23, 0xbebfbc70); /* 40 */ - - MD5FUNC(HH, a, b, c, d, msg[13], 4, 0x289b7ec6); /* 41 */ - MD5FUNC(HH, d, a, b, c, msg[0], 11, 0xeaa127fa); /* 42 */ - MD5FUNC(HH, c, d, a, b, msg[3], 16, 0xd4ef3085); /* 43 */ - MD5FUNC(HH, b, c, d, a, msg[6], 23, 0x4881d05); /* 44 */ - - MD5FUNC(HH, a, b, c, d, msg[9], 4, 0xd9d4d039); /* 45 */ - MD5FUNC(HH, d, a, b, c, msg[12], 11, 0xe6db99e5); /* 46 */ - MD5FUNC(HH, c, d, a, b, msg[15], 16, 0x1fa27cf8); /* 47 */ - MD5FUNC(HH, b, c, d, a, msg[2], 23, 0xc4ac5665); /* 48 */ + MD5FUNC(HH, a, b, c, d, blk[5], 4, 0xfffa3942); /* 33 */ + MD5FUNC(HH, d, a, b, c, blk[8], 11, 0x8771f681); /* 34 */ + MD5FUNC(HH, c, d, a, b, blk[11], 16, 0x6d9d6122); /* 35 */ + MD5FUNC(HH, b, c, d, a, 
blk[14], 23, 0xfde5380c); /* 36 */ + + MD5FUNC(HH, a, b, c, d, blk[1], 4, 0xa4beea44); /* 37 */ + MD5FUNC(HH, d, a, b, c, blk[4], 11, 0x4bdecfa9); /* 38 */ + MD5FUNC(HH, c, d, a, b, blk[7], 16, 0xf6bb4b60); /* 39 */ + MD5FUNC(HH, b, c, d, a, blk[10], 23, 0xbebfbc70); /* 40 */ + + MD5FUNC(HH, a, b, c, d, blk[13], 4, 0x289b7ec6); /* 41 */ + MD5FUNC(HH, d, a, b, c, blk[0], 11, 0xeaa127fa); /* 42 */ + MD5FUNC(HH, c, d, a, b, blk[3], 16, 0xd4ef3085); /* 43 */ + MD5FUNC(HH, b, c, d, a, blk[6], 23, 0x4881d05); /* 44 */ + + MD5FUNC(HH, a, b, c, d, blk[9], 4, 0xd9d4d039); /* 45 */ + MD5FUNC(HH, d, a, b, c, blk[12], 11, 0xe6db99e5); /* 46 */ + MD5FUNC(HH, c, d, a, b, blk[15], 16, 0x1fa27cf8); /* 47 */ + MD5FUNC(HH, b, c, d, a, blk[2], 23, 0xc4ac5665); /* 48 */ /* Round 4 */ - MD5FUNC(II, a, b, c, d, msg[0], 6, 0xf4292244); /* 49 */ - MD5FUNC(II, d, a, b, c, msg[7], 10, 0x432aff97); /* 50 */ - MD5FUNC(II, c, d, a, b, msg[14], 15, 0xab9423a7); /* 51 */ - MD5FUNC(II, b, c, d, a, msg[5], 21, 0xfc93a039); /* 52 */ - - MD5FUNC(II, a, b, c, d, msg[12], 6, 0x655b59c3); /* 53 */ - MD5FUNC(II, d, a, b, c, msg[3], 10, 0x8f0ccc92); /* 54 */ - MD5FUNC(II, c, d, a, b, msg[10], 15, 0xffeff47d); /* 55 */ - MD5FUNC(II, b, c, d, a, msg[1], 21, 0x85845dd1); /* 56 */ - - MD5FUNC(II, a, b, c, d, msg[8], 6, 0x6fa87e4f); /* 57 */ - MD5FUNC(II, d, a, b, c, msg[15], 10, 0xfe2ce6e0); /* 58 */ - MD5FUNC(II, c, d, a, b, msg[6], 15, 0xa3014314); /* 59 */ - MD5FUNC(II, b, c, d, a, msg[13], 21, 0x4e0811a1); /* 60 */ - - MD5FUNC(II, a, b, c, d, msg[4], 6, 0xf7537e82); /* 61 */ - MD5FUNC(II, d, a, b, c, msg[11], 10, 0xbd3af235); /* 62 */ - MD5FUNC(II, c, d, a, b, msg[2], 15, 0x2ad7d2bb); /* 63 */ - MD5FUNC(II, b, c, d, a, msg[9], 21, 0xeb86d391); /* 64 */ + MD5FUNC(II, a, b, c, d, blk[0], 6, 0xf4292244); /* 49 */ + MD5FUNC(II, d, a, b, c, blk[7], 10, 0x432aff97); /* 50 */ + MD5FUNC(II, c, d, a, b, blk[14], 15, 0xab9423a7); /* 51 */ + MD5FUNC(II, b, c, d, a, blk[5], 21, 0xfc93a039); /* 52 */ + + MD5FUNC(II, 
a, b, c, d, blk[12], 6, 0x655b59c3); /* 53 */ + MD5FUNC(II, d, a, b, c, blk[3], 10, 0x8f0ccc92); /* 54 */ + MD5FUNC(II, c, d, a, b, blk[10], 15, 0xffeff47d); /* 55 */ + MD5FUNC(II, b, c, d, a, blk[1], 21, 0x85845dd1); /* 56 */ + + MD5FUNC(II, a, b, c, d, blk[8], 6, 0x6fa87e4f); /* 57 */ + MD5FUNC(II, d, a, b, c, blk[15], 10, 0xfe2ce6e0); /* 58 */ + MD5FUNC(II, c, d, a, b, blk[6], 15, 0xa3014314); /* 59 */ + MD5FUNC(II, b, c, d, a, blk[13], 21, 0x4e0811a1); /* 60 */ + + MD5FUNC(II, a, b, c, d, blk[4], 6, 0xf7537e82); /* 61 */ + MD5FUNC(II, d, a, b, c, blk[11], 10, 0xbd3af235); /* 62 */ + MD5FUNC(II, c, d, a, b, blk[2], 15, 0x2ad7d2bb); /* 63 */ + MD5FUNC(II, b, c, d, a, blk[9], 21, 0xeb86d391); /* 64 */ buf[0] += a; buf[1] += b; @@ -168,10 +180,10 @@ static void md5_update(oapv_md5_t *md5, void *buf_t, u32 len) if(len >= part_len) { oapv_mcpy(md5->msg + idx, buf, part_len); - md5_trans(md5->h, (u32 *)md5->msg); + md5_trans(md5->h, md5->msg); for(i = part_len; i + 63 < len; i += 64) { - md5_trans(md5->h, (u32 *)(buf + i)); + md5_trans(md5->h, buf + i); } idx = 0; } @@ -186,43 +198,23 @@ static void md5_update(oapv_md5_t *md5, void *buf_t, u32 len) static void md5_update_16(oapv_md5_t *md5, void *buf_t, u32 len) { - u16 *buf; - u32 i, idx, part_len, j; - u8 t[512]; - - buf = (u16 *)buf_t; - idx = (u32)((md5->bits[0] >> 3) & 0x3f); - - len = len * 2; - for(j = 0; j < len; j += 2) { - t[j] = (u8)(*(buf)); - t[j + 1] = *(buf) >> 8; - buf++; - } - - md5->bits[0] += (len << 3); - if(md5->bits[0] < (len << 3)) { - (md5->bits[1])++; - } - - md5->bits[1] += (len >> 29); - part_len = 64 - idx; - - if(len >= part_len) { - oapv_mcpy(md5->msg + idx, t, part_len); - md5_trans(md5->h, (u32 *)md5->msg); - - for(i = part_len; i + 63 < len; i += 64) { - md5_trans(md5->h, (u32 *)(t + i)); + u16 *buf = (u16 *)buf_t; + u8 t[1024]; + u32 i, j, chunk_len; + + i = 0; + while(i < len) { + chunk_len = len - i; + if(chunk_len > 512) + chunk_len = 512; + + for(j = 0; j < chunk_len; j++) { + t[j 
* 2] = (u8)(buf[i + j]); + t[j * 2 + 1] = (u8)(buf[i + j] >> 8); } - idx = 0; - } - else { - i = 0; - } - if(len - i > 0) { - oapv_mcpy(md5->msg + idx, t + i, len - i); + md5_update(md5, t, chunk_len * 2); + i += chunk_len; } } @@ -238,18 +230,33 @@ static void md5_finish(oapv_md5_t *md5, u8 digest[16]) if(cnt < 8) { oapv_mset(pos, 0, cnt); - md5_trans(md5->h, (u32 *)md5->msg); + md5_trans(md5->h, md5->msg); oapv_mset(md5->msg, 0, 56); } else { oapv_mset(pos, 0, cnt - 8); } - oapv_mcpy((md5->msg + 14 * sizeof(u32)), &md5->bits[0], sizeof(u32)); - oapv_mcpy((md5->msg + 15 * sizeof(u32)), &md5->bits[1], sizeof(u32)); - - md5_trans(md5->h, (u32 *)md5->msg); - oapv_mcpy(digest, md5->h, 16); + /* Append length in bits - Little Endian */ + md5->msg[56] = (u8)(md5->bits[0]); + md5->msg[57] = (u8)(md5->bits[0] >> 8); + md5->msg[58] = (u8)(md5->bits[0] >> 16); + md5->msg[59] = (u8)(md5->bits[0] >> 24); + md5->msg[60] = (u8)(md5->bits[1]); + md5->msg[61] = (u8)(md5->bits[1] >> 8); + md5->msg[62] = (u8)(md5->bits[1] >> 16); + md5->msg[63] = (u8)(md5->bits[1] >> 24); + + md5_trans(md5->h, md5->msg); + + /* Store state in digest - Little Endian */ + for (int i=0; i<4; i++) { + digest[i*4+0] = (u8)(md5->h[i]); + digest[i*4+1] = (u8)(md5->h[i] >> 8); + digest[i*4+2] = (u8)(md5->h[i] >> 16); + digest[i*4+3] = (u8)(md5->h[i] >> 24); + } + oapv_mset(md5, 0, sizeof(oapv_md5_t)); } @@ -263,6 +270,8 @@ void oapv_imgb_set_md5(oapv_imgb_t *imgb) oapv_md5_t md5[N_C]; int i, j; + int b_depth; /* byte depth taken from imgb->cs; selects the 8-bit or 16-bit hashing path below */ oapv_assert(imgb != NULL); + b_depth = OAPV_CS_GET_BYTE_DEPTH(imgb->cs); /* read imgb->cs only after the NULL check above */ memset(imgb->hash, 0, sizeof(imgb->hash)); @@ -270,7 +279,12 @@ void oapv_imgb_set_md5(oapv_imgb_t *imgb) md5_init(&md5[i]); for(j = 0; j < imgb->ah[i]; j++) { - md5_update(&md5[i], ((u8 *)imgb->a[i]) + j * imgb->s[i], imgb->aw[i] * 2); + if(b_depth >= 2) { + md5_update_16(&md5[i], ((u8 *)imgb->a[i]) + j * imgb->s[i], imgb->aw[i]); + } + else { + md5_update(&md5[i], ((u8 *)imgb->a[i]) + j * imgb->s[i], imgb->aw[i]); + } } 
md5_finish(&md5[i], imgb->hash[i]);