Skip to content

Commit 99e8081

Browse files
authored
Merge pull request #137 from AcademySoftwareFoundation/update_oapv_itrans_diff
s16 datatype for oapv_itrans_diff table and changes to oapv_adjust_itrans_avx()
2 parents 9328d09 + 108d7ab commit 99e8081

File tree

4 files changed

+682
-654
lines changed

4 files changed

+682
-654
lines changed

src/avx/oapv_tq_avx.c

Lines changed: 19 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -532,18 +532,27 @@ const oapv_fn_dquant_t oapv_tbl_fn_dquant_avx[2] =
532532

533533
/*
 * oapv_adjust_itrans_avx: for each of 64 elements, computes
 *     dst[k] = src[k] + ((oapv_itrans_diff[itrans_diff_idx][k] * diff_step
 *                         + (1 << (shift - 1))) >> shift)
 * using 256-bit integer intrinsics.
 *
 * This span is a rendered diff: lines following an "NNN-" marker are the
 * removed 32-bit implementation, lines following an "NNN+" marker are the
 * added implementation that reads the table as 16-bit values (per the commit
 * title; the table declaration itself is not visible here).
 *
 * src              input coefficients, 64 ints, loaded unaligned
 * dst              output coefficients, 64 ints, stored unaligned
 * itrans_diff_idx  row selector into oapv_itrans_diff
 * diff_step        multiplier applied to every table entry
 * shift            arithmetic right shift applied after adding the rounding
 *                  offset 1 << (shift - 1)
 *
 * NOTE(review): in the added version diff_step is masked to its low 16 bits
 * and consumed by a signed 16-bit multiply, and the rounding offset is
 * broadcast as a 16-bit value. That presumably requires diff_step to fit in
 * int16 and shift to be in 1..15 -- confirm against callers.
 */
void oapv_adjust_itrans_avx(int* src, int* dst, int itrans_diff_idx, int diff_step, int shift)
534534
{
/* Removed version: diff_step and the rounding offset broadcast as 32-bit lanes. */
535-
__m256i v0 = _mm256_set1_epi32(diff_step);
536-
__m256i v1 = _mm256_set1_epi32(1 << (shift - 1));
537-
__m256i s0, s1;
/* Added version: every 32-bit lane of v0 is the 16-bit pair (low = diff_step, high = 1), */
/* so a pairwise multiply-add against (table value, rounding offset) yields */
/* table * diff_step + offset in a single instruction. */
535+
__m256i v0 = _mm256_set1_epi32((1 << 16) | (diff_step & 0xffff));
/* Rounding offset replicated into every 16-bit lane. */
536+
__m256i v1 = _mm256_set1_epi16(1 << (shift - 1));
537+
__m256i s0, s1, d, d0, d1;
538538

/* Removed loop: 8 elements per iteration (32-bit table entries). */
539-
for (int j = 0; j < 64; j += 8) {
/* Added loop: 16 elements per iteration (one 256-bit load of 16-bit table entries). */
539+
for (int j = 0; j < 64; j += 16)
540+
{
540541
s0 = _mm256_loadu_si256((const __m256i*)(src + j));
541-
s1 = _mm256_loadu_si256((const __m256i*)(oapv_itrans_diff[itrans_diff_idx] + j));
542-
s1 = _mm256_mullo_epi32(s1, v0);
543-
s1 = _mm256_add_epi32(s1, v1);
544-
s1 = _mm256_srai_epi32(s1, shift);
545-
s1 = _mm256_add_epi32(s0, s1);
546-
_mm256_storeu_si256((__m256i*)(dst + j), s1);
/* Loads 256 bits of the selected table row starting at element j. */
542+
d = _mm256_loadu_si256((const __m256i*)(oapv_itrans_diff[itrans_diff_idx] + j));
543+
/* Interleave table values with the rounding offset, multiply-add with (diff_step, 1), */
/* shift, then add to src[j..j+7]. */
/* NOTE(review): _mm256_unpacklo_epi16 interleaves within each 128-bit lane, so d0 */
/* holds table entries j+0..3 and j+8..11 (not j+0..7). Unless oapv_itrans_diff is */
/* stored in a matching pre-permuted order, entries j+4..7 and j+8..11 are swapped */
/* relative to src; applying _mm256_permute4x64_epi64(d, 0xD8) right after the load */
/* would restore linear order -- confirm against the table layout and the C version. */
544+
d0 = _mm256_unpacklo_epi16(d, v1);
545+
d0 = _mm256_madd_epi16(d0, v0);
546+
d0 = _mm256_srai_epi32(d0, shift);
547+
d0 = _mm256_add_epi32(s0, d0);
548+
_mm256_storeu_si256((__m256i*)(dst + j), d0);
549+
/* Same steps with the high half of each 128-bit lane of d, combined with src[j+8..j+15]. */
/* NOTE(review): d1 correspondingly holds table entries j+4..7 and j+12..15. */
550+
s1 = _mm256_loadu_si256((const __m256i*)(src + j + 8));
551+
d1 = _mm256_unpackhi_epi16(d, v1);
552+
d1 = _mm256_madd_epi16(d1, v0);
553+
d1 = _mm256_srai_epi32(d1, shift);
554+
d1 = _mm256_add_epi32(s1, d1);
555+
_mm256_storeu_si256((__m256i*)(dst + j + 8), d1);
547556
}
548557
}
549558

0 commit comments

Comments
 (0)