Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 106 additions & 19 deletions libvmaf/src/feature/integer_adm.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@

#if ARCH_X86
#include "x86/adm_avx2.h"
#if HAVE_AVX512
#include "x86/adm_avx512.h"
#endif
#elif ARCH_AARCH64
#include "arm64/adm_neon.h"
#include <arm_neon.h>
Expand All @@ -41,6 +44,30 @@ typedef struct AdmState {
void (*dwt2_8)(const uint8_t *src, const adm_dwt_band_t *dst,
AdmBuffer *buf, int w, int h, int src_stride,
int dst_stride);
void (*dwt2_16)(const uint16_t *src, const adm_dwt_band_t *dst,
AdmBuffer *buf, int w, int h, int src_stride,
int dst_stride, int inp_size_bits);
void (*adm_decouple)(AdmBuffer *buf, int w, int h, int stride,
double adm_enhn_gain_limit);
float (*adm_csf_den_scale)(const adm_dwt_band_t *src, int w, int h,
int src_stride, double adm_norm_view_dist,
int adm_ref_display_height);
void (*adm_csf)(AdmBuffer *buf, int w, int h, int stride,
double adm_norm_view_dist, int adm_ref_display_height);
float (*adm_cm)(AdmBuffer *buf, int w, int h, int src_stride, int csf_a_stride,
double adm_norm_view_dist, int adm_ref_display_height);
void (*adm_dwt2_s123_combined)(const int32_t *i4_ref_scale, const int32_t *i4_curr_dis,
AdmBuffer *buf, int w, int h, int ref_stride,
int dis_stride, int dst_stride, int scale);
float (*adm_csf_den_s123)(const i4_adm_dwt_band_t *src, int scale, int w, int h,
int src_stride, double adm_norm_view_dist,
int adm_ref_display_height);
void (*i4_adm_csf)(AdmBuffer *buf, int scale, int w, int h, int stride,
double adm_norm_view_dist, int adm_ref_display_height);
float (*i4_adm_cm)(AdmBuffer *buf, int w, int h, int src_stride,
int csf_a_stride, int scale, double adm_norm_view_dist,
int adm_ref_display_height);

VmafDictionary *feature_name_dict;
} AdmState;

Expand Down Expand Up @@ -657,7 +684,7 @@ static void dwt2_src_indices_filt(int **src_ind_y, int **src_ind_x, int w, int h
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))

static void adm_decouple(AdmBuffer *buf, int w, int h, int stride,
static inline void adm_decouple(AdmBuffer *buf, int w, int h, int stride,
double adm_enhn_gain_limit)
{
const float cos_1deg_sq = cos(1.0 * M_PI / 180.0) * cos(1.0 * M_PI / 180.0);
Expand All @@ -684,7 +711,6 @@ static void adm_decouple(AdmBuffer *buf, int w, int h, int stride,
if (bottom > h) {
bottom = h;
}

int64_t ot_dp, o_mag_sq, t_mag_sq;

for (int i = top; i < bottom; ++i) {
Expand Down Expand Up @@ -722,6 +748,7 @@ static void adm_decouple(AdmBuffer *buf, int w, int h, int stride,
o_mag_sq = (int64_t)oh * oh + (int64_t)ov * ov;
t_mag_sq = (int64_t)th * th + (int64_t)tv * tv;


/**
* angle_flag is calculated in floating-point by converting fixed-point variables back to
* floating-point
Expand All @@ -745,6 +772,7 @@ static void adm_decouple(AdmBuffer *buf, int w, int h, int stride,
int32_t kv = tmp_kv < 0 ? 0 : (tmp_kv > 32768 ? 32768 : tmp_kv);
int32_t kd = tmp_kd < 0 ? 0 : (tmp_kd > 32768 ? 32768 : tmp_kd);


/**
* kh,kv,kd are in Q15 type and oh,ov,od are in Q16 type hence shifted by
* 15 to make result Q16
Expand Down Expand Up @@ -903,12 +931,12 @@ static void adm_decouple_s123(AdmBuffer *buf, int w, int h, int stride,

rst_h = ((kh * oh) + 16384) >> 15;
rst_v = ((kv * ov) + 16384) >> 15;
rst_d = ((kd * od) + 16384) >> 15;
rst_d = ((kd * od) + 16384) >> 15;

const float rst_h_f = ((float)kh / 32768) * ((float)oh / 64);
const float rst_v_f = ((float)kv / 32768) * ((float)ov / 64);
const float rst_d_f = ((float)kd / 32768) * ((float)od / 64);

if (angle_flag && (rst_h_f > 0.)) rst_h = MIN((rst_h * adm_enhn_gain_limit), th);
if (angle_flag && (rst_h_f < 0.)) rst_h = MAX((rst_h * adm_enhn_gain_limit), th);

Expand Down Expand Up @@ -1416,6 +1444,7 @@ static float adm_cm(AdmBuffer *buf, int w, int h, int src_stride, int csf_a_stri
accum_inner_h = 0;
accum_inner_v = 0;
accum_inner_d = 0;

for (j = start_col; j < end_col; ++j) {
xh = src->band_h[i * src_stride + j] * i_rfactor[0];
xv = src->band_v[i * src_stride + j] * i_rfactor[1];
Expand Down Expand Up @@ -2429,7 +2458,7 @@ void integer_compute_adm(AdmState *s, VmafPicture *ref_pic, VmafPicture *dis_pic
{
int w = ref_pic->w[0];
int h = ref_pic->h[0];

unsigned flags = vmaf_get_cpu_flags();
const double numden_limit = 1e-10 * (w * h) / (1920.0 * 1080.0);

size_t curr_ref_stride;
Expand Down Expand Up @@ -2463,9 +2492,9 @@ void integer_compute_adm(AdmState *s, VmafPicture *ref_pic, VmafPicture *dis_pic
curr_dis_stride, buf_stride);
}
else {
adm_dwt2_16(ref_pic->data[0], &buf->ref_dwt2, buf, w, h,
s->dwt2_16(ref_pic->data[0], &buf->ref_dwt2, buf, w, h,
curr_ref_stride, buf_stride, ref_pic->bpc);
adm_dwt2_16(dis_pic->data[0], &buf->dis_dwt2, buf, w, h,
s->dwt2_16(dis_pic->data[0], &buf->dis_dwt2, buf, w, h,
curr_dis_stride, buf_stride, dis_pic->bpc);
}

Expand All @@ -2475,34 +2504,63 @@ void integer_compute_adm(AdmState *s, VmafPicture *ref_pic, VmafPicture *dis_pic
w = (w + 1) / 2;
h = (h + 1) / 2;

adm_decouple(buf, w, h, buf_stride, adm_enhn_gain_limit);
#if HAVE_AVX512
if (flags & VMAF_X86_CPU_FLAG_AVX512)
adm_decouple_avx512(buf, w, h, buf_stride, adm_enhn_gain_limit, div_lookup);
Copy link
Collaborator

@kylophone kylophone Oct 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here and elsewhere. Please check the CPU flags during init and set optimized path via function pointer.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is done with a new version of integer_adm.c provided in the commit 91942c5

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good, thank you.

else
adm_decouple(buf, w, h, buf_stride, adm_enhn_gain_limit);
#else
#if ARCH_X86
if (flags & VMAF_X86_CPU_FLAG_AVX2)
adm_decouple_avx2(buf, w, h, buf_stride, adm_enhn_gain_limit, div_lookup);
else
adm_decouple(buf, w, h, buf_stride, adm_enhn_gain_limit);
#else
adm_decouple(buf, w, h, buf_stride, adm_enhn_gain_limit);
#endif
#endif

den_scale = adm_csf_den_scale(&buf->ref_dwt2, w, h, buf_stride,
den_scale = s->adm_csf_den_scale(&buf->ref_dwt2, w, h, buf_stride,
adm_norm_view_dist, adm_ref_display_height);

adm_csf(buf, w, h, buf_stride, adm_norm_view_dist, adm_ref_display_height);
s->adm_csf(buf, w, h, buf_stride, adm_norm_view_dist, adm_ref_display_height);

num_scale = adm_cm(buf, w, h, buf_stride, buf_stride,
num_scale = s->adm_cm(buf, w, h, buf_stride, buf_stride,
adm_norm_view_dist, adm_ref_display_height);
}
else {
adm_dwt2_s123_combined(i4_curr_ref_scale, i4_curr_dis_scale, buf, w, h, curr_ref_stride,
curr_dis_stride, buf_stride, scale);
s->adm_dwt2_s123_combined(i4_curr_ref_scale, i4_curr_dis_scale, buf, w, h, curr_ref_stride,
curr_dis_stride, buf_stride, scale);

w = (w + 1) / 2;
h = (h + 1) / 2;

adm_decouple_s123(buf, w, h, buf_stride, adm_enhn_gain_limit);
#if HAVE_AVX512
if (flags & VMAF_X86_CPU_FLAG_AVX512)
adm_decouple_s123_avx2(buf, w, h, buf_stride, adm_enhn_gain_limit, div_lookup);
else
adm_decouple_s123(buf, w, h, buf_stride, adm_enhn_gain_limit);
#else
#if ARCH_X86
if (flags & VMAF_X86_CPU_FLAG_AVX2)
adm_decouple_s123_avx2(buf, w, h, buf_stride, adm_enhn_gain_limit, div_lookup);
else
adm_decouple_s123(buf, w, h, buf_stride, adm_enhn_gain_limit);
#else
adm_decouple_s123(buf, w, h, buf_stride, adm_enhn_gain_limit);
#endif
#endif

den_scale = adm_csf_den_s123(
den_scale = s->adm_csf_den_s123(
&buf->i4_ref_dwt2, scale, w, h, buf_stride,
adm_norm_view_dist, adm_ref_display_height);

s->i4_adm_csf(buf, scale, w, h, buf_stride,
adm_norm_view_dist, adm_ref_display_height);

i4_adm_csf(buf, scale, w, h, buf_stride,
adm_norm_view_dist, adm_ref_display_height);
num_scale = s->i4_adm_cm(buf, w, h, buf_stride, buf_stride, scale,
adm_norm_view_dist, adm_ref_display_height);

num_scale = i4_adm_cm(buf, w, h, buf_stride, buf_stride, scale,
adm_norm_view_dist, adm_ref_display_height);
}

num += num_scale;
Expand Down Expand Up @@ -2593,12 +2651,41 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt,
}

s->dwt2_8 = adm_dwt2_8;
s->dwt2_16 = adm_dwt2_16;
s->adm_csf_den_scale = adm_csf_den_scale;
s->adm_csf = adm_csf;
s->adm_cm = adm_cm;
s->adm_dwt2_s123_combined = adm_dwt2_s123_combined;
s->adm_csf_den_s123 = adm_csf_den_s123;
s->i4_adm_csf = i4_adm_csf;
s->i4_adm_cm = i4_adm_cm;

#if ARCH_X86
unsigned flags = vmaf_get_cpu_flags();
if (flags & VMAF_X86_CPU_FLAG_AVX2) {
if (!(w % 8)) s->dwt2_8 = adm_dwt2_8_avx2;
s->dwt2_16 = adm_dwt2_16_avx2;
s->adm_csf_den_scale = adm_csf_den_scale_avx2;
s->adm_csf = adm_csf_avx2;
s->adm_cm = adm_cm_avx2;
s->adm_csf_den_s123 = adm_csf_den_s123_avx2;
s->adm_dwt2_s123_combined = adm_dwt2_s123_combined_avx2;
s->i4_adm_csf = i4_adm_csf_avx2;
s->i4_adm_cm = i4_adm_cm_avx2;
}
#if HAVE_AVX512
if (flags & VMAF_X86_CPU_FLAG_AVX512) {
s->dwt2_8 = adm_dwt2_8_avx512;
s->dwt2_16 = adm_dwt2_16_avx512;
s->adm_csf_den_scale = adm_csf_den_scale_avx512;
s->adm_csf = adm_csf_avx512;
s->adm_cm = adm_cm_avx512;
s->adm_csf_den_s123 = adm_csf_den_s123_avx512;
s->adm_dwt2_s123_combined = adm_dwt2_s123_combined_avx512;
s->i4_adm_csf = i4_adm_csf_avx512;
s->i4_adm_cm = i4_adm_cm_avx512;
}
#endif
#elif ARCH_AARCH64
unsigned flags = vmaf_get_cpu_flags();
if (flags & VMAF_ARM_CPU_FLAG_NEON) {
Expand Down
20 changes: 18 additions & 2 deletions libvmaf/src/feature/integer_motion.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt,

MotionState *s = fex->priv;
int err = 0;
unsigned flags = vmaf_get_cpu_flags();

s->feature_name_dict =
vmaf_feature_name_dict_from_provided_features(fex->provided_features,
Expand All @@ -296,10 +297,17 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt,
if (err) goto fail;

s->y_convolution = bpc == 8 ? y_convolution_8 : y_convolution_16;
s->x_convolution = x_convolution_16;
#if ARCH_X86
if (flags & VMAF_X86_CPU_FLAG_AVX2)
s->y_convolution = bpc == 8 ? y_convolution_8_avx2 : y_convolution_16_avx2;
#if HAVE_AVX512
if (flags & VMAF_X86_CPU_FLAG_AVX512)
s->y_convolution = bpc == 8 ? y_convolution_8_avx512 : y_convolution_16_avx512;
#endif
#endif

s->x_convolution = x_convolution_16;
#if ARCH_X86
unsigned flags = vmaf_get_cpu_flags();
if (flags & VMAF_X86_CPU_FLAG_AVX2)
s->x_convolution = x_convolution_16_avx2;
#if HAVE_AVX512
Expand All @@ -309,6 +317,14 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt,
#endif

s->sad = sad_c;
#if ARCH_X86
if (flags & VMAF_X86_CPU_FLAG_AVX2)
s->sad = sad_avx2;
#if HAVE_AVX512
if (flags & VMAF_X86_CPU_FLAG_AVX512)
s->sad = sad_avx512;
#endif
#endif
s->score = 0.;

return 0;
Expand Down
2 changes: 2 additions & 0 deletions libvmaf/src/feature/integer_motion.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

#include <stdbool.h>
#include <stdint.h>
#include "picture.h"
#include "cpu.h"

static const uint16_t filter[5] = { 3571, 16004, 26386, 16004, 3571 };
static const int filter_width = sizeof(filter) / sizeof(filter[0]);
Expand Down
1 change: 1 addition & 0 deletions libvmaf/src/feature/integer_vif.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <stdint.h>
#include <stdbool.h>
#include <assert.h>
#include "cpu.h"

/* Enhancement gain imposed on vif, must be >= 1.0, where 1.0 means the gain is completely disabled */
#ifndef DEFAULT_VIF_ENHN_GAIN_LIMIT
Expand Down
Loading
Loading