Skip to content

Commit f7f4d70

Browse files
authored
Merge branch 'master' into cuda-ptx-null-terminate
2 parents 85098c0 + 8b2582d commit f7f4d70

File tree

21 files changed

+747
-75
lines changed

21 files changed

+747
-75
lines changed

Makefile

Lines changed: 23 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,8 @@ VIRTUAL_ENV_PATH := $(VENV)/bin
99
PYTHON_INTERPRETER := python3.10
1010
VENV_PIP := $(VIRTUAL_ENV_PATH)/pip
1111
VENV_PYTHON := $(VIRTUAL_ENV_PATH)/python
12-
MESON_SETUP := $(VIRTUAL_ENV_PATH)/meson setup
12+
MESON := $(VIRTUAL_ENV_PATH)/meson
13+
MESON_SETUP := $(MESON) setup
1314
NINJA := $(VIRTUAL_ENV_PATH)/ninja
1415

1516
# Build types and options
@@ -22,31 +23,31 @@ LIBVMAF_DIR := libvmaf
2223
BUILD_DIR := $(LIBVMAF_DIR)/build
2324
DEBUG_DIR := $(LIBVMAF_DIR)/debug
2425

25-
.PHONY: default all debug build install cythonize clean distclean
26+
.PHONY: default all debug build install cythonize clean distclean cythonize-deps
2627

2728
default: build
2829

2930
all: build debug install test cythonize
3031

31-
$(BUILD_DIR): $(VENV)
32+
$(BUILD_DIR): $(MESON) $(NINJA)
3233
PATH="$(VENV)/bin:$$PATH" $(MESON_SETUP) $(BUILD_DIR) $(LIBVMAF_DIR) $(BUILDTYPE_RELEASE) $(ENABLE_FLOAT)
3334

34-
$(DEBUG_DIR): $(VENV)
35+
$(DEBUG_DIR): $(MESON) $(NINJA)
3536
PATH="$(VENV)/bin:$$PATH" $(MESON_SETUP) $(DEBUG_DIR) $(LIBVMAF_DIR) $(BUILDTYPE_DEBUG) $(ENABLE_FLOAT)
3637

37-
cythonize: $(VENV)
38+
cythonize: cythonize-deps
3839
pushd python && ../$(VENV_PYTHON) setup.py build_ext --build-lib . && popd || exit 1
3940

40-
build: $(BUILD_DIR) $(VENV)
41+
build: $(BUILD_DIR) $(NINJA)
4142
PATH="$(VENV)/bin:$$PATH" $(NINJA) -vC $(BUILD_DIR)
4243

43-
test: build $(VENV)
44+
test: build $(NINJA)
4445
PATH="$(VENV)/bin:$$PATH" $(NINJA) -vC $(BUILD_DIR) test
4546

46-
debug: $(DEBUG_DIR) $(VENV)
47+
debug: $(DEBUG_DIR) $(NINJA)
4748
PATH="$(VENV)/bin:$$PATH" $(NINJA) -vC $(DEBUG_DIR)
4849

49-
install: $(BUILD_DIR) $(VENV)
50+
install: $(BUILD_DIR) $(NINJA)
5051
PATH="$(VENV)/bin:$$PATH" $(NINJA) -vC $(BUILD_DIR) install
5152

5253
clean:
@@ -57,10 +58,17 @@ distclean: clean
5758
rm -rf $(VENV)
5859

5960
# Set up or rebuild virtual environment
60-
$(VENV):
61+
$(VENV_PIP):
6162
@echo "Setting up the virtual environment..."
62-
@set -e; \
63-
$(PYTHON_INTERPRETER) -m venv $(VENV) || { echo "Failed to create virtual environment"; exit 1; }; \
64-
$(VENV_PIP) install --upgrade pip || { echo "Failed to upgrade pip"; exit 1; }; \
65-
$(VENV_PIP) install meson ninja cython numpy || { echo "Failed to install dependencies"; exit 1; }
66-
@echo "Virtual environment setup complete."
63+
$(PYTHON_INTERPRETER) -m venv $(VENV) || { echo "Failed to create virtual environment"; exit 1; }
64+
$(VENV_PIP) install --upgrade pip || { echo "Failed to upgrade pip"; exit 1; }
65+
@echo "Virtual environment setup complete."
66+
67+
$(MESON): $(VENV_PIP)
68+
$(VENV_PIP) install meson || { echo "Failed to install meson"; exit 1; }
69+
70+
$(NINJA): $(VENV_PIP)
71+
$(VENV_PIP) install ninja || { echo "Failed to install ninja"; exit 1; }
72+
73+
cythonize-deps: $(VENV_PIP)
74+
$(VENV_PIP) install setuptools cython numpy || { echo "Failed to install dependencies"; exit 1; }

libvmaf/meson.build

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,12 +20,12 @@ libvmaf_inc = include_directories(['include'])
2020

2121
# Arguments in test_args will be used even on feature tests
2222
test_args = []
23-
if host_machine.system() == 'linux' or host_machine.system() == 'windows'
23+
if host_machine.system() == 'linux' or host_machine.system() == 'windows' or host_machine.system() == 'cygwin'
2424
test_args += '-D_GNU_SOURCE'
25-
add_project_arguments('-D_GNU_SOURCE', language: 'c')
25+
add_project_arguments('-D_GNU_SOURCE', language: ['c', 'cpp'])
2626
elif host_machine.system() == 'darwin'
2727
test_args += '-D_DARWIN_C_SOURCE'
28-
add_project_arguments('-D_DARWIN_C_SOURCE', language: 'c')
28+
add_project_arguments('-D_DARWIN_C_SOURCE', language: ['c', 'cpp'])
2929
endif
3030

3131
# Header checks

libvmaf/src/cuda/picture_cuda.c

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
#include "picture_cuda.h"
2323
#include "common.h"
2424
#include "log.h"
25+
#include "ref.h"
2526

2627
#include <cuda.h>
2728
#include <errno.h>
@@ -41,7 +42,7 @@ int vmaf_cuda_picture_download_async(VmafPicture *cuda_pic, VmafPicture *pic,
4142

4243
VmafPicturePrivate *cuda_priv = cuda_pic->priv;
4344
for (int i = 0; i < 3; i++) {
44-
m.srcDevice = cuda_pic->data[i];
45+
m.srcDevice = (CUdeviceptr)cuda_pic->data[i];
4546
m.srcPitch = cuda_pic->stride[i];
4647
m.dstHost = pic->data[i];
4748
m.dstPitch = pic->stride[i];
@@ -68,7 +69,7 @@ int vmaf_cuda_picture_upload_async(VmafPicture *cuda_pic,
6869
for (int i = 0; i < 3; i++) {
6970
m.srcHost = pic->data[i];
7071
m.srcPitch = pic->stride[i];
71-
m.dstDevice = cuda_pic->data[i];
72+
m.dstDevice = (CUdeviceptr)cuda_pic->data[i];
7273
m.dstPitch = cuda_pic->stride[i];
7374
m.WidthInBytes = cuda_pic->w[i] * ((pic->bpc + 7) / 8);
7475
m.Height = cuda_pic->h[i];
@@ -84,6 +85,7 @@ int vmaf_cuda_picture_upload_async(VmafPicture *cuda_pic,
8485

8586
static int default_release_pinned_picture(VmafPicture *pic, void *cookie)
8687
{
88+
(void)cookie;
8789
if (!pic) return -EINVAL;
8890

8991
VmafPicturePrivate* priv = pic->priv;
@@ -197,7 +199,7 @@ int vmaf_cuda_picture_alloc(VmafPicture *pic, void *cookie)
197199
pic->data[1] = pic->data[2] = NULL;
198200
break;
199201
}
200-
CHECK_CUDA(cuMemAllocPitch(&pic->data[i], &pic->stride[i],
202+
CHECK_CUDA(cuMemAllocPitch((CUdeviceptr*)&pic->data[i], &pic->stride[i],
201203
pic->w[i] * ((pic->bpc + 7) / 8), pic->h[i],
202204
8 << hbd));
203205
}
@@ -223,7 +225,7 @@ int vmaf_cuda_picture_free(VmafPicture *pic, void *cookie)
223225

224226
for (int i = 0; i < 3; i++) {
225227
if (pic->data[i])
226-
CHECK_CUDA(cuMemFreeAsync(pic->data[i], priv->cuda.str));
228+
CHECK_CUDA(cuMemFreeAsync((CUdeviceptr)pic->data[i], priv->cuda.str));
227229
}
228230

229231
CHECK_CUDA(cuEventDestroy(priv->cuda.finished));

libvmaf/src/feature/adm.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ static char *init_dwt_band(adm_dwt_band_t *band, char *data_top, size_t buf_sz_o
4949
return data_top;
5050
}
5151

52-
__attribute__((unused))
52+
UNUSED_FUNCTION
5353
static char *init_dwt_band_d(adm_dwt_band_t_d *band, char *data_top, size_t buf_sz_one)
5454
{
5555
band->band_a = (double *)data_top; data_top += buf_sz_one;

libvmaf/src/feature/adm_tools.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -329,7 +329,7 @@ static const float dwt_7_9_basis_function_amplitudes[6][4] = {
329329
* lambda = 0 (finest scale), 1, 2, 3 (coarsest scale);
330330
* theta = 0 (ll), 1 (lh - vertical), 2 (hh - diagonal), 3(hl - horizontal).
331331
*/
332-
static FORCE_INLINE inline float dwt_quant_step(const struct dwt_model_params *params,
332+
static FORCE_INLINE float dwt_quant_step(const struct dwt_model_params *params,
333333
int lambda, int theta, double adm_norm_view_dist, int adm_ref_display_height)
334334
{
335335
// Formula (1), page 1165 - display visual resolution (DVR), in pixels/degree of visual angle. This should be 56.55

libvmaf/src/feature/arm64/vif_neon.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
99
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
1010

11-
static FORCE_INLINE inline void
11+
static FORCE_INLINE void
1212
pad_top_and_bottom(VifBuffer buf, unsigned h, int fwidth)
1313
{
1414
const unsigned fwidth_half = fwidth / 2;
@@ -27,7 +27,7 @@ pad_top_and_bottom(VifBuffer buf, unsigned h, int fwidth)
2727
}
2828
}
2929

30-
static FORCE_INLINE inline void
30+
static FORCE_INLINE void
3131
decimate_and_pad(VifBuffer buf, unsigned w, unsigned h, int scale)
3232
{
3333
uint16_t *ref = buf.ref;

libvmaf/src/feature/cambi.c

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -227,7 +227,7 @@ enum CambiTVIBisectFlag {
227227
CAMBI_TVI_BISECT_TOO_BIG
228228
};
229229

230-
static FORCE_INLINE inline int clip(int value, int low, int high) {
230+
static FORCE_INLINE int clip(int value, int low, int high) {
231231
return value < low ? low : (value > high ? high : value);
232232
}
233233

@@ -282,7 +282,7 @@ static int get_tvi_for_diff(int diff, double tvi_threshold, int bitdepth, VmafLu
282282
}
283283
}
284284

285-
static FORCE_INLINE inline void adjust_window_size(uint16_t *window_size,
285+
static FORCE_INLINE void adjust_window_size(uint16_t *window_size,
286286
unsigned input_width,
287287
unsigned input_height)
288288
{
@@ -725,7 +725,7 @@ static void filter_mode(const VmafPicture *image, int width, int height, uint16_
725725
}
726726
}
727727

728-
static FORCE_INLINE inline uint16_t ceil_log2(uint32_t num) {
728+
static FORCE_INLINE uint16_t ceil_log2(uint32_t num) {
729729
if (num==0)
730730
return 0;
731731

@@ -738,7 +738,7 @@ static FORCE_INLINE inline uint16_t ceil_log2(uint32_t num) {
738738
return shift;
739739
}
740740

741-
static FORCE_INLINE inline uint16_t get_mask_index(unsigned input_width, unsigned input_height,
741+
static FORCE_INLINE uint16_t get_mask_index(unsigned input_width, unsigned input_height,
742742
uint16_t filter_size) {
743743
uint32_t shifted_wh = (input_width >> 6) * (input_height >> 6);
744744
return (filter_size * filter_size + 3 * (ceil_log2(shifted_wh) - 11) - 1)>>1;
@@ -853,7 +853,7 @@ static float c_value_pixel(const uint16_t *histograms, uint16_t value, const int
853853
return c_value;
854854
}
855855

856-
static FORCE_INLINE inline void update_histogram_subtract_edge(uint16_t *histograms, uint16_t *image, uint16_t *mask,
856+
static FORCE_INLINE void update_histogram_subtract_edge(uint16_t *histograms, uint16_t *image, uint16_t *mask,
857857
int i, int j, int width, ptrdiff_t stride, uint16_t pad_size,
858858
const uint16_t num_diffs, VmafRangeUpdater dec_range_callback) {
859859
uint16_t mask_val = mask[(i - pad_size - 1) * stride + j];
@@ -863,7 +863,7 @@ static FORCE_INLINE inline void update_histogram_subtract_edge(uint16_t *histogr
863863
}
864864
}
865865

866-
static FORCE_INLINE inline void update_histogram_subtract(uint16_t *histograms, uint16_t *image, uint16_t *mask,
866+
static FORCE_INLINE void update_histogram_subtract(uint16_t *histograms, uint16_t *image, uint16_t *mask,
867867
int i, int j, int width, ptrdiff_t stride, uint16_t pad_size,
868868
const uint16_t num_diffs, VmafRangeUpdater dec_range_callback) {
869869
uint16_t mask_val = mask[(i - pad_size - 1) * stride + j];
@@ -873,7 +873,7 @@ static FORCE_INLINE inline void update_histogram_subtract(uint16_t *histograms,
873873
}
874874
}
875875

876-
static FORCE_INLINE inline void update_histogram_add_edge(uint16_t *histograms, uint16_t *image, uint16_t *mask,
876+
static FORCE_INLINE void update_histogram_add_edge(uint16_t *histograms, uint16_t *image, uint16_t *mask,
877877
int i, int j, int width, ptrdiff_t stride, uint16_t pad_size,
878878
const uint16_t num_diffs, VmafRangeUpdater inc_range_callback) {
879879
uint16_t mask_val = mask[(i + pad_size) * stride + j];
@@ -883,7 +883,7 @@ static FORCE_INLINE inline void update_histogram_add_edge(uint16_t *histograms,
883883
}
884884
}
885885

886-
static FORCE_INLINE inline void update_histogram_add(uint16_t *histograms, uint16_t *image, uint16_t *mask,
886+
static FORCE_INLINE void update_histogram_add(uint16_t *histograms, uint16_t *image, uint16_t *mask,
887887
int i, int j, int width, ptrdiff_t stride, uint16_t pad_size,
888888
const uint16_t num_diffs, VmafRangeUpdater inc_range_callback) {
889889
uint16_t mask_val = mask[(i + pad_size) * stride + j];
@@ -893,7 +893,7 @@ static FORCE_INLINE inline void update_histogram_add(uint16_t *histograms, uint1
893893
}
894894
}
895895

896-
static FORCE_INLINE inline void update_histogram_add_edge_first_pass(uint16_t *histograms, uint16_t *image, uint16_t *mask,
896+
static FORCE_INLINE void update_histogram_add_edge_first_pass(uint16_t *histograms, uint16_t *image, uint16_t *mask,
897897
int i, int j, int width, ptrdiff_t stride, uint16_t pad_size,
898898
const uint16_t num_diffs, VmafRangeUpdater inc_range_callback) {
899899
uint16_t mask_val = mask[i * stride + j];
@@ -903,7 +903,7 @@ static FORCE_INLINE inline void update_histogram_add_edge_first_pass(uint16_t *h
903903
}
904904
}
905905

906-
static FORCE_INLINE inline void update_histogram_add_first_pass(uint16_t *histograms, uint16_t *image, uint16_t *mask,
906+
static FORCE_INLINE void update_histogram_add_first_pass(uint16_t *histograms, uint16_t *image, uint16_t *mask,
907907
int i, int j, int width, ptrdiff_t stride, uint16_t pad_size,
908908
const uint16_t num_diffs, VmafRangeUpdater inc_range_callback) {
909909
uint16_t mask_val = mask[i * stride + j];
@@ -913,7 +913,7 @@ static FORCE_INLINE inline void update_histogram_add_first_pass(uint16_t *histog
913913
}
914914
}
915915

916-
static FORCE_INLINE inline void calculate_c_values_row(float *c_values, uint16_t *histograms, uint16_t *image,
916+
static FORCE_INLINE void calculate_c_values_row(float *c_values, uint16_t *histograms, uint16_t *image,
917917
uint16_t *mask, int row, int width, ptrdiff_t stride,
918918
const uint16_t num_diffs, const uint16_t *tvi_for_diff,
919919
const int *diff_weights, const int *all_diffs) {
@@ -1050,13 +1050,13 @@ static double spatial_pooling(float *c_values, double topk, unsigned width, unsi
10501050
return average_topk_elements(c_values, topk_num_elements);
10511051
}
10521052

1053-
static FORCE_INLINE inline uint16_t get_pixels_in_window(uint16_t window_length) {
1053+
static FORCE_INLINE uint16_t get_pixels_in_window(uint16_t window_length) {
10541054
uint16_t odd_length = 2 * (window_length >> 1) + 1;
10551055
return odd_length * odd_length;
10561056
}
10571057

10581058
// Inner product weighting scores for each scale
1059-
static FORCE_INLINE inline double weight_scores_per_scale(double *scores_per_scale, uint16_t normalization) {
1059+
static FORCE_INLINE double weight_scores_per_scale(double *scores_per_scale, uint16_t normalization) {
10601060
double score = 0.0;
10611061
for (unsigned scale = 0; scale < NUM_SCALES; scale++)
10621062
score += (scores_per_scale[scale] * g_scale_weights[scale]);

libvmaf/src/feature/common/convolution_internal.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
#include "macros.h"
2525
#include <stdbool.h>
2626

27-
FORCE_INLINE inline float convolution_edge_s(bool horizontal, const float *filter, int filter_width, const float *src, int width, int height, int stride, int i, int j)
27+
FORCE_INLINE float convolution_edge_s(bool horizontal, const float *filter, int filter_width, const float *src, int width, int height, int stride, int i, int j)
2828
{
2929
int radius = filter_width / 2;
3030

@@ -51,7 +51,7 @@ FORCE_INLINE inline float convolution_edge_s(bool horizontal, const float *filte
5151
return accum;
5252
}
5353

54-
FORCE_INLINE inline float convolution_edge_sq_s(bool horizontal, const float *filter, int filter_width, const float *src, int width, int height, int stride, int i, int j)
54+
FORCE_INLINE float convolution_edge_sq_s(bool horizontal, const float *filter, int filter_width, const float *src, int width, int height, int stride, int i, int j)
5555
{
5656
int radius = filter_width / 2;
5757

@@ -80,7 +80,7 @@ FORCE_INLINE inline float convolution_edge_sq_s(bool horizontal, const float *fi
8080
return accum;
8181
}
8282

83-
FORCE_INLINE inline float convolution_edge_xy_s(bool horizontal, const float *filter, int filter_width, const float *src1, const float *src2, int width, int height, int stride1, int stride2, int i, int j)
83+
FORCE_INLINE float convolution_edge_xy_s(bool horizontal, const float *filter, int filter_width, const float *src1, const float *src2, int width, int height, int stride1, int stride2, int i, int j)
8484
{
8585
int radius = filter_width / 2;
8686

libvmaf/src/feature/common/macros.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,9 +20,11 @@
2020
#define MACROS_H_
2121

2222
#if defined(_MSC_VER)
23-
#define FORCE_INLINE __forceinline
23+
#define FORCE_INLINE __forceinline
24+
#define UNUSED_FUNCTION /**/
2425
#else
25-
#define FORCE_INLINE __attribute__((always_inline))
26+
#define FORCE_INLINE __attribute__((always_inline)) inline
27+
#define UNUSED_FUNCTION __attribute__((unused))
2628
#endif
2729
#define RESTRICT __restrict
2830

libvmaf/src/feature/cuda/integer_adm_cuda.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -838,14 +838,14 @@ static void integer_compute_adm_cuda(VmafFeatureExtractor *fex, AdmStateCuda *s,
838838
// consumes reference picture
839839
// produces buf->ref_dwt2, buf->dis_dwt2
840840
if (ref_pic->bpc == 8) {
841-
dwt2_8_device(s, (const uint8_t*)ref_pic->data[0], &buf->ref_dwt2, buf->i4_ref_dwt2, (int16_t*)buf->tmp_ref->data, buf, w, h, curr_ref_stride, buf_stride, &p, vmaf_cuda_picture_get_stream(ref_pic));
841+
dwt2_8_device(s, (const uint8_t*)ref_pic->data[0], &buf->ref_dwt2, buf->i4_ref_dwt2, (short2*)buf->tmp_ref->data, buf, w, h, curr_ref_stride, buf_stride, &p, vmaf_cuda_picture_get_stream(ref_pic));
842842

843-
dwt2_8_device(s, (const uint8_t*)dis_pic->data[0], &buf->dis_dwt2, buf->i4_dis_dwt2, (int16_t*)buf->tmp_dis->data, buf, w, h, curr_dis_stride, buf_stride, &p, vmaf_cuda_picture_get_stream(dis_pic));
843+
dwt2_8_device(s, (const uint8_t*)dis_pic->data[0], &buf->dis_dwt2, buf->i4_dis_dwt2, (short2*)buf->tmp_dis->data, buf, w, h, curr_dis_stride, buf_stride, &p, vmaf_cuda_picture_get_stream(dis_pic));
844844
}
845845
else {
846-
adm_dwt2_16_device(s,(uint16_t*)ref_pic->data[0], &buf->ref_dwt2, buf->i4_ref_dwt2, (int16_t*)buf->tmp_ref->data, buf, w, h, curr_ref_stride, buf_stride, ref_pic->bpc, &p, vmaf_cuda_picture_get_stream(ref_pic));
846+
adm_dwt2_16_device(s,(uint16_t*)ref_pic->data[0], &buf->ref_dwt2, buf->i4_ref_dwt2, (short2*)buf->tmp_ref->data, buf, w, h, curr_ref_stride, buf_stride, ref_pic->bpc, &p, vmaf_cuda_picture_get_stream(ref_pic));
847847

848-
adm_dwt2_16_device(s,(uint16_t*)dis_pic->data[0], &buf->dis_dwt2, buf->i4_dis_dwt2, (int16_t*)buf->tmp_dis->data, buf, w, h, curr_dis_stride, buf_stride, dis_pic->bpc, &p, vmaf_cuda_picture_get_stream(dis_pic));
848+
adm_dwt2_16_device(s,(uint16_t*)dis_pic->data[0], &buf->dis_dwt2, buf->i4_dis_dwt2, (short2*)buf->tmp_dis->data, buf, w, h, curr_dis_stride, buf_stride, dis_pic->bpc, &p, vmaf_cuda_picture_get_stream(dis_pic));
849849

850850
}
851851
CHECK_CUDA(cuEventRecord(s->ref_event, vmaf_cuda_picture_get_stream(ref_pic)));

0 commit comments

Comments
 (0)