Skip to content

Commit 80db494

Browse files
committed
Merge branch 'aswild-cuda-ptx-null-terminate'
2 parents 4db7c0c + f7f4d70 commit 80db494

File tree

7 files changed

+23
-18
lines changed

7 files changed

+23
-18
lines changed

libvmaf/src/feature/cuda/integer_adm_cuda.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,11 +1023,11 @@ static int init_fex_cuda(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt
10231023
CUmodule adm_cm_module, adm_csf_den_module, adm_csf_module, adm_decouple_module, adm_dwt_module;
10241024

10251025

1026-
CHECK_CUDA(cuModuleLoadData(&adm_dwt_module, src_adm_dwt2_ptx));
1027-
CHECK_CUDA(cuModuleLoadData(&adm_csf_module, src_adm_csf_ptx));
1028-
CHECK_CUDA(cuModuleLoadData(&adm_decouple_module, src_adm_decouple_ptx));
1029-
CHECK_CUDA(cuModuleLoadData(&adm_csf_den_module, src_adm_csf_den_ptx));
1030-
CHECK_CUDA(cuModuleLoadData(&adm_cm_module, src_adm_cm_ptx));
1026+
CHECK_CUDA(cuModuleLoadData(&adm_dwt_module, adm_dwt2_ptx));
1027+
CHECK_CUDA(cuModuleLoadData(&adm_csf_module, adm_csf_ptx));
1028+
CHECK_CUDA(cuModuleLoadData(&adm_decouple_module, adm_decouple_ptx));
1029+
CHECK_CUDA(cuModuleLoadData(&adm_csf_den_module, adm_csf_den_ptx));
1030+
CHECK_CUDA(cuModuleLoadData(&adm_cm_module, adm_cm_ptx));
10311031

10321032
// Get DWT kernel function pointers; see adm_dwt2.cu for the __global__ templated kernels
10331033
CHECK_CUDA(cuModuleGetFunction(&s->func_dwt_s123_combined_vert_kernel_0_0_int32_t, adm_dwt_module, "dwt_s123_combined_vert_kernel_0_0_int32_t"));

libvmaf/src/feature/cuda/integer_adm_cuda.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,10 @@ typedef struct AdmBufferCuda {
103103
void* results_host;
104104
} AdmBufferCuda;
105105

106-
extern unsigned char src_adm_dwt2_ptx[];
107-
extern unsigned char src_adm_csf_den_ptx[];
108-
extern unsigned char src_adm_csf_ptx[];
109-
extern unsigned char src_adm_decouple_ptx[];
110-
extern unsigned char src_adm_cm_ptx[];
106+
extern const unsigned char adm_dwt2_ptx[];
107+
extern const unsigned char adm_csf_den_ptx[];
108+
extern const unsigned char adm_csf_ptx[];
109+
extern const unsigned char adm_decouple_ptx[];
110+
extern const unsigned char adm_cm_ptx[];
111111

112112
#endif /* _FEATURE_ADM_CUDA_H_ */

libvmaf/src/feature/cuda/integer_motion_cuda.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ static int init_fex_cuda(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt
144144
CHECK_CUDA(cuEventCreate(&s->finished, CU_EVENT_DEFAULT));
145145

146146
CUmodule module;
147-
CHECK_CUDA(cuModuleLoadData(&module, src_motion_score_ptx));
147+
CHECK_CUDA(cuModuleLoadData(&module, motion_score_ptx));
148148

149149
CHECK_CUDA(cuModuleGetFunction(&s->funcbpc16, module, "calculate_motion_score_kernel_16bpc"));
150150
CHECK_CUDA(cuModuleGetFunction(&s->funcbpc8, module, "calculate_motion_score_kernel_8bpc"));

libvmaf/src/feature/cuda/integer_motion_cuda.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,5 @@
2424
#include "integer_motion.h"
2525
#include "common.h"
2626

27-
extern unsigned char src_motion_score_ptx[];
27+
extern const unsigned char motion_score_ptx[];
2828
#endif /* _FEATURE_MOTION_CUDA_H_ */

libvmaf/src/feature/cuda/integer_vif_cuda.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ static int init_fex_cuda(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt
104104

105105
// make this static
106106
CUmodule filter1d_module;
107-
CHECK_CUDA(cuModuleLoadData(&filter1d_module, src_filter1d_ptx));
107+
CHECK_CUDA(cuModuleLoadData(&filter1d_module, filter1d_ptx));
108108
CHECK_CUDA(cuModuleGetFunction(&s->func_filter1d_8_vertical_kernel_uint32_t_17_9,
109109
filter1d_module, "filter1d_8_vertical_kernel_uint32_t_17_9"));
110110
CHECK_CUDA(cuModuleGetFunction(&s->func_filter1d_8_horizontal_kernel_2_17_9,

libvmaf/src/feature/cuda/integer_vif_cuda.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,6 @@ typedef struct vif_accums {
8484
int64_t den_non_log;
8585
} vif_accums;
8686

87-
extern unsigned char src_filter1d_ptx[];
87+
extern const unsigned char filter1d_ptx[];
8888

8989
#endif /* _FEATURE_VIF_CUDA_H_ */

libvmaf/src/meson.build

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ if built_in_models_enabled
151151
output : model_file,
152152
copy: true
153153
),
154-
command : [xxd, '--include', '@INPUT@', '@OUTPUT@'],
154+
command : [xxd, '-i', '@INPUT@', '@OUTPUT@'],
155155
)
156156
endforeach
157157

@@ -333,14 +333,19 @@ if is_cuda_enabled
333333

334334
message('ptx_files = @0@'.format(ptx_files))
335335

336-
xxd_exe = find_program('xxd')
336+
# bin2c is distributed with the CUDA toolkit. Use '--padd 0x00' to append a NUL terminator byte
337+
# to the end of the generated array.
338+
bin2c_exe = find_program('bin2c')
337339
ptx_arrays = []
338340
foreach name, _ptx : ptx_files
339-
t = custom_target('ptx_xxd_@0@'.format(name),
341+
t = custom_target('ptx_bin2c_@0@'.format(name),
340342
build_by_default: true,
341343
output : ['@PLAINNAME@.c'],
342344
input : _ptx,
343-
command : [xxd_exe, '--include','@INPUT@', '@OUTPUT@'],
345+
capture : true,
346+
command : [bin2c_exe, '--const', '--padd', '0x00',
347+
'--name', '@BASENAME@_ptx', '@INPUT@',
348+
]
344349
)
345350
ptx_arrays += t
346351
endforeach

0 commit comments

Comments
 (0)