Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package/crossfiles/aarch64-linux-clang.meson
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[binaries]
c = 'clang'
c = '/home/amunoz/ia2/llvm-project/build/bin/clang'
cpp = 'clang++'
ar = 'aarch64-linux-gnu-ar'
strip = 'aarch64-linux-gnu-strip'
Expand Down
38 changes: 22 additions & 16 deletions rewrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,12 +195,14 @@ def main(
rt_libs_build_dir = custom_llvm_project / "build-rtlibs"
cmake_cflags += [
"--rtlib=compiler-rt",
"--unwindlib=libunwind",
"--stdlib=libc++",
f"-I{str(rt_libs_build_dir / "include/c++/v1")}",
]
cmake_link_flags += [
"--rtlib=compiler-rt",
"--unwindlib=libunwind",
"--stdlib=libc++",
f"-B{str(rt_libs_build_dir / "compiler-rt/lib/linux")}",
f"-L{str(rt_libs_build_dir /"lib")}",
]
Expand Down Expand Up @@ -239,7 +241,7 @@ def main(
f"-DCMAKE_SHARED_LINKER_FLAGS={cmake_shared_linker_flags}",
]

cross_file = original_dir / "package" / "crossfiles" / f"{cross_target}.meson"
cross_file = original_dir / "package" / "crossfiles" / "aarch64-linux-clang.meson"
cross = parse_machine_files(
filenames=[str(cross_file)], sourcedir=str(original_dir)
)
Expand All @@ -252,8 +254,8 @@ def main(
ia2_dir,
"-G",
"Ninja",
f"-DClang_DIR={str(llvm_cmake_dir / ".." / "clang")}",
f"-DLLVM_DIR={str(llvm_cmake_dir)}",
f"-DClang_DIR=/usr/lib/llvm-14/lib/cmake/clang",
f"-DLLVM_DIR=/usr/lib/llvm-14/lib/cmake/llvm",
f"-DLLVM_EXTERNAL_LIT={str(lit.executable)}",
f"-DCMAKE_C_COMPILER={str(clang.executable)}",
f"-DCMAKE_CXX_COMPILER={str(clang_cpp.executable)}",
Expand Down Expand Up @@ -292,7 +294,7 @@ def main(
if stashed:
git["stash", "pop"]()

clang_include_dir = find_clang_include_dir(llvm_config)
clang_include_dir = "/usr/lib/llvm-14/lib/clang/14.0.0/include"

cc_text = cc_db.read_text()
cmds = json.loads(cc_text)
Expand All @@ -312,13 +314,11 @@ def main(
original_dir,
"--output-directory",
rewritten_dir,
f"--enable-dav1d_get_picture-post-condition={enable_dav1d_get_picture_post_condition}",
"-p",
cc_db.parent,
*extra_args(
"-isystem",
"include-fixed",
"-isystem",
"--extra-arg ",
clang_include_dir,
),
*srcs_to_rewrite,
Expand Down Expand Up @@ -389,10 +389,10 @@ def main(
"validate_input_or_ret(IA2_ADDR(s->allocator.release_picture_callback) != NULL,",
),
(
Path("callgate_wrapper.h"),
src / "../../dav1d-ia2/callgate_wrapper.h",
{
TargetArch.X86_64: "struct __va_list_tag *",
TargetArch.AArch64: "struct __va_list",
TargetArch.AArch64: "struct std::__va_list",
}[target_arch],
"va_list",
),
Expand Down Expand Up @@ -421,25 +421,31 @@ def main(
f"-Dia2_permissive_mode={permissive_mode}",
f"--buildtype={dav1d_meson_build_type.value}",
]()
retcode, stdout, stderr = ninja["tools/dav1d"].run(
retcode, stdout, stderr = ninja["tools/dav1d", "-vvv"].run(
# retcode=None,
stdout=sys.stdout,
stderr=sys.stderr,
)
# Path("ninja.out").write_text(stdout)
# Path("ninja.err").write_text(stderr)
assert retcode == 0
retcode, stdout, stderr = ninja["src/libdav1d.so"].run(
# retcode=None,
stdout=sys.stdout,
stderr=sys.stderr,
)
assert retcode == 0
canonicalize_compile_command_paths()

dav1d = rewritten_build_dir / "tools/dav1d"
pad_tls[dav1d]()

for ldd in parse_ldd(ldd[dav1d]()):
padded = rpath / ldd.name
if padded.exists() and ldd.path.samefile(padded):
continue
shutil.copy(ldd.path, padded)
pad_tls["--allow-no-tls", padded]()
#for ldd in parse_ldd(ldd[dav1d]()):
# padded = rpath / ldd.name
# if padded.exists() and ldd.path.samefile(padded):
# continue
# shutil.copy(ldd.path, padded)
# pad_tls["--allow-no-tls", padded]()


if __name__ == "__main__":
Expand Down
16 changes: 16 additions & 0 deletions src/arm/64/mc.S
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
#include "util.S"

.macro avg dst, t0, t1, t2, t3
extr x2, x2, x18, #56
extr x2, x2, x2, #8
extr x3, x3, x18, #56
extr x3, x3, x3, #8
ld1 {\t0\().8h,\t1\().8h}, [x2], 32
ld1 {\t2\().8h,\t3\().8h}, [x3], 32
add \t0\().8h, \t0\().8h, \t2\().8h
Expand All @@ -39,6 +43,10 @@
.endm

.macro w_avg dst, t0, t1, t2, t3
extr x2, x2, x18, #56
extr x2, x2, x2, #8
extr x3, x3, x18, #56
extr x3, x3, x3, #8
ld1 {\t0\().8h,\t1\().8h}, [x2], 32
ld1 {\t2\().8h,\t3\().8h}, [x3], 32
sub \t0\().8h, \t2\().8h, \t0\().8h
Expand All @@ -52,6 +60,12 @@
.endm

.macro mask dst, t0, t1, t2, t3
extr x6, x6, x18, #56
extr x6, x6, x6, #8
extr x2, x2, x18, #56
extr x2, x2, x2, #8
extr x3, x3, x18, #56
extr x3, x3, x3, #8
ld1 {v30.16b}, [x6], 16
ld1 {\t0\().8h,\t1\().8h}, [x2], 32
mul v30.16b, v30.16b, v31.16b
Expand Down Expand Up @@ -436,6 +450,8 @@ w_mask_fn 420


function blend_8bpc_neon, export=1
extr x5, x5, x18, #56
extr x5, x5, x5, #8
movrel x6, blend_tbl
clz w3, w3
sub w3, w3, #26
Expand Down
4 changes: 4 additions & 0 deletions src/arm/64/mc16.S
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@

.macro bidir_fn type, bdmax
function \type\()_16bpc_neon, export=1
extr x6, x6, x18, #56
extr x6, x6, x6, #8
clz w4, w4
.ifnc \type, avg
dup v31.8h, \bdmax // bitdepth_max
Expand Down Expand Up @@ -565,6 +567,8 @@ w_mask_fn 420


function blend_16bpc_neon, export=1
extr x5, x5, x18, #56
extr x5, x5, x5, #8
movrel x6, blend_tbl
clz w3, w3
sub w3, w3, #26
Expand Down
2 changes: 0 additions & 2 deletions src/arm/asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@
#include "config.h"

#if ARCH_AARCH64
#define x18 do_not_use_x18
#define w18 do_not_use_w18

#if HAVE_AS_ARCH_DIRECTIVE
.arch AS_ARCH_LEVEL
Expand Down
14 changes: 7 additions & 7 deletions src/arm/cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
#define HWCAP2_AARCH64_SVE2 (1 << 1)
#define HWCAP2_AARCH64_I8MM (1 << 13)

COLD unsigned dav1d_get_cpu_flags_arm(void) {
DAV1D_API COLD unsigned dav1d_get_cpu_flags_arm(void) {
#if HAVE_GETAUXVAL
unsigned long hw_cap = getauxval(AT_HWCAP);
unsigned long hw_cap2 = getauxval(AT_HWCAP2);
Expand All @@ -68,7 +68,7 @@ COLD unsigned dav1d_get_cpu_flags_arm(void) {
#define HWCAP_ARM_ASIMDDP (1 << 24)
#define HWCAP_ARM_I8MM (1 << 27)

COLD unsigned dav1d_get_cpu_flags_arm(void) {
DAV1D_API COLD unsigned dav1d_get_cpu_flags_arm(void) {
#if HAVE_GETAUXVAL
unsigned long hw_cap = getauxval(AT_HWCAP);
#else
Expand Down Expand Up @@ -96,7 +96,7 @@ static int have_feature(const char *feature) {
return supported;
}

COLD unsigned dav1d_get_cpu_flags_arm(void) {
DAV1D_API COLD unsigned dav1d_get_cpu_flags_arm(void) {
unsigned flags = dav1d_get_default_cpu_flags();
if (have_feature("hw.optional.arm.FEAT_DotProd"))
flags |= DAV1D_ARM_CPU_FLAG_DOTPROD;
Expand All @@ -112,7 +112,7 @@ COLD unsigned dav1d_get_cpu_flags_arm(void) {
#include <sys/types.h>
#include <sys/sysctl.h>

COLD unsigned dav1d_get_cpu_flags_arm(void) {
DAV1D_API COLD unsigned dav1d_get_cpu_flags_arm(void) {
unsigned flags = dav1d_get_default_cpu_flags();

#ifdef CPU_ID_AA64ISAR0
Expand Down Expand Up @@ -146,7 +146,7 @@ COLD unsigned dav1d_get_cpu_flags_arm(void) {
#elif defined(_WIN32)
#include <windows.h>

COLD unsigned dav1d_get_cpu_flags_arm(void) {
DAV1D_API COLD unsigned dav1d_get_cpu_flags_arm(void) {
unsigned flags = dav1d_get_default_cpu_flags();
#ifdef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
Expand Down Expand Up @@ -211,7 +211,7 @@ static unsigned parse_proc_cpuinfo(const char *flag) {
return 0;
}

COLD unsigned dav1d_get_cpu_flags_arm(void) {
DAV1D_API COLD unsigned dav1d_get_cpu_flags_arm(void) {
unsigned flags = dav1d_get_default_cpu_flags();
flags |= parse_proc_cpuinfo("neon") ? DAV1D_ARM_CPU_FLAG_NEON : 0;
flags |= parse_proc_cpuinfo("asimd") ? DAV1D_ARM_CPU_FLAG_NEON : 0;
Expand All @@ -226,7 +226,7 @@ COLD unsigned dav1d_get_cpu_flags_arm(void) {

#else /* Unsupported OS */

COLD unsigned dav1d_get_cpu_flags_arm(void) {
DAV1D_API COLD unsigned dav1d_get_cpu_flags_arm(void) {
return dav1d_get_default_cpu_flags();
}

Expand Down
7 changes: 7 additions & 0 deletions src/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -320,10 +320,17 @@ link_args = []

if ia2_path != ''
libdav1d_flags += [
'-O3',
'-ffixed-x18',
'-march=armv8.5-a+memtag',
'-B/home/amunoz/ia2/llvm-project/build-rtlibs/compiler-rt/lib/linux',
'-L/home/amunoz/ia2/llvm-project/build-rtlibs/lib',
'--rtlib=compiler-rt',
f'-DPKEY=@pkey@',
]
if ia2_enable
link_args += [
'--rtlib=compiler-rt',
'-Wl,@' + join_paths(dav1d_src_root, f'callgate_wrapper_@pkey@.ld'),
]
endif
Expand Down
13 changes: 10 additions & 3 deletions tools/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,21 @@ link_args = []

if ia2_path != ''
c_args += [
'-O3',
'-g',
'-ffixed-x18',
'-march=armv8.5-a+memtag',
'-B/home/amunoz/ia2/llvm-project/build-rtlibs/compiler-rt/lib/linux',
'-L/home/amunoz/ia2/llvm-project/build-rtlibs/lib',
'--rtlib=compiler-rt',
f'-DPKEY=@pkey@',
]
if ia2_enable
link_args += [
'-Wl,@' + join_paths(dav1d_src_root, f'callgate_wrapper_@pkey@.ld'),
'-Wl,--dynamic-linker=/tmp/build/aarch64/tools/ld-linux-aarch64.so.1',
'-Wl,--rpath=/tmp/build/aarch64/tools/',
'--rtlib=compiler-rt',
'-Wl,--wrap=main',
'-Wl,--dynamic-list=' + join_paths(ia2_path, 'runtime/libia2/dynsym.syms'),
'-Wl,--export-dynamic',
Expand All @@ -77,9 +87,6 @@ if ia2_path != ''
# <stdio.h> functions
'-Wl,-wrap,asprintf',
'-Wl,-wrap,vasprintf',

# for `__tls_get_addr@@GLIBC_2.3`, which didn't use to error for being undefined, but now does
'/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2',
]
endif
endif
Expand Down