From 6359fbad7be5df011c2f6684a44bc6e7de695304 Mon Sep 17 00:00:00 2001 From: Kai Nacke Date: Mon, 21 Oct 2024 22:24:46 -0400 Subject: [PATCH] [SystemZ][XRay] XRay runtime support for SystemZ Adds the runtime support routines for XRay on SystemZ. Only function entry/exit is implemented. --- .../cmake/Modules/AllSupportedArchDefs.cmake | 2 +- compiler-rt/lib/xray/CMakeLists.txt | 9 + compiler-rt/lib/xray/xray_interface.cpp | 2 + .../lib/xray/xray_interface_internal.h | 4 + compiler-rt/lib/xray/xray_s390x.cpp | 88 +++++++++ compiler-rt/lib/xray/xray_trampoline_s390x.S | 176 ++++++++++++++++++ compiler-rt/lib/xray/xray_tsc.h | 22 +++ 7 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 compiler-rt/lib/xray/xray_s390x.cpp create mode 100644 compiler-rt/lib/xray/xray_trampoline_s390x.S diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index fb4dfa7bd09df..69e9b522775f9 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -102,7 +102,7 @@ if(APPLE) set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM64}) else() set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} - powerpc64le ${HEXAGON} ${LOONGARCH64}) + powerpc64le ${HEXAGON} ${LOONGARCH64} ${S390X}) endif() set(ALL_XRAY_DSO_SUPPORTED_ARCH ${X86_64}) set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64}) diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt index f38c07420c9ab..20bdeeaee09e4 100644 --- a/compiler-rt/lib/xray/CMakeLists.txt +++ b/compiler-rt/lib/xray/CMakeLists.txt @@ -92,6 +92,13 @@ set(hexagon_SOURCES xray_trampoline_hexagon.S ) +set(s390x_SOURCES + xray_s390x.cpp + xray_trampoline_s390x.S + ) +# Enable vector instructions in the assembly file. 
+set_source_files_properties(xray_trampoline_s390x.S PROPERTIES COMPILE_FLAGS -mvx) + set(XRAY_SOURCE_ARCHS arm armhf @@ -102,6 +109,7 @@ set(XRAY_SOURCE_ARCHS mips64 mips64el powerpc64le + s390x x86_64 ) @@ -152,6 +160,7 @@ set(XRAY_ALL_SOURCE_FILES ${mips64_SOURCES} ${mips64el_SOURCES} ${powerpc64le_SOURCES} + ${s390x_SOURCES} ${XRAY_IMPL_HEADERS} ) list(REMOVE_DUPLICATES XRAY_ALL_SOURCE_FILES) diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp index b6f0e6762f168..2ebaebd279e87 100644 --- a/compiler-rt/lib/xray/xray_interface.cpp +++ b/compiler-rt/lib/xray/xray_interface.cpp @@ -57,6 +57,8 @@ static const int16_t cSledLength = 64; static const int16_t cSledLength = 8; #elif defined(__hexagon__) static const int16_t cSledLength = 20; +#elif defined(__s390x__) +static const int16_t cSledLength = 18; #else #error "Unsupported CPU Architecture" #endif /* CPU architecture */ diff --git a/compiler-rt/lib/xray/xray_interface_internal.h b/compiler-rt/lib/xray/xray_interface_internal.h index a8cfe0fde84dd..5dcccfe825cf5 100644 --- a/compiler-rt/lib/xray/xray_interface_internal.h +++ b/compiler-rt/lib/xray/xray_interface_internal.h @@ -29,6 +29,10 @@ extern void __xray_FunctionTailExit(); extern void __xray_ArgLoggerEntry(); extern void __xray_CustomEvent(); extern void __xray_TypedEvent(); +#if defined(__s390x__) +extern void __xray_FunctionEntryVec(); +extern void __xray_FunctionExitVec(); +#endif } extern "C" { diff --git a/compiler-rt/lib/xray/xray_s390x.cpp b/compiler-rt/lib/xray/xray_s390x.cpp new file mode 100644 index 0000000000000..86ef3573ce509 --- /dev/null +++ b/compiler-rt/lib/xray/xray_s390x.cpp @@ -0,0 +1,88 @@ +//===-- xray_s390x.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of s390x routines.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <cassert>
+#include <cstring>
+
+bool __xray::patchFunctionEntry(const bool Enable, uint32_t FuncId,
+                                const XRaySledEntry &Sled,
+                                void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+  uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
+  if (Enable) {
+    // The resulting code is:
+    //   stmg    %r2, %r15, 16(%r15)
+    //   llilf   %r2, FuncID
+    //   brasl   %r14, __xray_FunctionEntry@GOT
+    // The FuncId and the stmg instruction must be written.
+
+    // Write FuncId into llilf (32-bit immediate at byte offset 8).
+    Address[2] = FuncId;
+    // Write last part of stmg (bytes 4-5: 0x0024).
+    reinterpret_cast<uint16_t *>(Address)[2] = 0x24;
+    // Write first part of stmg (overwrites the j written when disabled).
+    Address[0] = 0xeb2ff010;
+  } else {
+    // j over sled. NOTE(review): 0xb halfwords = 22 bytes, sled is 18; confirm.
+    Address[0] = 0xa7f4000b;
+  }
+  return true;
+}
+
+bool __xray::patchFunctionExit(const bool Enable, uint32_t FuncId,
+                               const XRaySledEntry &Sled)
+    XRAY_NEVER_INSTRUMENT {
+  uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
+  if (Enable) {
+    // The resulting code is:
+    //   stmg    %r2, %r15, 16(%r15)
+    //   llilf   %r2, FuncID
+    //   j       __xray_FunctionExit@GOT
+    // The FuncId and the stmg instruction must be written.
+
+    // Write FuncId into llilf (32-bit immediate at byte offset 8).
+    Address[2] = FuncId;
+    // Write last part of stmg (bytes 4-5: 0x0024).
+    reinterpret_cast<uint16_t *>(Address)[2] = 0x24;
+    // Write first part of stmg (overwrites the br written when disabled).
+    Address[0] = 0xeb2ff010;
+  } else {
+    // br %r14 instruction (2 bytes).
+    reinterpret_cast<uint16_t *>(Address)[0] = 0x07fe;
+  }
+  return true;
+}
+
+bool __xray::patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+                                   const XRaySledEntry &Sled)
+    XRAY_NEVER_INSTRUMENT {
+  return patchFunctionExit(Enable, FuncId, Sled);
+}
+
+bool __xray::patchCustomEvent(const bool Enable, const uint32_t FuncId,
+                              const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // TODO Implement.
+  return false;
+}
+
+bool __xray::patchTypedEvent(const bool Enable, const uint32_t FuncId,
+                             const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // TODO Implement.
+  return false;
+}
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+  // TODO this will have to be implemented in the trampoline assembly file.
+}
diff --git a/compiler-rt/lib/xray/xray_trampoline_s390x.S b/compiler-rt/lib/xray/xray_trampoline_s390x.S
new file mode 100644
index 0000000000000..4073943641b99
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_trampoline_s390x.S
@@ -0,0 +1,176 @@
+//===-- xray_trampoline_s390x.S ---------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the s390x-specific assembler for the trampolines.
+// 2 versions of the functions are provided: one which does not store the
+// vector registers, and one which does store them. The compiler decides
+// which to call based on the availability of the vector extension.
+//
+//===----------------------------------------------------------------------===//
+
+  .text
+
+// Minimal stack frame size
+#define STACKSZ 160
+
+// Minimal frame size (160) plus space for 8 vector registers of 16 bytes each.
+#define STACKSZ_VEC 288
+
+//===----------------------------------------------------------------------===//
+
+  .globl __xray_FunctionEntry
+  .p2align 4
+  .type __xray_FunctionEntry,@function
+__xray_FunctionEntry:
+  # The registers r2-15 of the instrumented function are already saved in the
+  # stack frame. On entry, r2 contains the function id, and %r14 the address
+  # of the first instruction of the instrumented function.
+  # Register r14 will be stored in the slot reserved for compiler use.
+  stg     %r14, 8(%r15)
+  std     %f0, 128(%r15)
+  std     %f2, 136(%r15)
+  std     %f4, 144(%r15)
+  std     %f6, 152(%r15)
+  aghi    %r15, -STACKSZ
+
+  lgrl    %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
+  ltg     %r1, 0(%r1)
+  je      .Lrestore0
+
+  # Set r3 to XRayEntryType::ENTRY = 0.
+  # The FuncId is still stored in r2.
+  lghi    %r3, 0
+  basr    %r14, %r1
+
+.Lrestore0:
+  ld      %f6, STACKSZ+152(%r15)
+  ld      %f4, STACKSZ+144(%r15)
+  ld      %f2, STACKSZ+136(%r15)
+  ld      %f0, STACKSZ+128(%r15)
+  lmg     %r1, %r15, STACKSZ+8(%r15)
+  br      %r1
+.Lfunc_end0:
+  .size __xray_FunctionEntry, .Lfunc_end0-__xray_FunctionEntry
+
+//===----------------------------------------------------------------------===//
+
+  .globl __xray_FunctionEntryVec
+  .p2align 4
+  .type __xray_FunctionEntryVec,@function
+__xray_FunctionEntryVec:
+  # The registers r2-15 of the instrumented function are already saved in the
+  # stack frame. On entry, r2 contains the function id, and %r14 the address
+  # of the first instruction of the instrumented function.
+  # Register r14 will be stored in the slot reserved for compiler use.
+  stg     %r14, 8(%r15)
+  std     %f0, 128(%r15)
+  std     %f2, 136(%r15)
+  std     %f4, 144(%r15)
+  std     %f6, 152(%r15)
+  aghi    %r15, -STACKSZ_VEC
+  vstm    %v24, %v31, 160(%r15)
+
+  lgrl    %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
+  ltg     %r1, 0(%r1)
+  je      .Lrestore1
+
+  # Set r3 to XRayEntryType::ENTRY = 0.
+  # The FuncId is still stored in r2.
+  lghi    %r3, 0
+  basr    %r14, %r1
+
+.Lrestore1:
+  vlm     %v24, %v31, 160(%r15)
+  ld      %f6, STACKSZ_VEC+152(%r15)
+  ld      %f4, STACKSZ_VEC+144(%r15)
+  ld      %f2, STACKSZ_VEC+136(%r15)
+  ld      %f0, STACKSZ_VEC+128(%r15)
+  lmg     %r1, %r15, STACKSZ_VEC+8(%r15)
+  br      %r1
+.Lfunc_end1:
+  .size __xray_FunctionEntryVec, .Lfunc_end1-__xray_FunctionEntryVec
+
+//===----------------------------------------------------------------------===//
+
+  .globl __xray_FunctionExit
+  .p2align 4
+  .type __xray_FunctionExit,@function
+__xray_FunctionExit:
+  # The registers r2-15 of the instrumented function are already saved in the
+  # stack frame. On entry, the register r2 contains the function id.
+  # At the end, the function jumps to the address saved in the slot for r14,
+  # which contains the return address into the caller of the instrumented
+  # function.
+  std     %f0, 128(%r15)
+  std     %f2, 136(%r15)
+  std     %f4, 144(%r15)
+  std     %f6, 152(%r15)
+  aghi    %r15, -STACKSZ
+
+  lgrl    %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
+  ltg     %r1, 0(%r1)
+  je      .Lrestore2
+
+  # Set r3 to XRayEntryType::EXIT = 1.
+  # The FuncId is still stored in r2.
+  lghi    %r3, 1
+  basr    %r14, %r1
+
+.Lrestore2:
+  ld      %f6, STACKSZ+152(%r15)
+  ld      %f4, STACKSZ+144(%r15)
+  ld      %f2, STACKSZ+136(%r15)
+  ld      %f0, STACKSZ+128(%r15)
+  lmg     %r2, %r15, STACKSZ+16(%r15)
+  br      %r14
+.Lfunc_end2:
+  .size __xray_FunctionExit, .Lfunc_end2-__xray_FunctionExit
+
+//===----------------------------------------------------------------------===//
+
+  .globl __xray_FunctionExitVec
+  .p2align 4
+  .type __xray_FunctionExitVec,@function
+__xray_FunctionExitVec:
+  # The registers r2-15 of the instrumented function are already saved in the
+  # stack frame. On entry, the register r2 contains the function id.
+  # At the end, the function jumps to the address saved in the slot for r14,
+  # which contains the return address into the caller of the instrumented
+  # function.
+  std     %f0, 128(%r15)
+  std     %f2, 136(%r15)
+  std     %f4, 144(%r15)
+  std     %f6, 152(%r15)
+  aghi    %r15, -STACKSZ_VEC
+  vstm    %v24, %v31, 160(%r15)
+
+  lgrl    %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
+  ltg     %r1, 0(%r1)
+  je      .Lrestore3
+
+  # Set r3 to XRayEntryType::EXIT = 1.
+  # The FuncId is still stored in r2.
+  lghi    %r3, 1
+  basr    %r14, %r1
+
+.Lrestore3:
+  vlm     %v24, %v31, 160(%r15)
+  ld      %f6, STACKSZ_VEC+152(%r15)
+  ld      %f4, STACKSZ_VEC+144(%r15)
+  ld      %f2, STACKSZ_VEC+136(%r15)
+  ld      %f0, STACKSZ_VEC+128(%r15)
+  lmg     %r2, %r15, STACKSZ_VEC+16(%r15)
+  br      %r14
+.Lfunc_end3:
+  .size __xray_FunctionExitVec, .Lfunc_end3-__xray_FunctionExitVec
+
+//===----------------------------------------------------------------------===//
+
+  .section ".note.GNU-stack","",@progbits
diff --git a/compiler-rt/lib/xray/xray_tsc.h b/compiler-rt/lib/xray/xray_tsc.h
index e1cafe1bf11d2..5b2ba49b85f5d 100644
--- a/compiler-rt/lib/xray/xray_tsc.h
+++ b/compiler-rt/lib/xray/xray_tsc.h
@@ -83,6 +83,29 @@ inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
 }
 
 } // namespace __xray
+#elif defined(__s390x__)
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+#include <cerrno>
+#include <cstdint>
+#include <time.h>
+
+namespace __xray {
+
+inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+  CPU = 0; // The cycle counter carries no CPU id; always set the out-param.
+  return __builtin_readcyclecounter();
+}
+
+inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+  return NanosecondsPerSecond;
+}
+
+} // namespace __xray
+
 #else
 #error Target architecture is not supported.
 #endif // CPU architecture