From 1eb711675b2b7877bb477fb49d526efdd1c6d9b8 Mon Sep 17 00:00:00 2001 From: Xin Liu Date: Mon, 2 Oct 2023 22:06:07 -0700 Subject: [PATCH 1/2] Add a Module pass that reverse all global variables of c-str. --- llvm/examples/CMakeLists.txt | 1 + llvm/examples/ReverseStr/CMakeLists.txt | 17 +++ .../ReverseStr/ReverseGlobalStrPass.cpp | 105 ++++++++++++++++++ 3 files changed, 123 insertions(+) create mode 100644 llvm/examples/ReverseStr/CMakeLists.txt create mode 100644 llvm/examples/ReverseStr/ReverseGlobalStrPass.cpp diff --git a/llvm/examples/CMakeLists.txt b/llvm/examples/CMakeLists.txt index 74613bd1350bd..567fed12f54a3 100644 --- a/llvm/examples/CMakeLists.txt +++ b/llvm/examples/CMakeLists.txt @@ -8,6 +8,7 @@ add_subdirectory(ModuleMaker) add_subdirectory(OrcV2Examples) add_subdirectory(SpeculativeJIT) add_subdirectory(Bye) +add_subdirectory(ReverseStr) if(LLVM_ENABLE_EH AND (NOT WIN32) AND (NOT "${LLVM_NATIVE_ARCH}" STREQUAL "ARM")) add_subdirectory(ExceptionDemo) diff --git a/llvm/examples/ReverseStr/CMakeLists.txt b/llvm/examples/ReverseStr/CMakeLists.txt new file mode 100644 index 0000000000000..f9b51eaa37a87 --- /dev/null +++ b/llvm/examples/ReverseStr/CMakeLists.txt @@ -0,0 +1,17 @@ +# The plugin expects to not link against the Support and Core libraries, +# but expects them to exist in the process loading the plugin. This doesn't +# work with DLLs on Windows (where a shared library can't have undefined +# references), so just skip this example on Windows. 
+if (NOT WIN32 AND NOT CYGWIN) + add_llvm_pass_plugin(ReverseStr + ReverseGlobalStrPass.cpp + DEPENDS + intrinsics_gen + LLVMAnalysis + + BUILDTREE_ONLY + ) + + install(TARGETS ${name} RUNTIME DESTINATION "${LLVM_EXAMPLES_INSTALL_DIR}") + set_target_properties(${name} PROPERTIES FOLDER "Examples") +endif() diff --git a/llvm/examples/ReverseStr/ReverseGlobalStrPass.cpp b/llvm/examples/ReverseStr/ReverseGlobalStrPass.cpp new file mode 100644 index 0000000000000..4ee5ac10668a1 --- /dev/null +++ b/llvm/examples/ReverseStr/ReverseGlobalStrPass.cpp @@ -0,0 +1,105 @@ +#include "llvm/IR/Module.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Pass.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/PassPlugin.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/ConstantFolding.h" + +#define DEBUG_TYPE "ReverseStr" +using namespace llvm; + +namespace { + +bool reverse_global_str(Module &M) { + LLVMContext &ctx = M.getContext(); + + for (auto &g: M.globals()) { + if (g.hasName() && g.hasInitializer()) { + LLVM_DEBUG(dbgs() << "global variable: " << g << '\n'); + + const Constant *c = g.getInitializer(); + Type *ty = c->getType(); + if(ty->isArrayTy() && cast(ty)->getElementType()->isIntegerTy(8)/*i8*/) { + LLVM_DEBUG(dbgs() << "initializer: " << c << '\n'); + + Constant *contents= llvm::ReadByteArrayFromGlobal(&g, 0); + if (contents != nullptr) { + ConstantDataSequential *cds = cast(contents); + if (cds->isCString()) { + StringRef literal = cds->getAsCString(); + LLVM_DEBUG(dbgs() << "original c-string literal : " << literal << '\n'); + + if (!literal.empty()) { + size_t len = literal.size(); + SmallVector RawBytes(len + 1); + for (size_t i=0; i < len; ++i) { + RawBytes[i] = literal[len - 1 - i]; + } + RawBytes[len] = '\0'; + + auto reversed = ConstantDataArray::get(ctx, RawBytes); + + g.setInitializer(reversed); + LLVM_DEBUG(dbgs() << "after updated GV: " << g << 
'\n'); + } else { + LLVM_DEBUG(dbgs() << "[empty] skipped!"); + } + } + } + } + } + } + return true; +} + + +struct LegacyReverseStr : public ModulePass { + static char ID; + LegacyReverseStr() : ModulePass(ID) {} + bool runOnFunction(Module &M) override { return reverse_global_str(M); } +}; + +struct ReverseGlobalStrPass: PassInfoMixin { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &) { + reverse_global_str(M); + return PreservedAnalyses::none(); + } + + static bool isRequired() { return true; } +}; + +} // namespace + +char LegacyReverseStr::ID = 0; + +static RegisterPass X("ReverseStr", "Reverse global variables of c-str" + false /* Only looks at CFG */, + false /* Analysis Pass */); + +/* New PM Registration */ +llvm::PassPluginLibraryInfo getReverseStrPluginInfo() { + return {LLVM_PLUGIN_API_VERSION, "ReverseStr", LLVM_VERSION_STRING, + [](PassBuilder &PB) { + PB.registerPipelineStartEPCallback( + [](llvm::ModulePassManager &PM, OptimizationLevel Level) { + PM.addPass(ReverseGlobalStrPass()); + }); + PB.registerPipelineParsingCallback( + [](StringRef Name, llvm::ModulePassManager &PM, + ArrayRef) { + if (Name == "ReverseStr") { + PM.addPass(ReverseGlobalStrPass()); + return true; + } + return false; + }); + }}; +} + +extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo +llvmGetPassPluginInfo() { + return getReverseStrPluginInfo(); +} \ No newline at end of file From 12698e7a30875eb2393393b729eee3d07f1c8f76 Mon Sep 17 00:00:00 2001 From: Xin Liu Date: Tue, 3 Oct 2023 00:00:49 -0700 Subject: [PATCH 2/2] Add a regression test for ReverseStr plugin. 
--- llvm/examples/ReverseStr/README.md | 42 +++++++++++++++++++ .../ReverseStr/ReverseGlobalStrPass.cpp | 28 +++++++------ llvm/test/Transforms/HelloNew/reverse_cstr.ll | 5 +++ llvm/test/lit.cfg.py | 8 ++++ 4 files changed, 71 insertions(+), 12 deletions(-) create mode 100644 llvm/examples/ReverseStr/README.md create mode 100644 llvm/test/Transforms/HelloNew/reverse_cstr.ll diff --git a/llvm/examples/ReverseStr/README.md b/llvm/examples/ReverseStr/README.md new file mode 100644 index 0000000000000..2b3be078ca319 --- /dev/null +++ b/llvm/examples/ReverseStr/README.md @@ -0,0 +1,42 @@ +# Task +Write a plugin for opt and clang that finds the global variables holding C strings in a translation unit and reverses their string literals. + + +```c +$cat t.c +#include <stdio.h> +const char * GLOBAL_CONSTANT_MSG = "hello world @ compile"; +int main() { + puts(GLOBAL_CONSTANT_MSG); +} +``` + +# Approach +1. Write a ModulePass using LLVM. +2. Idea: + a. Iterate over all global variables of a module. + b. Keep only those whose initializer type is an array of i8. + c. Get the contents of the initializer. + d. Create a new Constant whose contents are the reversed string literal, and replace the old initializer with it. 
+ +# Demo +```bash +$clang -emit-llvm -S t.c +$./bin/lli ./t.ll +hello world @ compile + +$./bin/opt -load-pass-plugin=./lib/ReverseStr.so -passes=ReverseStr -debug ./t.ll > t.after.bc +Args: ./bin/opt -load-pass-plugin=./lib/ReverseStr.so -passes=ReverseStr -debug ./t.ll +global variable: @.str = private unnamed_addr constant [22 x i8] c"hello world @ compile\00", align 1 +initializer: 0x556208a997a0 +original c-string literal : hello world @ compile +after updated GV: @.str = private unnamed_addr constant [22 x i8] c"elipmoc @ dlrow olleh\00", align 1 +global variable: @GLOBAL_CONSTANT_MSG = dso_local global ptr @.str, align 8 + +./bin/llvm-dis < t.after.bc| grep ^@ +@.str = private unnamed_addr constant [22 x i8] c"elipmoc @ dlrow olleh\00", align 1 +@GLOBAL_CONSTANT_MSG = dso_local global ptr @.str, align 8 + +$./bin/lli < t.after.bc +elipmoc @ dlrow olleh +``` diff --git a/llvm/examples/ReverseStr/ReverseGlobalStrPass.cpp b/llvm/examples/ReverseStr/ReverseGlobalStrPass.cpp index 4ee5ac10668a1..947f55c02e5de 100644 --- a/llvm/examples/ReverseStr/ReverseGlobalStrPass.cpp +++ b/llvm/examples/ReverseStr/ReverseGlobalStrPass.cpp @@ -12,6 +12,18 @@ using namespace llvm; namespace { +// only support latin-1? Do we need to support wide char? 
+static Constant *reverse_string_literal(LLVMContext &ctx, const StringRef & literal) { + size_t len = literal.size(); + SmallVector RawBytes(len + 1); + + for (size_t i=0; i < len; ++i) { + RawBytes[i] = literal[len - 1 - i]; + } + RawBytes[len] = '\0'; + + return ConstantDataArray::get(ctx, RawBytes); +} bool reverse_global_str(Module &M) { LLVMContext &ctx = M.getContext(); @@ -33,15 +45,7 @@ bool reverse_global_str(Module &M) { LLVM_DEBUG(dbgs() << "original c-string literal : " << literal << '\n'); if (!literal.empty()) { - size_t len = literal.size(); - SmallVector RawBytes(len + 1); - for (size_t i=0; i < len; ++i) { - RawBytes[i] = literal[len - 1 - i]; - } - RawBytes[len] = '\0'; - - auto reversed = ConstantDataArray::get(ctx, RawBytes); - + auto reversed = reverse_string_literal(ctx, literal); g.setInitializer(reversed); LLVM_DEBUG(dbgs() << "after updated GV: " << g << '\n'); } else { @@ -59,7 +63,7 @@ bool reverse_global_str(Module &M) { struct LegacyReverseStr : public ModulePass { static char ID; LegacyReverseStr() : ModulePass(ID) {} - bool runOnFunction(Module &M) override { return reverse_global_str(M); } + bool runOnModule(Module &M) override { return reverse_global_str(M); } }; struct ReverseGlobalStrPass: PassInfoMixin { @@ -75,7 +79,7 @@ struct ReverseGlobalStrPass: PassInfoMixin { char LegacyReverseStr::ID = 0; -static RegisterPass X("ReverseStr", "Reverse global variables of c-str" +static RegisterPass X("ReverseStr", "Reverse global variables of c-str", false /* Only looks at CFG */, false /* Analysis Pass */); @@ -102,4 +106,4 @@ llvm::PassPluginLibraryInfo getReverseStrPluginInfo() { extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() { return getReverseStrPluginInfo(); -} \ No newline at end of file +} diff --git a/llvm/test/Transforms/HelloNew/reverse_cstr.ll b/llvm/test/Transforms/HelloNew/reverse_cstr.ll new file mode 100644 index 0000000000000..e719d37d670d3 --- /dev/null +++ 
b/llvm/test/Transforms/HelloNew/reverse_cstr.ll @@ -0,0 +1,5 @@ +; RUN: opt %loadnewpmreversestr -passes=ReverseStr < %s -S | FileCheck %s + +; CHECK: c"elipmoc @ dlrow olleh\00" +@.str = private unnamed_addr constant [22 x i8] c"hello world @ compile\00", align 1 +@GLOBAL_CONSTANT_MSG = dso_local global i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str, i32 0, i32 0), align 8 diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 7a1e30b49b0c3..591fe4d4a8c23 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -393,6 +393,14 @@ def version_int(ver): ), ) ) + config.substitutions.append( + ( + "%loadnewpmreversestr", + "-load-pass-plugin={}/ReverseStr{}".format( + config.llvm_shlib_dir, config.llvm_shlib_ext + ), + ) + ) if config.linked_exampleirtransforms_extension: config.substitutions.append(("%loadexampleirtransforms", ""))