diff --git a/libc/benchmarks/CMakeLists.txt b/libc/benchmarks/CMakeLists.txt index 52e3f942d16ea..5cd612a14b540 100644 --- a/libc/benchmarks/CMakeLists.txt +++ b/libc/benchmarks/CMakeLists.txt @@ -212,5 +212,3 @@ target_link_libraries(libc.benchmarks.memory_functions.opt_host benchmark_main ) llvm_update_compile_flags(libc.benchmarks.memory_functions.opt_host) - -add_subdirectory(automemcpy) diff --git a/libc/benchmarks/automemcpy/CMakeLists.txt b/libc/benchmarks/automemcpy/CMakeLists.txt deleted file mode 100644 index ef9b4218c8d61..0000000000000 --- a/libc/benchmarks/automemcpy/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -if(NOT LIBC_BUILD_AUTOMEMCPY) - return () -endif() - -if(NOT LLVM_WITH_Z3) - MESSAGE(FATAL_ERROR "Building llvm-libc automemcpy requires Z3") -endif() - -set(LIBC_AUTOMEMCPY_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include) - -add_subdirectory(lib) -add_subdirectory(unittests) diff --git a/libc/benchmarks/automemcpy/README.md b/libc/benchmarks/automemcpy/README.md deleted file mode 100644 index 8583368993ef0..0000000000000 --- a/libc/benchmarks/automemcpy/README.md +++ /dev/null @@ -1,111 +0,0 @@ -This folder contains an implementation of [automemcpy: A framework for automatic generation of fundamental memory operations](https://research.google/pubs/pub50338/). - -It uses the [Z3 theorem prover](https://github.com/Z3Prover/z3) to enumerate a subset of valid memory function implementations. These implementations are then materialized as C++ code and can be [benchmarked](../) against various [size distributions](../distributions). This process helps the design of efficient implementations for a particular environnement (size distribution, processor or custom compilation options). - -This is not enabled by default, as it is mostly useful when working on tuning the library implementation. To build it, use `LIBC_BUILD_AUTOMEMCPY=ON` (see below). - -## Prerequisites - -You may need to install `Z3` from source if it's not available on your system. -Here we show instructions to install it into ``. -You may need to `sudo` to `make install`. - -```shell -mkdir -p ~/git -cd ~/git -git clone https://github.com/Z3Prover/z3.git -python scripts/mk_make.py --prefix= -cd build -make -j -make install -``` - -## Configuration - -```shell -mkdir -p -cd /llvm -cmake -DCMAKE_C_COMPILER=/usr/bin/clang \ - -DCMAKE_CXX_COMPILER=/usr/bin/clang++ \ - -DLLVM_ENABLE_PROJECTS="libc" \ - -DLLVM_ENABLE_Z3_SOLVER=ON \ - -DLLVM_Z3_INSTALL_DIR= \ - -DLIBC_BUILD_AUTOMEMCPY=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -B -``` - -## Targets and compilation - -There are three main CMake targets - 1. `automemcpy_implementations` - - runs `Z3` and materializes valid memory functions as C++ code, a message will display its ondisk location. - - the source code is then compiled using the native host optimizations (i.e. `-march=native` or `-mcpu=native` depending on the architecture). - 2. `automemcpy` - - the binary that benchmarks the autogenerated implementations. - 3. `automemcpy_result_analyzer` - - the binary that analyses the benchmark results. - -You may only compile the binaries as they both pull the autogenerated code as a dependency. - -```shell -make -C -j automemcpy automemcpy_result_analyzer -``` - -## Running the benchmarks - -Make sure to save the results of the benchmark as a json file. - -```shell -/bin/automemcpy --benchmark_out_format=json --benchmark_out=/results.json -``` - -### Additional useful options - - - - `--benchmark_min_time=.2` - - By default, each function is benchmarked for at least one second, here we lower it to 200ms. - - - `--benchmark_filter="BM_Memset|BM_Bzero"` - - By default, all functions are benchmarked, here we restrict them to `memset` and `bzero`. - -Other options might be useful, use `--help` for more information. - -## Analyzing the benchmarks - -Analysis is performed by running `automemcpy_result_analyzer` on one or more json result files. - -```shell -/bin/automemcpy_result_analyzer /results.json -``` - -What it does: - 1. Gathers all throughput values for each function / distribution pair and picks the median one.\ - This allows picking a representative value over many runs of the benchmark. Please make sure all the runs happen under similar circumstances. - - 2. For each distribution, look at the span of throughputs for functions of the same type (e.g. For distribution `A`, memcpy throughput spans from 2GiB/s to 5GiB/s). - - 3. For each distribution, give a normalized score to each function (e.g. For distribution `A`, function `M` scores 0.65).\ - This score is then turned into a grade `EXCELLENT`, `VERY_GOOD`, `GOOD`, `PASSABLE`, `INADEQUATE`, `MEDIOCRE`, `BAD` - so that each distribution categorizes how function perform according to them. - - 4. A [Majority Judgement](https://en.wikipedia.org/wiki/Majority_judgment) process is then used to categorize each function. This enables finer analysis of how distributions agree on which function is better. In the following example, `Function_1` and `Function_2` are rated `EXCELLENT` but looking at the grade's distribution might help decide which is best. - -| | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | INADEQUATE | MEDIOCRE | BAD | -|------------|:---------:|:---------:|:----:|:--------:|:----------:|:--------:|:---:| -| Function_1 | 7 | 1 | 2 | | | | | -| Function_2 | 6 | 4 | | | | | | - -The tool outputs the histogram of grades for each function. In case of tie, other dimensions might help decide (e.g. code size, performance on other microarchitectures). - -``` -EXCELLENT |█▁▂ | Function_0 -EXCELLENT |█▅ | Function_1 -VERY_GOOD |▂█▁ ▁ | Function_2 -GOOD | ▁█▄ | Function_3 -PASSABLE | ▂▆▄█ | Function_4 -INADEQUATE | ▃▃█▁ | Function_5 -MEDIOCRE | █▆▁| Function_6 -BAD | ▁▁█| Function_7 -``` diff --git a/libc/benchmarks/automemcpy/include/automemcpy/CodeGen.h b/libc/benchmarks/automemcpy/include/automemcpy/CodeGen.h deleted file mode 100644 index 389e8249f9399..0000000000000 --- a/libc/benchmarks/automemcpy/include/automemcpy/CodeGen.h +++ /dev/null @@ -1,26 +0,0 @@ -//===-- C++ code generation from NamedFunctionDescriptors -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LIBC_BENCHMARKS_AUTOMEMCPY_CODEGEN_H -#define LIBC_BENCHMARKS_AUTOMEMCPY_CODEGEN_H - -#include "automemcpy/FunctionDescriptor.h" -#include -#include -#include - -namespace llvm { -namespace automemcpy { - -// This function serializes the array of FunctionDescriptors as a C++ file. -void Serialize(raw_ostream &Stream, ArrayRef FD); - -} // namespace automemcpy -} // namespace llvm - -#endif // LIBC_BENCHMARKS_AUTOMEMCPY_CODEGEN_H diff --git a/libc/benchmarks/automemcpy/include/automemcpy/FunctionDescriptor.h b/libc/benchmarks/automemcpy/include/automemcpy/FunctionDescriptor.h deleted file mode 100644 index 65477d9d72a0e..0000000000000 --- a/libc/benchmarks/automemcpy/include/automemcpy/FunctionDescriptor.h +++ /dev/null @@ -1,159 +0,0 @@ -//===-- Pod structs to describe a memory function----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_COMMON_H -#define LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_COMMON_H - -#include -#include -#include -#include -#include -#include -#include - -namespace llvm { -namespace automemcpy { - -// Boilerplate code to be able to sort and hash types. -#define COMPARABLE_AND_HASHABLE(T, ...) \ - inline auto asTuple() const { return std::tie(__VA_ARGS__); } \ - bool operator==(const T &O) const { return asTuple() == O.asTuple(); } \ - bool operator<(const T &O) const { return asTuple() < O.asTuple(); } \ - struct Hasher { \ - std::size_t operator()(const T &K) const { \ - return llvm::hash_value(K.asTuple()); \ - } \ - }; - -// Represents the maximum value for the size parameter of a memory function. -// This is an `int` so we can use it as an expression in Z3. -// It also allows for a more readable and compact representation when storing -// the SizeSpan in the autogenerated C++ file. -static constexpr int kMaxSize = INT_MAX; - -// This mimics the `Arg` type in libc/src/string/memory_utils/elements.h without -// having to depend on it. -enum class AlignArg { _1, _2, ARRAY_SIZE }; - -// Describes a range of sizes. -// We use the begin/end representation instead of first/last to allow for empty -// range (i.e. Begin == End) -struct SizeSpan { - size_t Begin = 0; - size_t End = 0; - - COMPARABLE_AND_HASHABLE(SizeSpan, Begin, End) -}; - -// Describes a contiguous region. -// In such a region all sizes are handled individually. -// e.g. with Span = {0, 2}; -// if(size == 0) return Handle<0>(); -// if(size == 1) return Handle<1>(); -struct Contiguous { - SizeSpan Span; - - COMPARABLE_AND_HASHABLE(Contiguous, Span) -}; - -// This struct represents a range of sizes over which to use an overlapping -// strategy. An overlapping strategy of size N handles all sizes from N to 2xN. -// The span may represent several contiguous overlaps. -// e.g. with Span = {16, 128}; -// if(size >= 16 and size < 32) return Handle>(); -// if(size >= 32 and size < 64) return Handle>(); -// if(size >= 64 and size < 128) return Handle>(); -struct Overlap { - SizeSpan Span; - - COMPARABLE_AND_HASHABLE(Overlap, Span) -}; - -// Describes a region using a loop handling BlockSize bytes at a time. The -// remaining bytes of the loop are handled with an overlapping operation. -struct Loop { - SizeSpan Span; - size_t BlockSize = 0; - - COMPARABLE_AND_HASHABLE(Loop, Span, BlockSize) -}; - -// Same as `Loop` but starts by aligning a buffer on `Alignment` bytes. -// A first operation handling 'Alignment` bytes is performed followed by a -// sequence of Loop.BlockSize bytes operation. The Loop starts processing from -// the next aligned byte in the chosen buffer. The remaining bytes of the loop -// are handled with an overlapping operation. -struct AlignedLoop { - Loop Loop; - size_t Alignment = 0; // Size of the alignment. - AlignArg AlignTo = AlignArg::_1; // Which buffer to align. - - COMPARABLE_AND_HASHABLE(AlignedLoop, Loop, Alignment, AlignTo) -}; - -// Some processors offer special instruction to handle the memory function -// completely, we refer to such instructions as accelerators. -struct Accelerator { - SizeSpan Span; - - COMPARABLE_AND_HASHABLE(Accelerator, Span) -}; - -// The memory functions are assembled out of primitives that can be implemented -// with regular scalar operations (SCALAR), with the help of vector or bitcount -// instructions (NATIVE) or by deferring it to the compiler (BUILTIN). -enum class ElementTypeClass { - SCALAR, - NATIVE, - BUILTIN, -}; - -// A simple enum to categorize which function is being implemented. -enum class FunctionType { - MEMCPY, - MEMCMP, - BCMP, - MEMSET, - BZERO, -}; - -// This struct describes the skeleton of the implementation, it does not go into -// every detail but is enough to uniquely identify the implementation. -struct FunctionDescriptor { - FunctionType Type; - std::optional Contiguous; - std::optional Overlap; - std::optional Loop; - std::optional AlignedLoop; - std::optional Accelerator; - ElementTypeClass ElementClass; - - COMPARABLE_AND_HASHABLE(FunctionDescriptor, Type, Contiguous, Overlap, Loop, - AlignedLoop, Accelerator, ElementClass) - - inline size_t id() const { return llvm::hash_value(asTuple()); } -}; - -// Same as above but with the function name. -struct NamedFunctionDescriptor { - StringRef Name; - FunctionDescriptor Desc; -}; - -template llvm::hash_code hash_value(const ArrayRef &V) { - return llvm::hash_combine_range(V.begin(), V.end()); -} -template llvm::hash_code hash_value(const T &O) { - return llvm::hash_value(O.asTuple()); -} - -} // namespace automemcpy -} // namespace llvm - -#endif /* LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_COMMON_H */ diff --git a/libc/benchmarks/automemcpy/include/automemcpy/RandomFunctionGenerator.h b/libc/benchmarks/automemcpy/include/automemcpy/RandomFunctionGenerator.h deleted file mode 100644 index 28756e8f86c0e..0000000000000 --- a/libc/benchmarks/automemcpy/include/automemcpy/RandomFunctionGenerator.h +++ /dev/null @@ -1,62 +0,0 @@ -//===-- Generate random but valid function descriptors ---------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_RANDOM_FUNCTION_GENERATOR_H -#define LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_RANDOM_FUNCTION_GENERATOR_H - -#include "automemcpy/FunctionDescriptor.h" -#include -#include -#include -#include -#include -#include -#include - -namespace llvm { -namespace automemcpy { - -// Holds the state for the constraint solver. -// It implements a single method that returns the next valid description. -struct RandomFunctionGenerator { - RandomFunctionGenerator(); - - // Get the next valid FunctionDescriptor or std::nullopt. - std::optional next(); - -private: - // Returns an expression where `Variable` is forced to be one of the `Values`. - z3::expr inSetConstraint(z3::expr &Variable, ArrayRef Values) const; - // Add constaints to `Begin` and `End` so that they are: - // - between 0 and kMaxSize (inclusive) - // - ordered (begin<=End) - // - amongst a set of predefined values. - void addBoundsAndAnchors(z3::expr &Begin, z3::expr &End); - // Add constraints to make sure that the loop block size is amongst a set of - // predefined values. Also makes sure that the loop that the loop is iterated - // at least `LoopMinIter` times. - void addLoopConstraints(const z3::expr &LoopBegin, const z3::expr &LoopEnd, - z3::expr &LoopBlockSize, int LoopMinIter); - - z3::context Context; - z3::solver Solver; - - z3::expr Type; - z3::expr ContiguousBegin, ContiguousEnd; - z3::expr OverlapBegin, OverlapEnd; - z3::expr LoopBegin, LoopEnd, LoopBlockSize; - z3::expr AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize, - AlignedAlignment, AlignedArg; - z3::expr AcceleratorBegin, AcceleratorEnd; - z3::expr ElementClass; -}; - -} // namespace automemcpy -} // namespace llvm - -#endif /* LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_RANDOM_FUNCTION_GENERATOR_H */ diff --git a/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h b/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h deleted file mode 100644 index d4bf272582767..0000000000000 --- a/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h +++ /dev/null @@ -1,109 +0,0 @@ -//===-- Analyze benchmark JSON files ----------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LIBC_BENCHMARKS_AUTOMEMCPY_RESULTANALYZER_H -#define LIBC_BENCHMARKS_AUTOMEMCPY_RESULTANALYZER_H - -#include "automemcpy/FunctionDescriptor.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringMap.h" -#include -#include - -namespace llvm { -namespace automemcpy { - -// A Grade as in the Majority Judgment voting system. -struct Grade { - enum GradeEnum { - EXCELLENT, - VERY_GOOD, - GOOD, - PASSABLE, - INADEQUATE, - MEDIOCRE, - BAD, - ARRAY_SIZE, - }; - - // Returns a human readable string of the enum. - static StringRef getString(const GradeEnum &GE); - - // Turns 'Score' into a GradeEnum. - static GradeEnum judge(double Score); -}; - -// A 'GradeEnum' indexed array with counts for each grade. -using GradeHistogram = std::array; - -// Identifies a Function by its name and type. Used as a key in a map. -struct FunctionId { - StringRef Name; - FunctionType Type; - COMPARABLE_AND_HASHABLE(FunctionId, Type, Name) -}; - -struct PerDistributionData { - std::vector BytesPerSecondSamples; - double BytesPerSecondMedian; // Median of samples for this distribution. - double BytesPerSecondMean; // Mean of samples for this distribution. - double BytesPerSecondVariance; // Variance of samples for this distribution. - double Score; // Normalized score for this distribution. - Grade::GradeEnum Grade; // Grade for this distribution. -}; - -struct FunctionData { - FunctionId Id; - StringMap PerDistributionData; - double ScoresGeoMean; // Geomean of scores for each distribution. - GradeHistogram GradeHisto = {}; // GradeEnum indexed array - Grade::GradeEnum FinalGrade = Grade::BAD; // Overall grade for this function -}; - -// Identifies a Distribution by its name. Used as a key in a map. -struct DistributionId { - StringRef Name; - COMPARABLE_AND_HASHABLE(DistributionId, Name) -}; - -// Identifies a Sample by its distribution and function. Used as a key in a map. -struct SampleId { - FunctionId Function; - DistributionId Distribution; - COMPARABLE_AND_HASHABLE(SampleId, Function.Type, Function.Name, - Distribution.Name) -}; - -// The type of Samples as reported by the Google Benchmark's JSON result file. -// We are only interested in the "iteration" samples, the "aggregate" ones -// represent derived metrics such as 'mean' or 'median'. -enum class SampleType { UNKNOWN, ITERATION, AGGREGATE }; - -// A SampleId with an associated measured throughput. -struct Sample { - SampleId Id; - SampleType Type = SampleType::UNKNOWN; - double BytesPerSecond = 0; -}; - -// This function collects Samples that belong to the same distribution and -// function and retains the median one. It then stores each of them into a -// 'FunctionData' and returns them as a vector. -std::vector getThroughputs(ArrayRef Samples); - -// Normalize the function's throughput per distribution. -void fillScores(MutableArrayRef Functions); - -// Convert scores into Grades, stores an histogram of Grade for each functions -// and cast a median grade for the function. -void castVotes(MutableArrayRef Functions); - -} // namespace automemcpy -} // namespace llvm - -#endif // LIBC_BENCHMARKS_AUTOMEMCPY_RESULTANALYZER_H diff --git a/libc/benchmarks/automemcpy/lib/CMakeLists.txt b/libc/benchmarks/automemcpy/lib/CMakeLists.txt deleted file mode 100644 index e66b9045b6074..0000000000000 --- a/libc/benchmarks/automemcpy/lib/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -add_library(automemcpy_codegen CodeGen.cpp) -target_link_libraries(automemcpy_codegen PUBLIC LLVMSupport) -target_include_directories(automemcpy_codegen PUBLIC ${LIBC_AUTOMEMCPY_INCLUDE_DIR}) -llvm_update_compile_flags(automemcpy_codegen) - -add_executable(automemcpy_codegen_main CodeGenMain.cpp RandomFunctionGenerator.cpp) -target_link_libraries(automemcpy_codegen_main PUBLIC automemcpy_codegen ${Z3_LIBRARIES}) -llvm_update_compile_flags(automemcpy_codegen_main) - -set(Implementations "${CMAKE_CURRENT_BINARY_DIR}/Implementations.cpp") -add_custom_command( - OUTPUT ${Implementations} - COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/automemcpy_codegen_main" > "${Implementations}" - COMMAND echo "automemcpy implementations generated in ${Implementations}" - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - DEPENDS automemcpy_codegen_main -) - -add_library(automemcpy_implementations "${Implementations}") -target_link_libraries(automemcpy_implementations PUBLIC LLVMSupport libc-memory-benchmark) -target_include_directories(automemcpy_implementations PRIVATE - ${LIBC_SOURCE_DIR} ${LIBC_AUTOMEMCPY_INCLUDE_DIR}) -target_compile_options(automemcpy_implementations PRIVATE ${LIBC_COMPILE_OPTIONS_NATIVE} "SHELL:-mllvm -combiner-global-alias-analysis" -fno-builtin) -llvm_update_compile_flags(automemcpy_implementations) - -add_executable(automemcpy EXCLUDE_FROM_ALL ${LIBC_SOURCE_DIR}/benchmarks/LibcMemoryGoogleBenchmarkMain.cpp) -target_link_libraries(automemcpy PRIVATE libc-memory-benchmark benchmark_main automemcpy_implementations) -llvm_update_compile_flags(automemcpy) - -add_library(automemcpy_result_analyzer_lib EXCLUDE_FROM_ALL ResultAnalyzer.cpp) -target_link_libraries(automemcpy_result_analyzer_lib PUBLIC LLVMSupport) -target_include_directories(automemcpy_result_analyzer_lib PUBLIC ${LIBC_AUTOMEMCPY_INCLUDE_DIR}) -llvm_update_compile_flags(automemcpy_result_analyzer_lib) - -add_executable(automemcpy_result_analyzer EXCLUDE_FROM_ALL ResultAnalyzerMain.cpp) -target_link_libraries(automemcpy_result_analyzer PRIVATE automemcpy_result_analyzer_lib automemcpy_implementations) -llvm_update_compile_flags(automemcpy_result_analyzer) diff --git a/libc/benchmarks/automemcpy/lib/CodeGen.cpp b/libc/benchmarks/automemcpy/lib/CodeGen.cpp deleted file mode 100644 index d1336eaf31f34..0000000000000 --- a/libc/benchmarks/automemcpy/lib/CodeGen.cpp +++ /dev/null @@ -1,644 +0,0 @@ -//===-- C++ code generation from NamedFunctionDescriptors -----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// This code is responsible for generating the "Implementation.cpp" file. -// The file is composed like this: -// -// 1. Includes -// 2. Using statements to help readability. -// 3. Source code for all the mem function implementations. -// 4. The function to retrieve all the function descriptors with their name. -// llvm::ArrayRef getFunctionDescriptors(); -// 5. The functions for the benchmarking infrastructure: -// llvm::ArrayRef getMemcpyConfigurations(); -// llvm::ArrayRef getMemcmpConfigurations(); -// llvm::ArrayRef getBcmpConfigurations(); -// llvm::ArrayRef getMemsetConfigurations(); -// llvm::ArrayRef getBzeroConfigurations(); -// -// -// Sections 3, 4 and 5 are handled by the following namespaces: -// - codegen::functions -// - codegen::descriptors -// - codegen::configurations -// -// The programming style is functionnal. In each of these namespace, the -// original `NamedFunctionDescriptor` object is turned into a different type. We -// make use of overloaded stream operators to format the resulting type into -// either a function, a descriptor or a configuration. The entry point of each -// namespace is the Serialize function. -// -// Note the code here is better understood by starting from the `Serialize` -// function at the end of the file. - -#include "automemcpy/CodeGen.h" -#include "src/__support/macros/config.h" -#include -#include -#include -#include -#include -#include -#include - -namespace llvm { -namespace automemcpy { -namespace codegen { - -// The indentation string. -static constexpr StringRef kIndent = " "; - -// The codegen namespace handles the serialization of a NamedFunctionDescriptor -// into source code for the function, the descriptor and the configuration. - -namespace functions { - -// This namespace turns a NamedFunctionDescriptor into an actual implementation. -// ----------------------------------------------------------------------------- -// e.g. -// static void memcpy_0xB20D4702493C397E(char *__restrict dst, -// const char *__restrict src, -// size_t size) { -// using namespace LIBC_NAMESPACE::x86; -// if(size == 0) return; -// if(size == 1) return copy<_1>(dst, src); -// if(size < 4) return copy>(dst, src, size); -// if(size < 8) return copy>(dst, src, size); -// if(size < 16) return copy>(dst, src, size); -// if(size < 32) return copy>(dst, src, size); -// return copy(dst, src, size); -// } - -// The `Serialize` method turns a `NamedFunctionDescriptor` into a -// `FunctionImplementation` which holds all the information needed to produce -// the C++ source code. - -// An Element with its size (e.g. `_16` in the example above). -struct ElementType { - size_t Size; -}; -// The case `if(size == 0)` is encoded as a the Zero type. -struct Zero { - StringRef DefaultReturnValue; -}; -// An individual size `if(size == X)` is encoded as an Individual type. -struct Individual { - size_t IfEq; - ElementType Element; -}; -// An overlap strategy is encoded as an Overlap type. -struct Overlap { - size_t IfLt; - ElementType Element; -}; -// A loop strategy is encoded as a Loop type. -struct Loop { - size_t IfLt; - ElementType Element; -}; -// An aligned loop strategy is encoded as an AlignedLoop type. -struct AlignedLoop { - size_t IfLt; - ElementType Element; - ElementType Alignment; - StringRef AlignTo; -}; -// The accelerator strategy. -struct Accelerator { - size_t IfLt; -}; -// The Context stores data about the function type. -struct Context { - StringRef FunctionReturnType; // e.g. void* or int - StringRef FunctionArgs; - StringRef ElementOp; // copy, three_way_compare, splat_set, ... - StringRef FixedSizeArgs; - StringRef RuntimeSizeArgs; - StringRef DefaultReturnValue; -}; -// A detailed representation of the function implementation mapped from the -// NamedFunctionDescriptor. -struct FunctionImplementation { - Context Ctx; - StringRef Name; - std::vector Individuals; - std::vector Overlaps; - std::optional Loop; - std::optional AlignedLoop; - std::optional Accelerator; - ElementTypeClass ElementClass; -}; - -// Returns the Context for each FunctionType. -static Context getCtx(FunctionType FT) { - switch (FT) { - case FunctionType::MEMCPY: - return {"void", - "(char *__restrict dst, const char *__restrict src, size_t size)", - "copy", - "(dst, src)", - "(dst, src, size)", - ""}; - case FunctionType::MEMCMP: - return {"int", - "(const char * lhs, const char * rhs, size_t size)", - "three_way_compare", - "(lhs, rhs)", - "(lhs, rhs, size)", - "0"}; - case FunctionType::MEMSET: - return {"void", - "(char * dst, int value, size_t size)", - "splat_set", - "(dst, value)", - "(dst, value, size)", - ""}; - case FunctionType::BZERO: - return {"void", "(char * dst, size_t size)", - "splat_set", "(dst, 0)", - "(dst, 0, size)", ""}; - default: - report_fatal_error("Not yet implemented"); - } -} - -static StringRef getAligntoString(const AlignArg &AlignTo) { - switch (AlignTo) { - case AlignArg::_1: - return "Arg::P1"; - case AlignArg::_2: - return "Arg::P2"; - case AlignArg::ARRAY_SIZE: - report_fatal_error("logic error"); - } -} - -static raw_ostream &operator<<(raw_ostream &Stream, const ElementType &E) { - return Stream << '_' << E.Size; -} -static raw_ostream &operator<<(raw_ostream &Stream, const Individual &O) { - return Stream << O.Element; -} -static raw_ostream &operator<<(raw_ostream &Stream, const Overlap &O) { - return Stream << "HeadTail<" << O.Element << '>'; -} -static raw_ostream &operator<<(raw_ostream &Stream, const Loop &O) { - return Stream << "Loop<" << O.Element << '>'; -} -static raw_ostream &operator<<(raw_ostream &Stream, const AlignedLoop &O) { - return Stream << "Align<" << O.Alignment << ',' << O.AlignTo << ">::Then<" - << Loop{O.IfLt, O.Element} << ">"; -} -static raw_ostream &operator<<(raw_ostream &Stream, const Accelerator &O) { - return Stream << "Accelerator"; -} - -template struct IfEq { - StringRef Op; - StringRef Args; - const T ∈ -}; - -template struct IfLt { - StringRef Op; - StringRef Args; - const T ∈ -}; - -static raw_ostream &operator<<(raw_ostream &Stream, const Zero &O) { - Stream << kIndent << "if(size == 0) return"; - if (!O.DefaultReturnValue.empty()) - Stream << ' ' << O.DefaultReturnValue; - return Stream << ";\n"; -} - -template -static raw_ostream &operator<<(raw_ostream &Stream, const IfEq &O) { - return Stream << kIndent << "if(size == " << O.Element.IfEq << ") return " - << O.Op << '<' << O.Element << '>' << O.Args << ";\n"; -} - -template -static raw_ostream &operator<<(raw_ostream &Stream, const IfLt &O) { - Stream << kIndent; - if (O.Element.IfLt != kMaxSize) - Stream << "if(size < " << O.Element.IfLt << ") "; - return Stream << "return " << O.Op << '<' << O.Element << '>' << O.Args - << ";\n"; -} - -static raw_ostream &operator<<(raw_ostream &Stream, - const ElementTypeClass &Class) { - switch (Class) { - case ElementTypeClass::SCALAR: - return Stream << "scalar"; - case ElementTypeClass::BUILTIN: - return Stream << "builtin"; - case ElementTypeClass::NATIVE: - // FIXME: the framework should provide a `native` namespace that redirect to - // x86, arm or other architectures. - return Stream << "x86"; - } -} - -static raw_ostream &operator<<(raw_ostream &Stream, - const FunctionImplementation &FI) { - const auto &Ctx = FI.Ctx; - Stream << "static " << Ctx.FunctionReturnType << ' ' << FI.Name - << Ctx.FunctionArgs << " {\n"; - Stream << kIndent << "using namespace LIBC_NAMESPACE::" << FI.ElementClass - << ";\n"; - for (const auto &I : FI.Individuals) - if (I.Element.Size == 0) - Stream << Zero{Ctx.DefaultReturnValue}; - else - Stream << IfEq{Ctx.ElementOp, Ctx.FixedSizeArgs, I}; - for (const auto &O : FI.Overlaps) - Stream << IfLt{Ctx.ElementOp, Ctx.RuntimeSizeArgs, O}; - if (const auto &C = FI.Loop) - Stream << IfLt{Ctx.ElementOp, Ctx.RuntimeSizeArgs, *C}; - if (const auto &C = FI.AlignedLoop) - Stream << IfLt{Ctx.ElementOp, Ctx.RuntimeSizeArgs, *C}; - if (const auto &C = FI.Accelerator) - Stream << IfLt{Ctx.ElementOp, Ctx.RuntimeSizeArgs, *C}; - return Stream << "}\n"; -} - -// Turns a `NamedFunctionDescriptor` into a `FunctionImplementation` unfolding -// the contiguous and overlap region into several statements. The zero case is -// also mapped to its own type. -static FunctionImplementation -getImplementation(const NamedFunctionDescriptor &NamedFD) { - const FunctionDescriptor &FD = NamedFD.Desc; - FunctionImplementation Impl; - Impl.Ctx = getCtx(FD.Type); - Impl.Name = NamedFD.Name; - Impl.ElementClass = FD.ElementClass; - if (auto C = FD.Contiguous) - for (size_t I = C->Span.Begin; I < C->Span.End; ++I) - Impl.Individuals.push_back(Individual{I, ElementType{I}}); - if (auto C = FD.Overlap) - for (size_t I = C->Span.Begin; I < C->Span.End; I *= 2) - Impl.Overlaps.push_back(Overlap{2 * I, ElementType{I}}); - if (const auto &L = FD.Loop) - Impl.Loop = Loop{L->Span.End, ElementType{L->BlockSize}}; - if (const auto &AL = FD.AlignedLoop) - Impl.AlignedLoop = - AlignedLoop{AL->Loop.Span.End, ElementType{AL->Loop.BlockSize}, - ElementType{AL->Alignment}, getAligntoString(AL->AlignTo)}; - if (const auto &A = FD.Accelerator) - Impl.Accelerator = Accelerator{A->Span.End}; - return Impl; -} - -static void Serialize(raw_ostream &Stream, - ArrayRef Descriptors) { - - for (const auto &FD : Descriptors) - Stream << getImplementation(FD); -} - -} // namespace functions - -namespace descriptors { - -// This namespace generates the getFunctionDescriptors function: -// ------------------------------------------------------------- -// e.g. -// ArrayRef getFunctionDescriptors() { -// static constexpr NamedFunctionDescriptor kDescriptors[] = { -// {"memcpy_0xE00E29EE73994E2B",{FunctionType::MEMCPY,std::nullopt,std::nullopt,std::nullopt,std::nullopt,Accelerator{{0,kMaxSize}},ElementTypeClass::NATIVE}}, -// {"memcpy_0x8661D80472487AB5",{FunctionType::MEMCPY,Contiguous{{0,1}},std::nullopt,std::nullopt,std::nullopt,Accelerator{{1,kMaxSize}},ElementTypeClass::NATIVE}}, -// ... -// }; -// return ArrayRef(kDescriptors); -// } - -static raw_ostream &operator<<(raw_ostream &Stream, const SizeSpan &SS) { - Stream << "{" << SS.Begin << ','; - if (SS.End == kMaxSize) - Stream << "kMaxSize"; - else - Stream << SS.End; - return Stream << '}'; -} -static raw_ostream &operator<<(raw_ostream &Stream, const Contiguous &O) { - return Stream << "Contiguous{" << O.Span << '}'; -} -static raw_ostream &operator<<(raw_ostream &Stream, const Overlap &O) { - return Stream << "Overlap{" << O.Span << '}'; -} -static raw_ostream &operator<<(raw_ostream &Stream, const Loop &O) { - return Stream << "Loop{" << O.Span << ',' << O.BlockSize << '}'; -} -static raw_ostream &operator<<(raw_ostream &Stream, const AlignArg &O) { - switch (O) { - case AlignArg::_1: - return Stream << "AlignArg::_1"; - case AlignArg::_2: - return Stream << "AlignArg::_2"; - case AlignArg::ARRAY_SIZE: - report_fatal_error("logic error"); - } -} -static raw_ostream &operator<<(raw_ostream &Stream, const AlignedLoop &O) { - return Stream << "AlignedLoop{" << O.Loop << ',' << O.Alignment << ',' - << O.AlignTo << '}'; -} -static raw_ostream &operator<<(raw_ostream &Stream, const Accelerator &O) { - return Stream << "Accelerator{" << O.Span << '}'; -} -static raw_ostream &operator<<(raw_ostream &Stream, const ElementTypeClass &O) { - switch (O) { - case ElementTypeClass::SCALAR: - return Stream << "ElementTypeClass::SCALAR"; - case ElementTypeClass::BUILTIN: - return Stream << "ElementTypeClass::BUILTIN"; - case ElementTypeClass::NATIVE: - return Stream << "ElementTypeClass::NATIVE"; - } -} -static raw_ostream &operator<<(raw_ostream &Stream, const FunctionType &T) { - switch (T) { - case FunctionType::MEMCPY: - return Stream << "FunctionType::MEMCPY"; - case FunctionType::MEMCMP: - return Stream << "FunctionType::MEMCMP"; - case FunctionType::BCMP: - return Stream << "FunctionType::BCMP"; - case FunctionType::MEMSET: - return Stream << "FunctionType::MEMSET"; - case FunctionType::BZERO: - return Stream << "FunctionType::BZERO"; - } -} -template -static raw_ostream &operator<<(raw_ostream &Stream, - const std::optional &MaybeT) { - if (MaybeT) - return Stream << *MaybeT; - return Stream << "std::nullopt"; -} -static raw_ostream &operator<<(raw_ostream &Stream, - const FunctionDescriptor &FD) { - return Stream << '{' << FD.Type << ',' << FD.Contiguous << ',' << FD.Overlap - << ',' << FD.Loop << ',' << FD.AlignedLoop << ',' - << FD.Accelerator << ',' << FD.ElementClass << '}'; -} -static raw_ostream &operator<<(raw_ostream &Stream, - const NamedFunctionDescriptor &NFD) { - return Stream << '{' << '"' << NFD.Name << '"' << ',' << NFD.Desc << '}'; -} -template -static raw_ostream &operator<<(raw_ostream &Stream, - const std::vector &VectorT) { - Stream << '{'; - bool First = true; - for (const auto &Obj : VectorT) { - if (!First) - Stream << ','; - Stream << Obj; - First = false; - } - return Stream << '}'; -} - -static void Serialize(raw_ostream &Stream, - ArrayRef Descriptors) { - Stream << R"(ArrayRef getFunctionDescriptors() { - static constexpr NamedFunctionDescriptor kDescriptors[] = { -)"; - for (size_t I = 0, E = Descriptors.size(); I < E; ++I) { - Stream << kIndent << kIndent << Descriptors[I] << ",\n"; - } - Stream << R"( }; - return ArrayRef(kDescriptors); -} -)"; -} - -} // namespace descriptors - -namespace configurations { - -// This namespace generates the getXXXConfigurations functions: -// ------------------------------------------------------------ -// e.g. -// llvm::ArrayRef getMemcpyConfigurations() { -// using namespace LIBC_NAMESPACE; -// static constexpr MemcpyConfiguration kConfigurations[] = { -// {Wrap, "memcpy_0xE00E29EE73994E2B"}, -// {Wrap, "memcpy_0x8661D80472487AB5"}, -// ... -// }; -// return llvm::ArrayRef(kConfigurations); -// } - -// The `Wrap` template function is provided in the `Main` function below. -// It is used to adapt the gnerated code to the prototype of the C function. -// For instance, the generated code for a `memcpy` takes `char*` pointers and -// returns nothing but the original C `memcpy` function take and returns `void*` -// pointers. - -struct FunctionName { - FunctionType ForType; -}; - -struct ReturnType { - FunctionType ForType; -}; - -struct Configuration { - FunctionName Name; - ReturnType Type; - std::vector Descriptors; -}; - -static raw_ostream &operator<<(raw_ostream &Stream, const FunctionName &FN) { - switch (FN.ForType) { - case FunctionType::MEMCPY: - return Stream << "getMemcpyConfigurations"; - case FunctionType::MEMCMP: - return Stream << "getMemcmpConfigurations"; - case FunctionType::BCMP: - return Stream << "getBcmpConfigurations"; - case FunctionType::MEMSET: - return Stream << "getMemsetConfigurations"; - case FunctionType::BZERO: - return Stream << "getBzeroConfigurations"; - } -} - -static raw_ostream &operator<<(raw_ostream &Stream, const ReturnType &RT) { - switch (RT.ForType) { - case FunctionType::MEMCPY: - return Stream << "MemcpyConfiguration"; - case FunctionType::MEMCMP: - case FunctionType::BCMP: - return Stream << "MemcmpOrBcmpConfiguration"; - case FunctionType::MEMSET: - return Stream << "MemsetConfiguration"; - case FunctionType::BZERO: - return Stream << "BzeroConfiguration"; - } -} - -static raw_ostream &operator<<(raw_ostream &Stream, - const NamedFunctionDescriptor *FD) { - return Stream << formatv("{Wrap<{0}>, \"{0}\"}", FD->Name); -} - -static raw_ostream & -operator<<(raw_ostream &Stream, - const std::vector &Descriptors) { - for (size_t I = 0, E = Descriptors.size(); I < E; ++I) - Stream << kIndent << kIndent << Descriptors[I] << ",\n"; - return Stream; -} - -static raw_ostream &operator<<(raw_ostream &Stream, const Configuration &C) { - Stream << "llvm::ArrayRef<" << C.Type << "> " << C.Name << "() {\n"; - if (C.Descriptors.empty()) - Stream << kIndent << "return {};\n"; - else { - Stream << kIndent << "using namespace LIBC_NAMESPACE;\n"; - Stream << kIndent << "static constexpr " << C.Type - << " kConfigurations[] = {\n"; - Stream << C.Descriptors; - Stream << kIndent << "};\n"; - Stream << kIndent << "return llvm::ArrayRef(kConfigurations);\n"; - } - Stream << "}\n"; - return Stream; -} - -static void Serialize(raw_ostream &Stream, FunctionType FT, - ArrayRef Descriptors) { - Configuration Conf; - Conf.Name = {FT}; - Conf.Type = {FT}; - for (const auto &FD : Descriptors) - if (FD.Desc.Type == FT) - Conf.Descriptors.push_back(&FD); - Stream << Conf; -} - -} // namespace configurations -static void Serialize(raw_ostream &Stream, - ArrayRef Descriptors) { - Stream << "// This file is auto-generated by libc/benchmarks/automemcpy.\n"; - Stream << "// Functions : " << Descriptors.size() << "\n"; - Stream << "\n"; - Stream << "#include \"LibcFunctionPrototypes.h\"\n"; - Stream << "#include \"automemcpy/FunctionDescriptor.h\"\n"; - Stream << "#include \"src/string/memory_utils/elements.h\"\n"; - Stream << "\n"; - Stream << "using llvm::libc_benchmarks::BzeroConfiguration;\n"; - Stream << "using llvm::libc_benchmarks::MemcmpOrBcmpConfiguration;\n"; - Stream << "using llvm::libc_benchmarks::MemcpyConfiguration;\n"; - Stream << "using llvm::libc_benchmarks::MemmoveConfiguration;\n"; - Stream << "using llvm::libc_benchmarks::MemsetConfiguration;\n"; - Stream << "\n"; - Stream << "namespace LIBC_NAMESPACE_DECL {\n"; - Stream << "\n"; - codegen::functions::Serialize(Stream, Descriptors); - Stream << "\n"; - Stream << "} // namespace LIBC_NAMESPACE_DECL\n"; - Stream << "\n"; - Stream << "namespace llvm {\n"; - Stream << "namespace automemcpy {\n"; - Stream << "\n"; - codegen::descriptors::Serialize(Stream, Descriptors); - Stream << "\n"; - Stream << "} // namespace automemcpy\n"; - Stream << "} // namespace llvm\n"; - Stream << "\n"; - Stream << R"( -using MemcpyStub = void (*)(char *__restrict, const char *__restrict, size_t); -template -void *Wrap(void *__restrict dst, const void *__restrict src, size_t size) { - Foo(reinterpret_cast(dst), - reinterpret_cast(src), size); - return dst; -} -)"; - codegen::configurations::Serialize(Stream, FunctionType::MEMCPY, Descriptors); - Stream << R"( -using MemcmpStub = int (*)(const char *, const char *, size_t); -template -int Wrap(const void *lhs, const void *rhs, size_t size) { - return Foo(reinterpret_cast(lhs), - reinterpret_cast(rhs), size); -} -)"; - codegen::configurations::Serialize(Stream, FunctionType::MEMCMP, Descriptors); - codegen::configurations::Serialize(Stream, FunctionType::BCMP, Descriptors); - Stream << R"( -using MemsetStub = void (*)(char *, int, size_t); -template void *Wrap(void *dst, int value, size_t size) { - Foo(reinterpret_cast(dst), value, size); - return dst; -} -)"; - codegen::configurations::Serialize(Stream, FunctionType::MEMSET, Descriptors); - Stream << R"( -using BzeroStub = void (*)(char *, size_t); -template void Wrap(void *dst, size_t size) { - Foo(reinterpret_cast(dst), size); -} -)"; - codegen::configurations::Serialize(Stream, FunctionType::BZERO, Descriptors); - Stream << R"( -llvm::ArrayRef getMemmoveConfigurations() { - return {}; -} -)"; - Stream << "// Functions : " << Descriptors.size() << "\n"; -} - -} // namespace codegen - -// Stores `VolatileStr` into a cache and returns a StringRef of the cached -// version. -StringRef getInternalizedString(std::string VolatileStr) { - static llvm::StringSet StringCache; - return StringCache.insert(std::move(VolatileStr)).first->getKey(); -} - -static StringRef getString(FunctionType FT) { - switch (FT) { - case FunctionType::MEMCPY: - return "memcpy"; - case FunctionType::MEMCMP: - return "memcmp"; - case FunctionType::BCMP: - return "bcmp"; - case FunctionType::MEMSET: - return "memset"; - case FunctionType::BZERO: - return "bzero"; - } -} - -void Serialize(raw_ostream &Stream, ArrayRef Descriptors) { - std::vector FunctionDescriptors; - FunctionDescriptors.reserve(Descriptors.size()); - for (auto &FD : Descriptors) { - FunctionDescriptors.emplace_back(); - FunctionDescriptors.back().Name = getInternalizedString( - formatv("{0}_{1:X16}", getString(FD.Type), FD.id())); - FunctionDescriptors.back().Desc = std::move(FD); - } - // Sort functions so they are easier to spot in the generated C++ file. - std::sort(FunctionDescriptors.begin(), FunctionDescriptors.end(), - [](const NamedFunctionDescriptor &A, - const NamedFunctionDescriptor &B) { return A.Desc < B.Desc; }); - codegen::Serialize(Stream, FunctionDescriptors); -} - -} // namespace automemcpy -} // namespace llvm diff --git a/libc/benchmarks/automemcpy/lib/CodeGenMain.cpp b/libc/benchmarks/automemcpy/lib/CodeGenMain.cpp deleted file mode 100644 index 3f4e6fc0423a1..0000000000000 --- a/libc/benchmarks/automemcpy/lib/CodeGenMain.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#include "automemcpy/CodeGen.h" -#include "automemcpy/RandomFunctionGenerator.h" -#include -#include - -namespace llvm { -namespace automemcpy { - -std::vector generateFunctionDescriptors() { - std::unordered_set Seen; - std::vector FunctionDescriptors; - RandomFunctionGenerator P; - while (std::optional MaybeFD = P.next()) { - FunctionDescriptor FD = *MaybeFD; - if (Seen.count(FD)) // FIXME: Z3 sometimes returns twice the same object. - continue; - Seen.insert(FD); - FunctionDescriptors.push_back(std::move(FD)); - } - return FunctionDescriptors; -} - -} // namespace automemcpy -} // namespace llvm - -int main(int, char **) { - llvm::automemcpy::Serialize(llvm::outs(), - llvm::automemcpy::generateFunctionDescriptors()); -} diff --git a/libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp b/libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp deleted file mode 100644 index f438e2a405bd0..0000000000000 --- a/libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp +++ /dev/null @@ -1,280 +0,0 @@ -//===-- Generate random but valid function descriptors -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "automemcpy/RandomFunctionGenerator.h" - -#include -#include - -#include -#include - -namespace llvm { -namespace automemcpy { - -// Exploration parameters -// ---------------------- -// Here we define a set of values that will contraint the exploration and -// limit combinatorial explosion. - -// We limit the number of cases for individual sizes to sizes up to 4. -// More individual sizes don't bring much over the overlapping strategy. -static constexpr int kMaxIndividualSize = 4; - -// We limit Overlapping Strategy to sizes up to 256. -// An overlap of 256B means accessing 128B at once which is usually not -// feasible by current CPUs. We rely on the compiler to generate multiple -// loads/stores if needed but higher sizes are unlikely to benefit from hardware -// acceleration. -static constexpr int kMaxOverlapSize = 256; - -// For the loop strategies, we make sure that they iterate at least a certain -// number of times to amortize the cost of looping. -static constexpr int kLoopMinIter = 3; -static constexpr int kAlignedLoopMinIter = 2; - -// We restrict the size of the block of data to handle in a loop. -// Generally speaking block size <= 16 perform poorly. -static constexpr int kLoopBlockSize[] = {16, 32, 64}; - -// We restrict alignment to the following values. -static constexpr int kLoopAlignments[] = {16, 32, 64}; - -// We make sure that the region bounds are one of the following values. -static constexpr int kAnchors[] = {0, 1, 2, 4, 8, 16, 32, 48, - 64, 96, 128, 256, 512, 1024, kMaxSize}; - -// We also allow disabling loops, aligned loops and accelerators. -static constexpr bool kDisableLoop = false; -static constexpr bool kDisableAlignedLoop = false; -static constexpr bool kDisableAccelerator = false; - -// For memcpy, we can also explore whether aligning on source or destination has -// an effect. -static constexpr bool kExploreAlignmentArg = true; - -// The function we generate code for. -// BCMP is specifically disabled for now. -static constexpr int kFunctionTypes[] = { - (int)FunctionType::MEMCPY, - (int)FunctionType::MEMCMP, - // (int)FunctionType::BCMP, - (int)FunctionType::MEMSET, - (int)FunctionType::BZERO, -}; - -// The actual implementation of each function can be handled via primitive types -// (SCALAR), vector types where available (NATIVE) or by the compiler (BUILTIN). -// We want to move toward delegating the code generation entirely to the -// compiler but for now we have to make use of -per microarchitecture- custom -// implementations. Scalar being more portable but also less performant, we -// remove it as well. -static constexpr int kElementClasses[] = { - // (int)ElementTypeClass::SCALAR, - (int)ElementTypeClass::NATIVE, - // (int)ElementTypeClass::BUILTIN -}; - -RandomFunctionGenerator::RandomFunctionGenerator() - : Solver(Context), Type(Context.int_const("Type")), - ContiguousBegin(Context.int_const("ContiguousBegin")), - ContiguousEnd(Context.int_const("ContiguousEnd")), - OverlapBegin(Context.int_const("OverlapBegin")), - OverlapEnd(Context.int_const("OverlapEnd")), - LoopBegin(Context.int_const("LoopBegin")), - LoopEnd(Context.int_const("LoopEnd")), - LoopBlockSize(Context.int_const("LoopBlockSize")), - AlignedLoopBegin(Context.int_const("AlignedLoopBegin")), - AlignedLoopEnd(Context.int_const("AlignedLoopEnd")), - AlignedLoopBlockSize(Context.int_const("AlignedLoopBlockSize")), - AlignedAlignment(Context.int_const("AlignedAlignment")), - AlignedArg(Context.int_const("AlignedArg")), - AcceleratorBegin(Context.int_const("AcceleratorBegin")), - AcceleratorEnd(Context.int_const("AcceleratorEnd")), - ElementClass(Context.int_const("ElementClass")) { - // All possible functions. - Solver.add(inSetConstraint(Type, kFunctionTypes)); - - // Add constraints for region bounds. - addBoundsAndAnchors(ContiguousBegin, ContiguousEnd); - addBoundsAndAnchors(OverlapBegin, OverlapEnd); - addBoundsAndAnchors(LoopBegin, LoopEnd); - addBoundsAndAnchors(AlignedLoopBegin, AlignedLoopEnd); - addBoundsAndAnchors(AcceleratorBegin, AcceleratorEnd); - // We always consider strategies in this order, and we - // always end with the `Accelerator` strategy, as it's typically more - // efficient for large sizes. - // Contiguous <= Overlap <= Loop <= AlignedLoop <= Accelerator - Solver.add(ContiguousEnd == OverlapBegin); - Solver.add(OverlapEnd == LoopBegin); - Solver.add(LoopEnd == AlignedLoopBegin); - Solver.add(AlignedLoopEnd == AcceleratorBegin); - // Fix endpoints: The minimum size that we want to copy is 0, and we always - // start with the `Contiguous` strategy. The max size is `kMaxSize`. - Solver.add(ContiguousBegin == 0); - Solver.add(AcceleratorEnd == kMaxSize); - // Contiguous - Solver.add(ContiguousEnd <= kMaxIndividualSize + 1); - // Overlap - Solver.add(OverlapEnd <= kMaxOverlapSize + 1); - // Overlap only ever makes sense when accessing multiple bytes at a time. - // i.e. Overlap<1> is useless. - Solver.add(OverlapBegin == OverlapEnd || OverlapBegin >= 2); - // Loop - addLoopConstraints(LoopBegin, LoopEnd, LoopBlockSize, kLoopMinIter); - // Aligned Loop - addLoopConstraints(AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize, - kAlignedLoopMinIter); - Solver.add(inSetConstraint(AlignedAlignment, kLoopAlignments)); - Solver.add(AlignedLoopBegin == AlignedLoopEnd || AlignedLoopBegin >= 64); - Solver.add(AlignedLoopBlockSize >= AlignedAlignment); - Solver.add(AlignedLoopBlockSize >= LoopBlockSize); - z3::expr IsMemcpy = Type == (int)FunctionType::MEMCPY; - z3::expr ExploreAlignment = IsMemcpy && kExploreAlignmentArg; - Solver.add( - (ExploreAlignment && - inSetConstraint(AlignedArg, {(int)AlignArg::_1, (int)AlignArg::_2})) || - (!ExploreAlignment && AlignedArg == (int)AlignArg::_1)); - // Accelerator - Solver.add(IsMemcpy || - (AcceleratorBegin == - AcceleratorEnd)); // Only Memcpy has accelerator for now. - // Element classes - Solver.add(inSetConstraint(ElementClass, kElementClasses)); - - if (kDisableLoop) - Solver.add(LoopBegin == LoopEnd); - if (kDisableAlignedLoop) - Solver.add(AlignedLoopBegin == AlignedLoopEnd); - if (kDisableAccelerator) - Solver.add(AcceleratorBegin == AcceleratorEnd); -} - -// Creates SizeSpan from Begin/End values. -// Returns std::nullopt if Begin==End. -static std::optional AsSizeSpan(size_t Begin, size_t End) { - if (Begin == End) - return std::nullopt; - SizeSpan SS; - SS.Begin = Begin; - SS.End = End; - return SS; -} - -// Generic method to create a `Region` struct with a Span or std::nullopt if -// span is empty. -template -static std::optional As(size_t Begin, size_t End) { - if (auto Span = AsSizeSpan(Begin, End)) { - Region Output; - Output.Span = *Span; - return Output; - } - return std::nullopt; -} - -// Returns a Loop struct or std::nullopt if span is empty. -static std::optional AsLoop(size_t Begin, size_t End, size_t BlockSize) { - if (auto Span = AsSizeSpan(Begin, End)) { - Loop Output; - Output.Span = *Span; - Output.BlockSize = BlockSize; - return Output; - } - return std::nullopt; -} - -// Returns an AlignedLoop struct or std::nullopt if span is empty. -static std::optional AsAlignedLoop(size_t Begin, size_t End, - size_t BlockSize, - size_t Alignment, - AlignArg AlignTo) { - if (auto Loop = AsLoop(Begin, End, BlockSize)) { - AlignedLoop Output; - Output.Loop = *Loop; - Output.Alignment = Alignment; - Output.AlignTo = AlignTo; - return Output; - } - return std::nullopt; -} - -std::optional RandomFunctionGenerator::next() { - if (Solver.check() != z3::sat) - return {}; - - z3::model m = Solver.get_model(); - - // Helper method to get the current numerical value of a z3::expr. - const auto E = [&m](z3::expr &V) -> int { - return m.eval(V).get_numeral_int(); - }; - - // Fill is the function descriptor to return. - FunctionDescriptor R; - R.Type = FunctionType(E(Type)); - R.Contiguous = As(E(ContiguousBegin), E(ContiguousEnd)); - R.Overlap = As(E(OverlapBegin), E(OverlapEnd)); - R.Loop = AsLoop(E(LoopBegin), E(LoopEnd), E(LoopBlockSize)); - R.AlignedLoop = AsAlignedLoop(E(AlignedLoopBegin), E(AlignedLoopEnd), - E(AlignedLoopBlockSize), E(AlignedAlignment), - AlignArg(E(AlignedArg))); - R.Accelerator = As(E(AcceleratorBegin), E(AcceleratorEnd)); - R.ElementClass = ElementTypeClass(E(ElementClass)); - - // Express current state as a set of constraints. - z3::expr CurrentLayout = - (Type == E(Type)) && (ContiguousBegin == E(ContiguousBegin)) && - (ContiguousEnd == E(ContiguousEnd)) && - (OverlapBegin == E(OverlapBegin)) && (OverlapEnd == E(OverlapEnd)) && - (LoopBegin == E(LoopBegin)) && (LoopEnd == E(LoopEnd)) && - (LoopBlockSize == E(LoopBlockSize)) && - (AlignedLoopBegin == E(AlignedLoopBegin)) && - (AlignedLoopEnd == E(AlignedLoopEnd)) && - (AlignedLoopBlockSize == E(AlignedLoopBlockSize)) && - (AlignedAlignment == E(AlignedAlignment)) && - (AlignedArg == E(AlignedArg)) && - (AcceleratorBegin == E(AcceleratorBegin)) && - (AcceleratorEnd == E(AcceleratorEnd)) && - (ElementClass == E(ElementClass)); - - // Ask solver to never show this configuration ever again. - Solver.add(!CurrentLayout); - return R; -} - -// Make sure `Variable` is one of the provided values. -z3::expr RandomFunctionGenerator::inSetConstraint(z3::expr &Variable, - ArrayRef Values) const { - z3::expr_vector Args(Variable.ctx()); - for (int Value : Values) - Args.push_back(Variable == Value); - return z3::mk_or(Args); -} - -void RandomFunctionGenerator::addBoundsAndAnchors(z3::expr &Begin, - z3::expr &End) { - // Begin and End are picked amongst a set of predefined values. - Solver.add(inSetConstraint(Begin, kAnchors)); - Solver.add(inSetConstraint(End, kAnchors)); - Solver.add(Begin >= 0); - Solver.add(Begin <= End); - Solver.add(End <= kMaxSize); -} - -void RandomFunctionGenerator::addLoopConstraints(const z3::expr &LoopBegin, - const z3::expr &LoopEnd, - z3::expr &LoopBlockSize, - int LoopMinIter) { - Solver.add(inSetConstraint(LoopBlockSize, kLoopBlockSize)); - Solver.add(LoopBegin == LoopEnd || - (LoopBegin > (LoopMinIter * LoopBlockSize))); -} - -} // namespace automemcpy -} // namespace llvm diff --git a/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp b/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp deleted file mode 100644 index b134f6c83a0df..0000000000000 --- a/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp +++ /dev/null @@ -1,204 +0,0 @@ -//===-- Analyze benchmark JSON files --------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// This code analyzes the json file produced by the `automemcpy` binary. -// -// As a remainder, `automemcpy` will benchmark each autogenerated memory -// functions against one of the predefined distributions available in the -// `libc/benchmarks/distributions` folder. -// -// It works as follows: -// - Reads one or more json files. -// - If there are several runs for the same function and distribution, picks the -// median throughput (aka `BytesPerSecond`). -// - Aggregates the throughput per distributions and scores them from worst (0) -// to best (1). -// - Each distribution categorizes each function into one of the following -// categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE, -// BAD. -// - A process similar to the Majority Judgment voting system is used to `elect` -// the best function. The histogram of grades is returned so we can -// distinguish between functions with the same final grade. In the following -// example both functions grade EXCELLENT but we may prefer the second one. -// -// | | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ... -// |------------|-----------|-----------|------|----------| ... -// | Function_1 | 7 | 1 | 2 | | ... -// | Function_2 | 6 | 4 | | | ... - -#include "automemcpy/ResultAnalyzer.h" -#include "llvm/ADT/StringRef.h" -#include -#include - -namespace llvm { - -namespace automemcpy { - -StringRef Grade::getString(const GradeEnum &GE) { - switch (GE) { - case EXCELLENT: - return "EXCELLENT"; - case VERY_GOOD: - return "VERY_GOOD"; - case GOOD: - return "GOOD"; - case PASSABLE: - return "PASSABLE"; - case INADEQUATE: - return "INADEQUATE"; - case MEDIOCRE: - return "MEDIOCRE"; - case BAD: - return "BAD"; - case ARRAY_SIZE: - report_fatal_error("logic error"); - } -} - -Grade::GradeEnum Grade::judge(double Score) { - if (Score >= 6. / 7) - return EXCELLENT; - if (Score >= 5. / 7) - return VERY_GOOD; - if (Score >= 4. / 7) - return GOOD; - if (Score >= 3. / 7) - return PASSABLE; - if (Score >= 2. / 7) - return INADEQUATE; - if (Score >= 1. / 7) - return MEDIOCRE; - return BAD; -} - -static double computeUnbiasedSampleVariance(const std::vector &Samples, - const double SampleMean) { - assert(!Samples.empty()); - if (Samples.size() == 1) - return 0; - double DiffSquaresSum = 0; - for (const double S : Samples) { - const double Diff = S - SampleMean; - DiffSquaresSum += Diff * Diff; - } - return DiffSquaresSum / (Samples.size() - 1); -} - -static void processPerDistributionData(PerDistributionData &Data) { - auto &Samples = Data.BytesPerSecondSamples; - assert(!Samples.empty()); - // Sample Mean - const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0); - Data.BytesPerSecondMean = Sum / Samples.size(); - // Unbiased Sample Variance - Data.BytesPerSecondVariance = - computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean); - // Median - const size_t HalfSize = Samples.size() / 2; - std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end()); - Data.BytesPerSecondMedian = Samples[HalfSize]; -} - -std::vector getThroughputs(ArrayRef Samples) { - std::unordered_map Functions; - for (const auto &S : Samples) { - if (S.Type != SampleType::ITERATION) - break; - auto &Function = Functions[S.Id.Function]; - auto &Data = Function.PerDistributionData[S.Id.Distribution.Name]; - Data.BytesPerSecondSamples.push_back(S.BytesPerSecond); - } - - std::vector Output; - for (auto &[FunctionId, Function] : Functions) { - Function.Id = FunctionId; - for (auto &Pair : Function.PerDistributionData) - processPerDistributionData(Pair.second); - Output.push_back(std::move(Function)); - } - return Output; -} - -void fillScores(MutableArrayRef Functions) { - // A key to bucket throughput per function type and distribution. - struct Key { - FunctionType Type; - StringRef Distribution; - - COMPARABLE_AND_HASHABLE(Key, Type, Distribution) - }; - - // Tracks minimum and maximum values. - struct MinMax { - double Min = std::numeric_limits::max(); - double Max = std::numeric_limits::min(); - void update(double Value) { - if (Value < Min) - Min = Value; - if (Value > Max) - Max = Value; - } - double normalize(double Value) const { return (Value - Min) / (Max - Min); } - }; - - std::unordered_map ThroughputMinMax; - for (const auto &Function : Functions) { - const FunctionType Type = Function.Id.Type; - for (const auto &Pair : Function.PerDistributionData) { - const auto &Distribution = Pair.getKey(); - const double Throughput = Pair.getValue().BytesPerSecondMedian; - const Key K{Type, Distribution}; - ThroughputMinMax[K].update(Throughput); - } - } - - for (auto &Function : Functions) { - const FunctionType Type = Function.Id.Type; - for (const auto &Pair : Function.PerDistributionData) { - const auto &Distribution = Pair.getKey(); - const double Throughput = Pair.getValue().BytesPerSecondMedian; - const Key K{Type, Distribution}; - Function.PerDistributionData[Distribution].Score = - ThroughputMinMax[K].normalize(Throughput); - } - } -} - -void castVotes(MutableArrayRef Functions) { - for (FunctionData &Function : Functions) { - Function.ScoresGeoMean = 1.0; - for (const auto &Pair : Function.PerDistributionData) { - const StringRef Distribution = Pair.getKey(); - const double Score = Pair.getValue().Score; - Function.ScoresGeoMean *= Score; - const auto G = Grade::judge(Score); - ++(Function.GradeHisto[G]); - Function.PerDistributionData[Distribution].Grade = G; - } - } - - for (FunctionData &Function : Functions) { - const auto &GradeHisto = Function.GradeHisto; - const size_t Votes = - std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U); - const size_t MedianVote = Votes / 2; - size_t CountedVotes = 0; - Grade::GradeEnum MedianGrade = Grade::BAD; - for (size_t I = 0; I < GradeHisto.size(); ++I) { - CountedVotes += GradeHisto[I]; - if (CountedVotes > MedianVote) { - MedianGrade = Grade::GradeEnum(I); - break; - } - } - Function.FinalGrade = MedianGrade; - } -} - -} // namespace automemcpy -} // namespace llvm diff --git a/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp b/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp deleted file mode 100644 index 00eef73a3f38a..0000000000000 --- a/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp +++ /dev/null @@ -1,175 +0,0 @@ -//===-- Application to analyze benchmark JSON files -----------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "automemcpy/ResultAnalyzer.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/JSON.h" -#include "llvm/Support/MemoryBuffer.h" - -namespace llvm { - -// User can specify one or more json filenames to process on the command line. -static cl::list InputFilenames(cl::Positional, cl::OneOrMore, - cl::desc("")); - -// User can filter the distributions to be taken into account. -static cl::list - KeepOnlyDistributions("keep-only-distributions", - cl::desc("")); - -namespace automemcpy { - -// This is defined in the autogenerated 'Implementations.cpp' file. -extern ArrayRef getFunctionDescriptors(); - -// Iterates over all functions and fills a map of function name to function -// descriptor pointers. -static StringMap createFunctionDescriptorMap() { - StringMap Descriptors; - for (const NamedFunctionDescriptor &FD : getFunctionDescriptors()) - Descriptors.insert_or_assign(FD.Name, &FD.Desc); - return Descriptors; -} - -// Retrieves the function descriptor for a particular function name. -static const FunctionDescriptor &getFunctionDescriptor(StringRef FunctionName) { - static StringMap Descriptors = - createFunctionDescriptorMap(); - const auto *FD = Descriptors.lookup(FunctionName); - if (!FD) - report_fatal_error( - Twine("No FunctionDescriptor for ").concat(FunctionName)); - return *FD; -} - -// Functions and distributions names are stored quite a few times so it's more -// efficient to internalize these strings and refer to them through 'StringRef'. -static StringRef getInternalizedString(StringRef VolatileStr) { - static llvm::StringSet StringCache; - return StringCache.insert(VolatileStr).first->getKey(); -} - -// Helper function for the LLVM JSON API. -bool fromJSON(const json::Value &V, Sample &Out, json::Path P) { - std::string Label; - std::string RunType; - json::ObjectMapper O(V, P); - if (O && O.map("bytes_per_second", Out.BytesPerSecond) && - O.map("run_type", RunType) && O.map("label", Label)) { - const auto LabelPair = StringRef(Label).split(','); - Out.Id.Function.Name = getInternalizedString(LabelPair.first); - Out.Id.Function.Type = getFunctionDescriptor(LabelPair.first).Type; - Out.Id.Distribution.Name = getInternalizedString(LabelPair.second); - Out.Type = StringSwitch(RunType) - .Case("aggregate", SampleType::AGGREGATE) - .Case("iteration", SampleType::ITERATION); - return true; - } - return false; -} - -// An object to represent the content of the JSON file. -// This is easier to parse/serialize JSON when the structures of the json file -// maps the structure of the object. -struct JsonFile { - std::vector Samples; -}; - -// Helper function for the LLVM JSON API. -bool fromJSON(const json::Value &V, JsonFile &JF, json::Path P) { - json::ObjectMapper O(V, P); - return O && O.map("benchmarks", JF.Samples); -} - -// Global object to ease error reporting, it consumes errors and crash the -// application with a meaningful message. -static ExitOnError ExitOnErr; - -// Main JSON parsing method. Reads the content of the file pointed to by -// 'Filename' and returns a JsonFile object. -JsonFile parseJsonResultFile(StringRef Filename) { - auto Buf = ExitOnErr(errorOrToExpected( - MemoryBuffer::getFile(Filename, /*bool IsText=*/true, - /*RequiresNullTerminator=*/false))); - auto JsonValue = ExitOnErr(json::parse(Buf->getBuffer())); - json::Path::Root Root; - JsonFile JF; - if (!fromJSON(JsonValue, JF, Root)) - ExitOnErr(Root.getError()); - return JF; -} - -// Serializes the 'GradeHisto' to the provided 'Stream'. -static void Serialize(raw_ostream &Stream, const GradeHistogram &GH) { - static constexpr std::array kCharacters = { - " ", "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"}; - - const size_t Max = *std::max_element(GH.begin(), GH.end()); - for (size_t I = 0; I < GH.size(); ++I) { - size_t Index = (float(GH[I]) / Max) * (kCharacters.size() - 1); - Stream << kCharacters.at(Index); - } -} - -int Main(int argc, char **argv) { - ExitOnErr.setBanner("Automemcpy Json Results Analyzer stopped with error: "); - cl::ParseCommandLineOptions(argc, argv, "Automemcpy Json Results Analyzer\n"); - - // Reads all samples stored in the input JSON files. - std::vector Samples; - for (const auto &Filename : InputFilenames) { - auto Result = parseJsonResultFile(Filename); - llvm::append_range(Samples, Result.Samples); - } - - if (!KeepOnlyDistributions.empty()) { - llvm::StringSet ValidDistributions; - ValidDistributions.insert(KeepOnlyDistributions.begin(), - KeepOnlyDistributions.end()); - llvm::erase_if(Samples, [&ValidDistributions](const Sample &S) { - return !ValidDistributions.contains(S.Id.Distribution.Name); - }); - } - - // Extracts median of throughputs. - std::vector Functions = getThroughputs(Samples); - fillScores(Functions); - castVotes(Functions); - - // Present data by function type, Grade and Geomean of scores. - std::sort(Functions.begin(), Functions.end(), - [](const FunctionData &A, const FunctionData &B) { - const auto Less = [](const FunctionData &FD) { - return std::make_tuple(FD.Id.Type, FD.FinalGrade, - -FD.ScoresGeoMean); - }; - return Less(A) < Less(B); - }); - - // Print result. - for (const FunctionData &Function : Functions) { - outs() << formatv("{0,-10}", Grade::getString(Function.FinalGrade)); - outs() << " |"; - Serialize(outs(), Function.GradeHisto); - outs() << "| "; - outs().resetColor(); - outs() << formatv("{0,+25}", Function.Id.Name); - outs() << "\n"; - } - - return EXIT_SUCCESS; -} - -} // namespace automemcpy -} // namespace llvm - -int main(int argc, char **argv) { return llvm::automemcpy::Main(argc, argv); } diff --git a/libc/benchmarks/automemcpy/unittests/CMakeLists.txt b/libc/benchmarks/automemcpy/unittests/CMakeLists.txt deleted file mode 100644 index 35caaac1519ba..0000000000000 --- a/libc/benchmarks/automemcpy/unittests/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_libc_benchmark_unittest(libc-automemcpy-codegen-test - SRCS CodeGenTest.cpp - DEPENDS automemcpy_codegen -) - -add_libc_benchmark_unittest(libc-automemcpy-result-analyzer-test - SRCS ResultAnalyzerTest.cpp - DEPENDS automemcpy_result_analyzer_lib -) diff --git a/libc/benchmarks/automemcpy/unittests/CodeGenTest.cpp b/libc/benchmarks/automemcpy/unittests/CodeGenTest.cpp deleted file mode 100644 index a7fc8570a73b0..0000000000000 --- a/libc/benchmarks/automemcpy/unittests/CodeGenTest.cpp +++ /dev/null @@ -1,226 +0,0 @@ -//===-- Automemcpy CodeGen Test -------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "automemcpy/CodeGen.h" -#include "automemcpy/RandomFunctionGenerator.h" -#include "src/__support/macros/config.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include - -using testing::AllOf; -using testing::AnyOf; -using testing::ElementsAre; -using testing::Ge; -using testing::Gt; -using testing::Le; -using testing::Lt; - -namespace llvm { -namespace automemcpy { -namespace { - -TEST(Automemcpy, Codegen) { - static constexpr FunctionDescriptor kDescriptors[] = { - {FunctionType::MEMCPY, std::nullopt, std::nullopt, std::nullopt, std::nullopt, - Accelerator{{0, kMaxSize}}, ElementTypeClass::NATIVE}, - {FunctionType::MEMCPY, Contiguous{{0, 4}}, Overlap{{4, 256}}, - Loop{{256, kMaxSize}, 64}, std::nullopt, std::nullopt, - ElementTypeClass::NATIVE}, - {FunctionType::MEMCMP, Contiguous{{0, 2}}, Overlap{{2, 64}}, std::nullopt, - AlignedLoop{Loop{{64, kMaxSize}, 16}, 16, AlignArg::_1}, std::nullopt, - ElementTypeClass::NATIVE}, - {FunctionType::MEMSET, Contiguous{{0, 2}}, Overlap{{2, 256}}, std::nullopt, - AlignedLoop{Loop{{256, kMaxSize}, 32}, 16, AlignArg::_1}, std::nullopt, - ElementTypeClass::NATIVE}, - {FunctionType::MEMSET, Contiguous{{0, 2}}, Overlap{{2, 256}}, std::nullopt, - AlignedLoop{Loop{{256, kMaxSize}, 32}, 32, AlignArg::_1}, std::nullopt, - ElementTypeClass::NATIVE}, - {FunctionType::BZERO, Contiguous{{0, 4}}, Overlap{{4, 128}}, std::nullopt, - AlignedLoop{Loop{{128, kMaxSize}, 32}, 32, AlignArg::_1}, std::nullopt, - ElementTypeClass::NATIVE}, - }; - - std::string Output; - raw_string_ostream OutputStream(Output); - Serialize(OutputStream, kDescriptors); - - EXPECT_STREQ(Output.c_str(), - R"(// This file is auto-generated by libc/benchmarks/automemcpy. -// Functions : 6 - -#include "LibcFunctionPrototypes.h" -#include "automemcpy/FunctionDescriptor.h" -#include "src/string/memory_utils/elements.h" - -using llvm::libc_benchmarks::BzeroConfiguration; -using llvm::libc_benchmarks::MemcmpOrBcmpConfiguration; -using llvm::libc_benchmarks::MemcpyConfiguration; -using llvm::libc_benchmarks::MemmoveConfiguration; -using llvm::libc_benchmarks::MemsetConfiguration; - -namespace LIBC_NAMESPACE_DECL { - -static void memcpy_0xE00E29EE73994E2B(char *__restrict dst, const char *__restrict src, size_t size) { - using namespace LIBC_NAMESPACE::x86; - return copy(dst, src, size); -} -static void memcpy_0x7381B60C7BE75EF9(char *__restrict dst, const char *__restrict src, size_t size) { - using namespace LIBC_NAMESPACE::x86; - if(size == 0) return; - if(size == 1) return copy<_1>(dst, src); - if(size == 2) return copy<_2>(dst, src); - if(size == 3) return copy<_3>(dst, src); - if(size < 8) return copy>(dst, src, size); - if(size < 16) return copy>(dst, src, size); - if(size < 32) return copy>(dst, src, size); - if(size < 64) return copy>(dst, src, size); - if(size < 128) return copy>(dst, src, size); - if(size < 256) return copy>(dst, src, size); - return copy>(dst, src, size); -} -static int memcmp_0x348D7BA6DB0EE033(const char * lhs, const char * rhs, size_t size) { - using namespace LIBC_NAMESPACE::x86; - if(size == 0) return 0; - if(size == 1) return three_way_compare<_1>(lhs, rhs); - if(size < 4) return three_way_compare>(lhs, rhs, size); - if(size < 8) return three_way_compare>(lhs, rhs, size); - if(size < 16) return three_way_compare>(lhs, rhs, size); - if(size < 32) return three_way_compare>(lhs, rhs, size); - if(size < 64) return three_way_compare>(lhs, rhs, size); - return three_way_compare::Then>>(lhs, rhs, size); -} -static void memset_0x71E761699B999863(char * dst, int value, size_t size) { - using namespace LIBC_NAMESPACE::x86; - if(size == 0) return; - if(size == 1) return splat_set<_1>(dst, value); - if(size < 4) return splat_set>(dst, value, size); - if(size < 8) return splat_set>(dst, value, size); - if(size < 16) return splat_set>(dst, value, size); - if(size < 32) return splat_set>(dst, value, size); - if(size < 64) return splat_set>(dst, value, size); - if(size < 128) return splat_set>(dst, value, size); - if(size < 256) return splat_set>(dst, value, size); - return splat_set::Then>>(dst, value, size); -} -static void memset_0x3DF0F44E2ED6A50F(char * dst, int value, size_t size) { - using namespace LIBC_NAMESPACE::x86; - if(size == 0) return; - if(size == 1) return splat_set<_1>(dst, value); - if(size < 4) return splat_set>(dst, value, size); - if(size < 8) return splat_set>(dst, value, size); - if(size < 16) return splat_set>(dst, value, size); - if(size < 32) return splat_set>(dst, value, size); - if(size < 64) return splat_set>(dst, value, size); - if(size < 128) return splat_set>(dst, value, size); - if(size < 256) return splat_set>(dst, value, size); - return splat_set::Then>>(dst, value, size); -} -static void bzero_0x475977492C218AD4(char * dst, size_t size) { - using namespace LIBC_NAMESPACE::x86; - if(size == 0) return; - if(size == 1) return splat_set<_1>(dst, 0); - if(size == 2) return splat_set<_2>(dst, 0); - if(size == 3) return splat_set<_3>(dst, 0); - if(size < 8) return splat_set>(dst, 0, size); - if(size < 16) return splat_set>(dst, 0, size); - if(size < 32) return splat_set>(dst, 0, size); - if(size < 64) return splat_set>(dst, 0, size); - if(size < 128) return splat_set>(dst, 0, size); - return splat_set::Then>>(dst, 0, size); -} - -} // namespace LIBC_NAMESPACE_DECL - -namespace llvm { -namespace automemcpy { - -ArrayRef getFunctionDescriptors() { - static constexpr NamedFunctionDescriptor kDescriptors[] = { - {"memcpy_0xE00E29EE73994E2B",{FunctionType::MEMCPY,std::nullopt,std::nullopt,std::nullopt,std::nullopt,Accelerator{{0,kMaxSize}},ElementTypeClass::NATIVE}}, - {"memcpy_0x7381B60C7BE75EF9",{FunctionType::MEMCPY,Contiguous{{0,4}},Overlap{{4,256}},Loop{{256,kMaxSize},64},std::nullopt,std::nullopt,ElementTypeClass::NATIVE}}, - {"memcmp_0x348D7BA6DB0EE033",{FunctionType::MEMCMP,Contiguous{{0,2}},Overlap{{2,64}},std::nullopt,AlignedLoop{Loop{{64,kMaxSize},16},16,AlignArg::_1},std::nullopt,ElementTypeClass::NATIVE}}, - {"memset_0x71E761699B999863",{FunctionType::MEMSET,Contiguous{{0,2}},Overlap{{2,256}},std::nullopt,AlignedLoop{Loop{{256,kMaxSize},32},16,AlignArg::_1},std::nullopt,ElementTypeClass::NATIVE}}, - {"memset_0x3DF0F44E2ED6A50F",{FunctionType::MEMSET,Contiguous{{0,2}},Overlap{{2,256}},std::nullopt,AlignedLoop{Loop{{256,kMaxSize},32},32,AlignArg::_1},std::nullopt,ElementTypeClass::NATIVE}}, - {"bzero_0x475977492C218AD4",{FunctionType::BZERO,Contiguous{{0,4}},Overlap{{4,128}},std::nullopt,AlignedLoop{Loop{{128,kMaxSize},32},32,AlignArg::_1},std::nullopt,ElementTypeClass::NATIVE}}, - }; - return ArrayRef(kDescriptors); -} - -} // namespace automemcpy -} // namespace llvm - - -using MemcpyStub = void (*)(char *__restrict, const char *__restrict, size_t); -template -void *Wrap(void *__restrict dst, const void *__restrict src, size_t size) { - Foo(reinterpret_cast(dst), - reinterpret_cast(src), size); - return dst; -} -llvm::ArrayRef getMemcpyConfigurations() { - using namespace LIBC_NAMESPACE; - static constexpr MemcpyConfiguration kConfigurations[] = { - {Wrap, "memcpy_0xE00E29EE73994E2B"}, - {Wrap, "memcpy_0x7381B60C7BE75EF9"}, - }; - return llvm::ArrayRef(kConfigurations); -} - -using MemcmpStub = int (*)(const char *, const char *, size_t); -template -int Wrap(const void *lhs, const void *rhs, size_t size) { - return Foo(reinterpret_cast(lhs), - reinterpret_cast(rhs), size); -} -llvm::ArrayRef getMemcmpConfigurations() { - using namespace LIBC_NAMESPACE; - static constexpr MemcmpOrBcmpConfiguration kConfigurations[] = { - {Wrap, "memcmp_0x348D7BA6DB0EE033"}, - }; - return llvm::ArrayRef(kConfigurations); -} -llvm::ArrayRef getBcmpConfigurations() { - return {}; -} - -using MemsetStub = void (*)(char *, int, size_t); -template void *Wrap(void *dst, int value, size_t size) { - Foo(reinterpret_cast(dst), value, size); - return dst; -} -llvm::ArrayRef getMemsetConfigurations() { - using namespace LIBC_NAMESPACE; - static constexpr MemsetConfiguration kConfigurations[] = { - {Wrap, "memset_0x71E761699B999863"}, - {Wrap, "memset_0x3DF0F44E2ED6A50F"}, - }; - return llvm::ArrayRef(kConfigurations); -} - -using BzeroStub = void (*)(char *, size_t); -template void Wrap(void *dst, size_t size) { - Foo(reinterpret_cast(dst), size); -} -llvm::ArrayRef getBzeroConfigurations() { - using namespace LIBC_NAMESPACE; - static constexpr BzeroConfiguration kConfigurations[] = { - {Wrap, "bzero_0x475977492C218AD4"}, - }; - return llvm::ArrayRef(kConfigurations); -} - -llvm::ArrayRef getMemmoveConfigurations() { - return {}; -} -// Functions : 6 -)"); -} -} // namespace -} // namespace automemcpy -} // namespace llvm diff --git a/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp b/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp deleted file mode 100644 index 7b67f70eb89cd..0000000000000 --- a/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp +++ /dev/null @@ -1,191 +0,0 @@ -//===-- Automemcpy Json Results Analyzer Test ----------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "automemcpy/ResultAnalyzer.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -using testing::DoubleNear; -using testing::ElementsAre; -using testing::Pair; -using testing::SizeIs; - -namespace llvm { -namespace automemcpy { -namespace { - -TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsOneSample) { - static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY}; - static constexpr DistributionId DistA = {{"A"}}; - static constexpr SampleId Id = {Foo1, DistA}; - static constexpr Sample kSamples[] = { - Sample{Id, SampleType::ITERATION, 4}, - Sample{Id, SampleType::AGGREGATE, -1}, // Aggegates gets discarded - }; - - const std::vector Data = getThroughputs(kSamples); - EXPECT_THAT(Data, SizeIs(1)); - EXPECT_THAT(Data[0].Id, Foo1); - EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1)); - // A single value is provided. - const auto &DistributionData = Data[0].PerDistributionData.lookup(DistA.Name); - EXPECT_THAT(DistributionData.BytesPerSecondMedian, 4); - EXPECT_THAT(DistributionData.BytesPerSecondMean, 4); - EXPECT_THAT(DistributionData.BytesPerSecondVariance, 0); -} - -TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsManySamplesSameBucket) { - static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY}; - static constexpr DistributionId DistA = {{"A"}}; - static constexpr SampleId Id = {Foo1, DistA}; - static constexpr Sample kSamples[] = {Sample{Id, SampleType::ITERATION, 4}, - Sample{Id, SampleType::ITERATION, 5}, - Sample{Id, SampleType::ITERATION, 5}}; - - const std::vector Data = getThroughputs(kSamples); - EXPECT_THAT(Data, SizeIs(1)); - EXPECT_THAT(Data[0].Id, Foo1); - EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1)); - // When multiple values are provided we pick the median one (here median of 4, - // 5, 5). - const auto &DistributionData = Data[0].PerDistributionData.lookup(DistA.Name); - EXPECT_THAT(DistributionData.BytesPerSecondMedian, 5); - EXPECT_THAT(DistributionData.BytesPerSecondMean, DoubleNear(4.6, 0.1)); - EXPECT_THAT(DistributionData.BytesPerSecondVariance, DoubleNear(0.33, 0.01)); -} - -TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsServeralFunctionAndDist) { - static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY}; - static constexpr DistributionId DistA = {{"A"}}; - static constexpr FunctionId Foo2 = {"memcpy2", FunctionType::MEMCPY}; - static constexpr DistributionId DistB = {{"B"}}; - static constexpr Sample kSamples[] = { - Sample{{Foo1, DistA}, SampleType::ITERATION, 1}, - Sample{{Foo1, DistB}, SampleType::ITERATION, 2}, - Sample{{Foo2, DistA}, SampleType::ITERATION, 3}, - Sample{{Foo2, DistB}, SampleType::ITERATION, 4}}; - // Data is aggregated per function. - const std::vector Data = getThroughputs(kSamples); - EXPECT_THAT(Data, SizeIs(2)); // 2 functions Foo1 and Foo2. - // Each function has data for both distributions DistA and DistB. - EXPECT_THAT(Data[0].PerDistributionData, SizeIs(2)); - EXPECT_THAT(Data[1].PerDistributionData, SizeIs(2)); -} - -TEST(AutomemcpyJsonResultsAnalyzer, getScore) { - static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY}; - static constexpr FunctionId Foo2 = {"memcpy2", FunctionType::MEMCPY}; - static constexpr FunctionId Foo3 = {"memcpy3", FunctionType::MEMCPY}; - static constexpr DistributionId Dist = {{"A"}}; - static constexpr Sample kSamples[] = { - Sample{{Foo1, Dist}, SampleType::ITERATION, 1}, - Sample{{Foo2, Dist}, SampleType::ITERATION, 2}, - Sample{{Foo3, Dist}, SampleType::ITERATION, 3}}; - - // Data is aggregated per function. - std::vector Data = getThroughputs(kSamples); - - // Sort Data by function name so we can test them. - std::sort( - Data.begin(), Data.end(), - [](const FunctionData &A, const FunctionData &B) { return A.Id < B.Id; }); - - EXPECT_THAT(Data[0].Id, Foo1); - EXPECT_THAT(Data[0].PerDistributionData.lookup("A").BytesPerSecondMedian, 1); - EXPECT_THAT(Data[1].Id, Foo2); - EXPECT_THAT(Data[1].PerDistributionData.lookup("A").BytesPerSecondMedian, 2); - EXPECT_THAT(Data[2].Id, Foo3); - EXPECT_THAT(Data[2].PerDistributionData.lookup("A").BytesPerSecondMedian, 3); - - // Normalizes throughput per distribution. - fillScores(Data); - EXPECT_THAT(Data[0].PerDistributionData.lookup("A").Score, 0); - EXPECT_THAT(Data[1].PerDistributionData.lookup("A").Score, 0.5); - EXPECT_THAT(Data[2].PerDistributionData.lookup("A").Score, 1); -} - -TEST(AutomemcpyJsonResultsAnalyzer, castVotes) { - static constexpr double kAbsErr = 0.01; - - static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY}; - static constexpr FunctionId Foo2 = {"memcpy2", FunctionType::MEMCPY}; - static constexpr FunctionId Foo3 = {"memcpy3", FunctionType::MEMCPY}; - static constexpr DistributionId DistA = {{"A"}}; - static constexpr DistributionId DistB = {{"B"}}; - static constexpr Sample kSamples[] = { - Sample{{Foo1, DistA}, SampleType::ITERATION, 0}, - Sample{{Foo1, DistB}, SampleType::ITERATION, 30}, - Sample{{Foo2, DistA}, SampleType::ITERATION, 1}, - Sample{{Foo2, DistB}, SampleType::ITERATION, 100}, - Sample{{Foo3, DistA}, SampleType::ITERATION, 7}, - Sample{{Foo3, DistB}, SampleType::ITERATION, 100}, - }; - - // DistA Thoughput ranges from 0 to 7. - // DistB Thoughput ranges from 30 to 100. - - // Data is aggregated per function. - std::vector Data = getThroughputs(kSamples); - - // Sort Data by function name so we can test them. - std::sort( - Data.begin(), Data.end(), - [](const FunctionData &A, const FunctionData &B) { return A.Id < B.Id; }); - - // Normalizes throughput per distribution. - fillScores(Data); - - // Cast votes - castVotes(Data); - - EXPECT_THAT(Data[0].Id, Foo1); - EXPECT_THAT(Data[1].Id, Foo2); - EXPECT_THAT(Data[2].Id, Foo3); - - const auto GetDistData = [&Data](size_t Index, StringRef Name) { - return Data[Index].PerDistributionData.lookup(Name); - }; - - // Distribution A - // Throughput is 0, 1 and 7, so normalized scores are 0, 1/7 and 1. - EXPECT_THAT(GetDistData(0, "A").Score, DoubleNear(0, kAbsErr)); - EXPECT_THAT(GetDistData(1, "A").Score, DoubleNear(1. / 7, kAbsErr)); - EXPECT_THAT(GetDistData(2, "A").Score, DoubleNear(1, kAbsErr)); - // which are turned into grades BAD, MEDIOCRE and EXCELLENT. - EXPECT_THAT(GetDistData(0, "A").Grade, Grade::BAD); - EXPECT_THAT(GetDistData(1, "A").Grade, Grade::MEDIOCRE); - EXPECT_THAT(GetDistData(2, "A").Grade, Grade::EXCELLENT); - - // Distribution B - // Throughput is 30, 100 and 100, so normalized scores are 0, 1 and 1. - EXPECT_THAT(GetDistData(0, "B").Score, DoubleNear(0, kAbsErr)); - EXPECT_THAT(GetDistData(1, "B").Score, DoubleNear(1, kAbsErr)); - EXPECT_THAT(GetDistData(2, "B").Score, DoubleNear(1, kAbsErr)); - // which are turned into grades BAD, EXCELLENT and EXCELLENT. - EXPECT_THAT(GetDistData(0, "B").Grade, Grade::BAD); - EXPECT_THAT(GetDistData(1, "B").Grade, Grade::EXCELLENT); - EXPECT_THAT(GetDistData(2, "B").Grade, Grade::EXCELLENT); - - // Now looking from the functions point of view. - EXPECT_THAT(Data[0].ScoresGeoMean, DoubleNear(0, kAbsErr)); - EXPECT_THAT(Data[1].ScoresGeoMean, DoubleNear(1. * (1. / 7), kAbsErr)); - EXPECT_THAT(Data[2].ScoresGeoMean, DoubleNear(1, kAbsErr)); - - // Note the array is indexed by GradeEnum values (EXCELLENT=0 / BAD = 6) - EXPECT_THAT(Data[0].GradeHisto, ElementsAre(0, 0, 0, 0, 0, 0, 2)); - EXPECT_THAT(Data[1].GradeHisto, ElementsAre(1, 0, 0, 0, 0, 1, 0)); - EXPECT_THAT(Data[2].GradeHisto, ElementsAre(2, 0, 0, 0, 0, 0, 0)); - - EXPECT_THAT(Data[0].FinalGrade, Grade::BAD); - EXPECT_THAT(Data[1].FinalGrade, Grade::MEDIOCRE); - EXPECT_THAT(Data[2].FinalGrade, Grade::EXCELLENT); -} - -} // namespace -} // namespace automemcpy -} // namespace llvm diff --git a/libc/docs/dev/source_tree_layout.rst b/libc/docs/dev/source_tree_layout.rst index 0010f138317b5..bd9d6ca453e08 100644 --- a/libc/docs/dev/source_tree_layout.rst +++ b/libc/docs/dev/source_tree_layout.rst @@ -29,8 +29,7 @@ The ``benchmarks`` directory ---------------------------- The ``benchmarks`` directory contains LLVM-libc's benchmarking utilities. These -are mostly used for the memory functions. This also includes the automemcpy -subdirectory for automatic generation of optimized memory functions. +are mostly used for the memory functions. The ``config`` directory ------------------------