Skip to content

Commit bab7691

Browse files
authored
Merge pull request llvm#381 from AMD-Lightning-Internal/amd/dev/juamarti/reduced-cache-over-mainline
[COMGR][Cache] No-Source Code Cache implementation
2 parents 4919bab + f1e4b64 commit bab7691

31 files changed

+1164
-19
lines changed

amd/comgr/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ option(COMGR_BUILD_SHARED_LIBS "Build the shared library"
6969
${build_shared_libs_default})
7070

7171
set(SOURCES
72+
src/comgr-cache.cpp
73+
src/comgr-cache-command.cpp
7274
src/comgr-compiler.cpp
7375
src/comgr.cpp
7476
src/comgr-device-libs.cpp

amd/comgr/README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,26 @@ These include:
125125
certain runtime headers. If this is not set, it has a default value of
126126
"${ROCM_PATH}/llvm".
127127

128+
Comgr utilizes a cache to preserve the results of compilations between executions.
129+
The cache's status (enabled/disabled), storage location for its results,
130+
and eviction policy can be manipulated through specific environment variables.
131+
If an issue arises during cache initialization, the execution will proceed with
132+
the cache turned off.
133+
134+
By default, the cache is turned off. Set the environment variable
135+
`AMD_COMGR_CACHE=1` to enable it. This may change in a future release.
136+
137+
* `AMD_COMGR_CACHE`: When unset or set to 0, the cache is turned off.
138+
* `AMD_COMGR_CACHE_DIR`: When set to "", the cache is turned off. If assigned a
139+
value, that value is used as the path for cache storage. By default, it is
140+
directed to "$XDG_CACHE_HOME/comgr_cache" (which defaults to
141+
"$HOME/.cache/comgr_cache" on Linux, and "%LOCALAPPDATA%\cache\comgr_cache"
142+
on Microsoft Windows).
143+
* `AMD_COMGR_CACHE_POLICY`: If assigned a value, the string is interpreted and
144+
applied to the cache pruning policy. The cache is pruned only upon program
145+
termination. The string format aligns with [Clang's ThinLTO cache pruning policy](https://clang.llvm.org/docs/ThinLTO.html#cache-pruning).
146+
The default policy is set as: "prune_interval=1h:prune_expiration=0h:cache_size=75%:cache_size_bytes=30g:cache_size_files=0".
147+
128148
Comgr also supports some environment variables to aid in debugging. These
129149
include:
130150

amd/comgr/cmake/DeviceLibs.cmake

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ foreach(AMDGCN_LIB_TARGET ${AMD_DEVICE_LIBS_TARGETS})
5959
add_dependencies(amd_comgr ${AMDGCN_LIB_TARGET}_header)
6060

6161
list(APPEND TARGETS_INCLUDES "#include \"${header}\"")
62+
list(APPEND TARGETS_HEADERS "${INC_DIR}/${header}")
6263
endforeach()
6364

6465
list(JOIN TARGETS_INCLUDES "\n" TARGETS_INCLUDES)
@@ -110,4 +111,17 @@ list(APPEND TARGETS_DEFS "#undef AMD_DEVICE_LIBS_FUNCTION")
110111
list(JOIN TARGETS_DEFS "\n" TARGETS_DEFS)
111112
file(GENERATE OUTPUT ${GEN_LIBRARY_DEFS_INC_FILE} CONTENT "${TARGETS_DEFS}")
112113

114+
# Compute the sha256 of the generated device-library headers so that comgr can
# detect changes to them (the value is used by the cache).
find_package(Python3 REQUIRED Interpreter)
set(DEVICE_LIBS_ID_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/cmake/device-libs-id.py")
set(DEVICE_LIBS_ID_HEADER ${INC_DIR}/libraries_sha.inc)
# VERBATIM makes CMake escape the arguments for the build tool so that the
# script receives every path exactly as written here.
add_custom_command(OUTPUT ${DEVICE_LIBS_ID_HEADER}
  COMMAND ${Python3_EXECUTABLE} ${DEVICE_LIBS_ID_SCRIPT} --varname DEVICE_LIBS_ID --output ${DEVICE_LIBS_ID_HEADER} ${TARGETS_HEADERS}
  DEPENDS ${DEVICE_LIBS_ID_SCRIPT} ${TARGETS_HEADERS}
  COMMENT "Generating ${DEVICE_LIBS_ID_HEADER}"
  VERBATIM
)
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${DEVICE_LIBS_ID_HEADER})
add_custom_target(libraries_sha_header DEPENDS ${DEVICE_LIBS_ID_HEADER})
add_dependencies(amd_comgr libraries_sha_header)
126+
113127
include_directories(${INC_DIR})

amd/comgr/cmake/device-libs-id.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from argparse import ArgumentParser
2+
from hashlib import sha256
3+
from functools import reduce
4+
5+
if __name__ == "__main__":
    # Command line: one or more header paths, plus the C variable name and the
    # output file for the generated declaration.
    parser = ArgumentParser(description='Generate id by computing a hash of the generated headers')
    parser.add_argument("headers", nargs='+', help='List of headers to generate id from')
    parser.add_argument("--varname", help='Name of the variable to generate', required=True)
    parser.add_argument("--output", help='Name of the header to generate', required=True)

    args = parser.parse_args()

    # Hash the headers in sorted path order so that the id does not depend on
    # the order in which the build system lists them.
    digest = sha256()
    for path in sorted(args.headers):
        # Close every header as soon as it has been read.
        with open(path, 'rb') as header:
            digest.update(header.read())

    # The array is declared as `char`, so bytes >= 128 are written as their
    # negative two's-complement value.
    signed_bytes = [b if b < 128 else b - 256 for b in digest.digest()]
    digest_elts = ", ".join(map(str, signed_bytes))

    # The context manager guarantees the generated file is flushed and closed
    # before the script exits.
    with open(args.output, 'w') as out:
        print(f"static const char {args.varname}[] = {{{digest_elts}, 0}};", file=out)

amd/comgr/cmake/opencl_pch.cmake

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,7 @@ endfunction()
5050

5151
generate_pch(1.2)
5252
generate_pch(2.0)
53+
54+
# Hash the OpenCL C header and pass the result to the comgr compilation.
file(SHA256 ${OPENCL_C_H} OPENCL_C_SHA)
list(APPEND AMD_COMGR_PRIVATE_COMPILE_DEFINITIONS "OPENCL_C_SHA=${OPENCL_C_SHA}")
# file(SHA256) is evaluated at configure time only. Register the header as a
# configure dependency so that the hash is recomputed when the header changes.
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${OPENCL_C_H}")

amd/comgr/src/comgr-cache-command.cpp

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
1+
#include "comgr-cache-command.h"
2+
#include "comgr-cache.h"
3+
#include "comgr-device-libs.h"
4+
#include "comgr-env.h"
5+
#include "comgr.h"
6+
7+
#include <clang/Basic/Version.h>
8+
#include <clang/Driver/Job.h>
9+
#include <llvm/ADT/StringExtras.h>
10+
#include <llvm/ADT/StringSet.h>
11+
12+
#include <optional>
13+
14+
namespace COMGR {
15+
using namespace llvm;
16+
using namespace clang;
17+
18+
namespace {
19+
// Locale-independent replacement for std::isalnum. The standard function
// depends on the current locale and can misbehave depending on the stdlib
// version and the host application, so it is deliberately avoided here.
// Returns true only for the ASCII characters 0-9, a-z and A-Z.
bool isalnum(char c) {
  const bool IsDigit = '0' <= c && c <= '9';
  const bool IsLower = 'a' <= c && c <= 'z';
  const bool IsUpper = 'A' <= c && c <= 'Z';
  return IsDigit || IsLower || IsUpper;
}
30+
31+
// Returns the offset of the first substring of S that matches the pattern
// "comgr-" followed by exactly six ASCII alphanumeric characters, or
// std::nullopt when S contains no such substring.
//
// Ideally, we would use std::regex_search with the regex
// "comgr-[[:alnum:]]{6}". However, due to a bug in stdlibc++
// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85824) we have to roll our
// own search of this regular expression. This bug resulted in a crash in
// luxmarkv3, during the std::regex constructor.
std::optional<size_t> searchComgrTmpModel(StringRef S) {
  const StringRef Prefix = "comgr-";
  const size_t AlnumCount = 6;
  const size_t MatchSize = Prefix.size() + AlnumCount;

  // Examine every occurrence of the prefix, not just the first one: an early
  // "comgr-" that is not followed by six alphanumerics (for example
  // "comgr-cache/comgr-abc123") must not hide a later genuine match.
  for (size_t Pos = S.find(Prefix); Pos != StringRef::npos;
       Pos = S.find(Prefix, Pos + 1)) {
    // Not enough characters left for a full match here, and any later
    // occurrence would have even fewer.
    if (S.size() - Pos < MatchSize)
      return std::nullopt;

    bool AllAlnum = true;
    for (size_t I = Pos + Prefix.size(), E = Pos + MatchSize; I != E; ++I) {
      if (!isalnum(S[I])) {
        AllAlnum = false;
        break;
      }
    }

    if (AllAlnum)
      return Pos;
  }

  return std::nullopt;
}
55+
56+
// Returns true when any argument starts with one of the debug, profiling,
// coverage or time-trace flag prefixes listed below. Such commands are too
// difficult to handle, since they generate information that refers to the
// temporary paths used by comgr.
bool hasDebugOrProfileInfo(ArrayRef<const char *> Args) {
  const StringRef Flags[] = {"-fdebug-info-kind", "-fprofile", "-coverage",
                             "-ftime-trace"};

  return any_of(Args, [&Flags](StringRef Arg) {
    return any_of(Flags,
                  [Arg](StringRef Flag) { return Arg.starts_with(Flag); });
  });
}
70+
71+
// Feeds a string into the hash as its size followed by its contents.
void addString(CachedCommandAdaptor::HashAlgorithm &H, StringRef S) {
  // The size goes in before the bytes so that consecutive strings cannot
  // collide by concatenation: hashing "AA" then "BB" has to differ from
  // hashing "A" then "ABB".
  H.update(S.size());
  H.update(S);
}
78+
79+
// Reads the file at Path and adds its contents to the hash through
// CachedCommandAdaptor::addFileContents. Returns the read error converted to
// an llvm::Error when the file cannot be loaded.
Error addFile(CachedCommandAdaptor::HashAlgorithm &H, StringRef Path) {
  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
      MemoryBuffer::getFile(Path);
  if (!FileOrErr)
    return errorCodeToError(FileOrErr.getError());

  CachedCommandAdaptor::addFileContents(H, (*FileOrErr)->getBuffer());
  return Error::success();
}
90+
91+
template <typename IteratorTy>
92+
bool skipProblematicFlag(IteratorTy &It, const IteratorTy &End) {
93+
// Skip include paths, these should have been handled by preprocessing the
94+
// source first. Sadly, these are passed also to the middle-end commands. Skip
95+
// debug related flags (they should be ignored) like -dumpdir (used for
96+
// profiling/coverage/split-dwarf)
97+
StringRef Arg = *It;
98+
static const StringSet<> FlagsWithPathArg = {"-I", "-dumpdir"};
99+
bool IsFlagWithPathArg = It + 1 != End && FlagsWithPathArg.contains(Arg);
100+
if (IsFlagWithPathArg) {
101+
++It;
102+
return true;
103+
}
104+
105+
// Clang always appends the debug compilation dir,
106+
// even without debug info (in comgr it matches the current directory). We
107+
// only consider it if the user specified debug information
108+
bool IsFlagWithSingleArg = Arg.starts_with("-fdebug-compilation-dir=");
109+
if (IsFlagWithSingleArg) {
110+
return true;
111+
}
112+
113+
return false;
114+
}
115+
116+
// Collects the filenames of the command's inputs, skipping every input that
// is not a filename.
SmallVector<StringRef, 1> getInputFiles(driver::Command &Command) {
  SmallVector<StringRef, 1> Filenames;

  const auto &Infos = Command.getInputInfos();
  Filenames.reserve(Infos.size());
  for (const auto &Info : Infos) {
    if (Info.isFilename())
      Filenames.push_back(Info.getFilename());
  }

  return Filenames;
}
130+
131+
// Returns true when the driver classifies the type of this input as a source
// file.
bool isSourceCodeInput(const driver::InputInfo &II) {
  const auto InputType = II.getType();
  return driver::types::isSrcFile(InputType);
}
134+
} // namespace
135+
136+
// Adds the contents of a file to the hash while leaving out every
// "comgr-xxxxxx" temporary directory name found in it.
//
// This works around temporary paths leaking into the output files of the
// different commands: #line directives in preprocessed files, and the
// ModuleID or source_filename in the bitcode.
void CachedCommandAdaptor::addFileContents(
    CachedCommandAdaptor::HashAlgorithm &H, StringRef Buf) {
  const size_t TmpNameSize = StringRef("comgr-xxxxxx").size();

  StringRef Remaining = Buf;
  while (!Remaining.empty()) {
    const std::optional<size_t> TmpPos = searchComgrTmpModel(Remaining);
    if (!TmpPos.has_value()) {
      // No temporary name left: hash the rest of the buffer and stop.
      addString(H, Remaining);
      return;
    }

    // Hash the text in front of the temporary name, then resume right after
    // it.
    addString(H, Remaining.substr(0, *TmpPos));
    Remaining = Remaining.substr(*TmpPos + TmpNameSize);
  }
}
153+
154+
// Computes the identifier of this command as a lowercase hex digest. The
// hash covers, in this order: the command class, whether verbose logs are
// enabled, the clang version, the comgr identifier, the device libraries
// identifier, the command inputs and finally the command options. Returns
// the error reported while hashing the inputs, if any.
Expected<CachedCommandAdaptor::Identifier>
CachedCommandAdaptor::getIdentifier() const {
  HashAlgorithm Hasher;

  // Properties of the command and of the toolchain that produced it.
  Hasher.update(getClass());
  Hasher.update(env::shouldEmitVerboseLogs());
  addString(Hasher, getClangFullVersion());
  addString(Hasher, getComgrHashIdentifier());
  addString(Hasher, getDeviceLibrariesIdentifier());

  // Inputs can fail to hash (e.g. unreadable files); options cannot.
  if (Error E = addInputIdentifier(Hasher))
    return E;
  addOptionsIdentifier(Hasher);

  Identifier Id;
  toHex(Hasher.final(), /*LowerCase=*/true, Id);
  return Id;
}
172+
173+
// Wraps a driver command together with the diagnostic options and the
// callable that execute() forwards to.
CachedCommand::CachedCommand(driver::Command &Cmd, DiagnosticOptions &Opts,
                             ExecuteFnTy &&Impl)
    : Command(Cmd), DiagOpts(Opts), ExecuteImpl(std::move(Impl)) {}
178+
179+
// Adds the contents of every input file of the command to the hash. Stops at
// the first file that fails to hash and returns its error.
Error CachedCommand::addInputIdentifier(HashAlgorithm &H) const {
  for (StringRef InputPath : getInputFiles(Command)) {
    Error E = addFile(H, InputPath);
    if (E) {
      // call Error's constructor again to silence copy elision warning
      return Error(std::move(E));
    }
  }
  return Error::success();
}
189+
190+
// Adds the command-line arguments of the command to the hash, leaving out
// the ones that must not influence the cache key: flags rejected by
// skipProblematicFlag, paths to files embedded in comgr, and the command's
// own input and output files.
void CachedCommand::addOptionsIdentifier(HashAlgorithm &H) const {
  static const StringSet<> FlagsWithFileArgEmbededInComgr = {
      "-include-pch", "-mlink-builtin-bitcode"};

  const auto InputPaths = getInputFiles(Command);
  const StringRef OutputPath = Command.getOutputFilenames().front();
  ArrayRef<const char *> Args = Command.getArguments();

  for (auto It = Args.begin(), End = Args.end(); It != End; ++It) {
    if (skipProblematicFlag(It, End))
      continue;

    const StringRef Current = *It;

    if (FlagsWithFileArgEmbededInComgr.contains(Current)) {
      // The next argument is a path to a "secondary" input file (pre-compiled
      // header or device-libs builtin). These two kinds of files are embedded
      // in comgr at compile time and normally remain constant for a given
      // comgr build. The user is not able to change them.
      if (++It == End)
        break;
      continue;
    }

    // Input files are considered by their content; output files should not
    // be considered at all.
    if (Current == OutputPath || is_contained(InputPaths, Current))
      continue;

    // On debug builds, fail on /tmp/comgr-xxxx/... paths: implicit
    // dependencies should have been considered before. On release builds the
    // assert is compiled out and the path is added to the hash, which forces
    // a cache miss.
    assert(!searchComgrTmpModel(Current).has_value() &&
           "Unexpected flag and path to comgr temporary directory");

    addString(H, Current);
  }
}
231+
232+
CachedCommand::ActionClass CachedCommand::getClass() const {
233+
return Command.getSource().getKind();
234+
}
235+
236+
bool CachedCommand::canCache() const {
237+
bool HasOneOutput = Command.getOutputFilenames().size() == 1;
238+
bool IsPreprocessorCommand = getClass() == driver::Action::PreprocessJobClass;
239+
240+
// This reduces the applicability of the cache, but it helps us deliver
241+
// something now and deal with the PCH issues later. The cache would still
242+
// help for spirv compilation (e.g. bitcode->asm) and for intermediate
243+
// compilation steps
244+
bool HasSourceCodeInput = any_of(Command.getInputInfos(), isSourceCodeInput);
245+
246+
return HasOneOutput && !IsPreprocessorCommand && !HasSourceCodeInput &&
247+
!hasDebugOrProfileInfo(Command.getArguments());
248+
}
249+
250+
// Writes CachedBuffer to the command's (first) output file. Returns an error
// describing the failure when the file cannot be opened or written.
Error CachedCommand::writeExecuteOutput(StringRef CachedBuffer) {
  StringRef OutputFilename = Command.getOutputFilenames().front();
  std::error_code EC;
  raw_fd_ostream Out(OutputFilename, EC);
  if (EC) {
    return createStringError(EC, Twine("Failed to open ") + OutputFilename +
                                     " : " + EC.message() + "\n");
  }

  Out.write(CachedBuffer.data(), CachedBuffer.size());
  Out.close();
  if (Out.has_error()) {
    // EC only reflects the (successful) open, so the failure has to be taken
    // from the stream itself. The stream error must also be cleared:
    // otherwise the raw_fd_ostream destructor reports a fatal error and
    // aborts before the caller ever sees the Error returned here.
    std::error_code WriteEC = Out.error();
    Out.clear_error();
    return createStringError(WriteEC, Twine("Failed to write ") +
                                          OutputFilename + " : " +
                                          WriteEC.message() + "\n");
  }

  return Error::success();
}
270+
271+
// Loads the command's (first) output file into the Output member and returns
// a view of its contents. The view refers to the buffer held by Output.
// Returns an error when the file cannot be opened.
Expected<StringRef> CachedCommand::readExecuteOutput() {
  const StringRef Path = Command.getOutputFilenames().front();

  auto FileOrErr = MemoryBuffer::getFile(Path);
  if (std::error_code EC = FileOrErr.getError())
    return createStringError(EC, Twine("Failed to open ") + Path + " : " +
                                     EC.message() + "\n");

  Output = std::move(FileOrErr.get());
  return Output->getBuffer();
}
283+
284+
// Runs the wrapped command through the callable supplied at construction,
// passing it the command, the log stream and the diagnostic options, and
// returns the status it reports.
amd_comgr_status_t CachedCommand::execute(llvm::raw_ostream &LogS) {
  const amd_comgr_status_t Status = ExecuteImpl(Command, LogS, DiagOpts);
  return Status;
}
287+
} // namespace COMGR

0 commit comments

Comments
 (0)