Skip to content

Commit 9848e64

Browse files
committed
Add amalgamation script
Sources are #included instead of inlined by default.
1 parent 56c74db commit 9848e64

File tree

8 files changed

+5516
-2
lines changed

8 files changed

+5516
-2
lines changed

Makefile

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,9 +172,27 @@ ifneq (Cygwin,$(UNAME))
172172
endif
173173
endif
174174

175+
# Amalgamated-build switches (both may be overridden on the command line):
#   AMALGAM=1            build the library from one generated source file.
#   AMALGAM_INLINE=true  paste the sources into that file instead of
#                        generating a list of #include directives.
AMALGAM ?= 0
AMALGAM_INLINE ?= false

# One generated file per mode.
AMALGAM_SOURCE_INCLUDE := build/libsass-amalgam-include.cpp
AMALGAM_SOURCE_INLINE := build/libsass-amalgam-inline.cpp

# Select the generated file that matches the requested mode.
ifneq (true,$(AMALGAM_INLINE))
AMALGAM_SOURCE := $(AMALGAM_SOURCE_INCLUDE)
else
AMALGAM_SOURCE := $(AMALGAM_SOURCE_INLINE)
endif

# Absolute path of the directory that holds the generated file.
AMALGAM_SOURCE_DIR := $(abspath $(dir $(AMALGAM_SOURCE)))
187+
175188
include Makefile.conf
176-
OBJECTS = $(addprefix src/,$(SOURCES:.cpp=.o))
177-
COBJECTS = $(addprefix src/,$(CSOURCES:.c=.o))
189+
# Regular builds compile every source on its own; with AMALGAM=1 the single
# generated translation unit is the only object (there are no C objects).
ifneq (1,$(AMALGAM))
OBJECTS = $(addprefix src/,$(SOURCES:.cpp=.o))
COBJECTS = $(addprefix src/,$(CSOURCES:.c=.o))
else
OBJECTS = $(AMALGAM_SOURCE:.cpp=.o)
COBJECTS =
endif
178196
RCOBJECTS = $(RESOURCES:.rc=.o)
179197

180198
DEBUG_LVL ?= NONE
@@ -184,6 +202,7 @@ CLEANUPS += $(RCOBJECTS)
184202
CLEANUPS += $(COBJECTS)
185203
CLEANUPS += $(OBJECTS)
186204
CLEANUPS += $(LIBSASS_LIB)
205+
CLEANUPS += $(AMALGAM_SOURCE_INCLUDE) $(AMALGAM_SOURCE_INLINE)
187206

188207
all: $(BUILD)
189208

@@ -199,6 +218,22 @@ debug-shared: CFLAGS := -g -DDEBUG -DDEBUG_LVL="$(DEBUG_LVL)" $(filter-out -O2,$
199218
debug-shared: CXXFLAGS := -g -DDEBUG -DDEBUG_LVL="$(DEBUG_LVL)" $(filter-out -O2,$(CXXFLAGS))
200219
debug-shared: shared
201220

221+
# Helper program that generates the amalgamated source file.
AMALGAMATE_BIN := script/amalgamate/build/amalgamate

# The helper is built by its own Makefile.
$(AMALGAMATE_BIN): script/amalgamate/amalgamate.cpp
	$(MAKE) -C script/amalgamate build/amalgamate

# Output directory for the generated source (order-only prerequisite below).
$(AMALGAM_SOURCE_DIR):
	$(MKDIR) $@

# Regenerate the amalgamated file whenever the helper or any source changes.
$(AMALGAM_SOURCE): $(AMALGAMATE_BIN) $(addprefix src/,$(SOURCES) $(CSOURCES)) | $(AMALGAM_SOURCE_DIR)
	$(AMALGAMATE_BIN) --out=$@ --inline=$(AMALGAM_INLINE)

# The include-style file names its sources relative to src/, so src/ has to be
# on the include path when compiling it.
ifneq (true,$(AMALGAM_INLINE))
$(AMALGAM_SOURCE:.cpp=.o): $(AMALGAM_SOURCE)
	$(CXX) $(CXXFLAGS) -I src -c -o $@ $<
endif
236+
202237
lib:
203238
$(MKDIR) lib
204239

script/amalgamate/.clang-format

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
BasedOnStyle: Google

script/amalgamate/Makefile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Builds the amalgamate helper program into build/amalgamate.
CXX ?= c++

CXXFLAGS := -std=c++11
CXXFLAGS_OPT := $(CXXFLAGS) -O2 -s
CXXFLAGS_DBG := $(CXXFLAGS) -fsanitize=address -g -O1 -fno-omit-frame-pointer
CXXFLAGS_FASTBUILD := $(CXXFLAGS) -O0 -s

# Default goal: the helper binary. `build` is order-only so that touching the
# directory does not trigger a rebuild.
build/amalgamate: amalgamate.cpp | build
	$(CXX) $(CXXFLAGS_FASTBUILD) -o $@ $<

# Convenience alias, so the name listed in .PHONY refers to a real target.
amalgamate: build/amalgamate

build:
	@mkdir -p build

# No prerequisite on `build`: there is no point creating the directory just
# to remove it again.
clean:
	rm -rf build

.PHONY: amalgamate clean

script/amalgamate/README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# LibSass amalgamation script
2+
3+
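This script combines the LibSass sources into a single file: it either concatenates them (inline mode) or, as the LibSass Makefile does by default, generates one file that `#include`s every source.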
4+
5+
This reduces single-core compilation time by 50% and the output shared library size by 10%.
6+
7+
SQLite has a great writeup on amalgamation here:
8+
<https://www.sqlite.org/amalgamation.html>.
9+
10+
With amalgamation:
11+
12+
~~~bash
13+
rm -f script/amalgamate/build/amalgamate && make clean AMALGAM=1 && \
14+
time make lib/libsass.so AMALGAM=1 && du -sh lib/libsass.so
15+
~~~
16+
17+
Compilation time (1 core): 30s
18+
`lib/libsass.so` size: 3.0M
19+
20+
Without amalgamation:
21+
22+
~~~bash
23+
make clean AMALGAM=0 && time make -j`nproc` lib/libsass.so AMALGAM=0 && du -sh lib/libsass.so
24+
~~~
25+
26+
Compilation time (1 core): 60s
27+
Compilation time (8 cores): 16s
28+
`lib/libsass.so` size: 3.3M

script/amalgamate/amalgamate.cpp

Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
#include <algorithm>
2+
#include <cstddef>
3+
#include <fstream>
4+
#include <iostream>
5+
#include <regex>
6+
#include <unordered_map>
7+
#include <unordered_set>
8+
9+
#include "filesystem-polyfill/ghc/filesystem.hpp"
10+
11+
// #define DEBUG 1
12+
13+
namespace {
14+
15+
namespace fs = ghc::filesystem;
16+
17+
// Returns the full contents of the file at `path`, read in binary mode.
// Throws std::runtime_error if the file cannot be opened, so that an
// unreadable source is reported instead of being amalgamated as empty.
std::string ReadFile(const fs::path &path) {
  std::ifstream ifs(path, std::ios::binary);
  if (!ifs) {
    throw std::runtime_error("Could not open file: " + path.string());
  }
  return std::string(std::istreambuf_iterator<char>(ifs),
                     std::istreambuf_iterator<char>());
}
21+
22+
// One #include directive found in a source file that refers to another file
// known to the amalgamator.
// Kept a plain aggregate: it is brace-initialised as {relpath, begin, end}.
struct IncludeStatement {
  // Source-root-relative path of the file being included.
  std::string relpath;
  // Offset into the including file's contents where the directive starts
  // (including any leading blanks on that line).
  std::size_t line_begin;
  // Offset one past the newline that ends the directive.
  std::size_t line_end;
};
27+
28+
struct FileData {
29+
std::string contents;
30+
std::vector<IncludeStatement> includes;
31+
};
32+
33+
class Amalgamator {
34+
public:
35+
Amalgamator(std::vector<fs::path> src_dirs, std::vector<std::string> exts,
36+
std::unordered_set<std::string> exclude, bool inline_sources)
37+
: src_dirs_(std::move(src_dirs)),
38+
exts_(exts),
39+
exclude_(exclude),
40+
inline_sources_(inline_sources) {}
41+
42+
void Amalgamate(std::ostream &out) {
43+
LoadFiles();
44+
45+
#ifdef DEBUG
46+
const auto log_strings = [](const char *name,
47+
const std::vector<std::string> &xs) {
48+
std::cerr << name << " (" << xs.size() << "):";
49+
for (const std::string &x : xs) std::cerr << " " << x;
50+
std::cerr << std::endl;
51+
};
52+
53+
log_strings("Files", files_);
54+
#endif
55+
56+
if (files_.empty()) {
57+
throw std::runtime_error("Could not find any files to amalgamate");
58+
}
59+
60+
for (auto &it : files_data_) {
61+
AnalyzeIncludes(it.first, &it.second);
62+
}
63+
64+
if (inline_sources_) {
65+
std::unordered_set<std::string> written;
66+
for (const auto &file : files_) {
67+
WriteReplaceIncludes(out, file, /*parent=*/"", &written);
68+
}
69+
} else {
70+
const auto write_include = [&out](const std::string &file) {
71+
72+
};
73+
74+
std::unordered_set<std::string> included;
75+
for (const auto &file : files_) {
76+
// Reduce the number of #include statements.
77+
if (included.find(file) != included.end()) continue;
78+
out << "#include \"" << file << "\"" << std::endl;
79+
included.insert(file);
80+
for (const auto &incl : files_data_[file].includes) {
81+
// Consider direct includes as included but not transitive ones,
82+
// as there may be cycle.
83+
included.insert(incl.relpath);
84+
}
85+
}
86+
}
87+
out.flush();
88+
}
89+
90+
private:
91+
void WriteReplaceIncludes(std::ostream &out, const std::string relpath,
92+
const std::string &parent,
93+
std::unordered_set<std::string> *written) {
94+
if (written->find(relpath) != written->end()) return;
95+
written->insert(relpath);
96+
out << "/* AMALGAM: " << relpath;
97+
if (!parent.empty()) out << " included from " << parent;
98+
out << " */ \n";
99+
const auto &data = files_data_.at(relpath);
100+
std::size_t prev = 0;
101+
for (const auto &incl : data.includes) {
102+
out.write(data.contents.data() + prev, incl.line_begin - prev);
103+
WriteReplaceIncludes(out, incl.relpath, relpath, written);
104+
prev = incl.line_end;
105+
}
106+
out.write(data.contents.data() + prev, data.contents.size() - prev);
107+
if (data.contents.empty() || data.contents.back() != '\n') {
108+
out.write("\n", 1);
109+
}
110+
}
111+
112+
void LoadFiles() {
113+
for (const auto &src_dir : src_dirs_) {
114+
for (const auto &entry : fs::recursive_directory_iterator(src_dir)) {
115+
if (!entry.is_regular_file()) continue;
116+
const auto ext = entry.path().extension();
117+
if (std::find(exts_.begin(), exts_.end(), ext) == exts_.end()) continue;
118+
119+
std::string relpath =
120+
fs::relative(entry.path(), src_dir).generic_u8string();
121+
if (exclude_.find(relpath) != exclude_.end()) continue;
122+
123+
files_.emplace_back(relpath);
124+
files_data_[std::move(relpath)] = {ReadFile(entry.path())};
125+
}
126+
}
127+
std::sort(files_.begin(), files_.end());
128+
}
129+
130+
void AnalyzeIncludes(const std::string &relpath, FileData *data) {
131+
static const auto *const kIncludeRegex =
132+
new std::regex(R"([ \t]*#include (<[^"\n>]*>|"[^"\n]*")[^\n]*\n)",
133+
std::regex::optimize);
134+
const char *const s = data->contents.c_str();
135+
std::size_t pos = 0;
136+
std::unordered_set<std::string> found_includes;
137+
for (std::cmatch m; std::regex_search(s + pos, m, *kIncludeRegex);
138+
pos += m.position() + m.length()) {
139+
// Match only beginning of line:
140+
if (!(pos + m.position() == 0 || s[pos + m.position() - 1] == '\n')) {
141+
continue;
142+
}
143+
if (m[1].length() < 3) {
144+
throw std::runtime_error("Invalid include: " + m.str());
145+
}
146+
std::string include(m[1].first + 1, m[1].length() - 2);
147+
148+
auto found = ResolveInclude(relpath, include);
149+
if (found.empty()) continue;
150+
if (found == relpath) {
151+
std::cerr << "WARNING: Self-include in " << relpath << std::endl;
152+
continue;
153+
}
154+
if (found_includes.find(found) != found_includes.end()) {
155+
std::cerr << "WARNING: Duplicate #include of " << found << " in "
156+
<< relpath << std::endl;
157+
}
158+
found_includes.insert(found);
159+
data->includes.push_back({std::move(found), pos + m.position(),
160+
pos + m.position() + m.length()});
161+
}
162+
163+
#ifdef DEBUG
164+
if (data->includes.empty()) return;
165+
std::cerr << "Includes for " << relpath << ":";
166+
for (const auto &x : data->includes)
167+
std::cerr << " " << x.relpath << " (" << x.line_begin << "," << x.line_end
168+
<< ")";
169+
std::cerr << std::endl;
170+
#endif
171+
}
172+
173+
std::string ResolveInclude(std::string from_relpath, std::string include) {
174+
std::string resolved;
175+
if (include[0] == '.' || files_data_.find(resolved) == files_data_.end()) {
176+
resolved = fs::path(from_relpath, fs::path::generic_format)
177+
.remove_filename()
178+
.append(include)
179+
.lexically_normal()
180+
.generic_u8string();
181+
} else {
182+
resolved = include;
183+
}
184+
if (files_data_.find(resolved) == files_data_.end()) resolved.clear();
185+
#ifdef DEBUG
186+
if (!resolved.empty() && resolved != include) {
187+
std::cerr << " Resolved " << include << " to " << resolved << " in "
188+
<< from_relpath << " " << std::endl;
189+
}
190+
#endif
191+
return resolved;
192+
}
193+
194+
std::vector<fs::path> src_dirs_;
195+
std::vector<std::string> exts_;
196+
std::unordered_set<std::string> exclude_;
197+
bool inline_sources_;
198+
199+
std::vector<std::string> files_;
200+
std::unordered_map<std::string, FileData> files_data_;
201+
};
202+
203+
// Splits `str` at every occurrence of `sep`. Empty fields are kept, so the
// result always has exactly one more element than there are separators (an
// empty input yields a single empty string).
std::vector<std::string> StrSplit(const std::string &str, char sep) {
  std::vector<std::string> parts;
  std::size_t field_start = 0;
  for (std::size_t hit = str.find(sep); hit != std::string::npos;
       hit = str.find(sep, field_start)) {
    parts.emplace_back(str, field_start, hit - field_start);
    field_start = hit + 1;
  }
  // Whatever follows the last separator (possibly nothing).
  parts.emplace_back(str, field_start, std::string::npos);
  return parts;
}
215+
216+
// Returns true if `prefix` occurs in `str` starting exactly at offset `pos`.
// An empty prefix matches at any offset up to and including str.size().
bool StartsWith(const std::string &str, const std::string &prefix,
                std::size_t pos = 0) {
  const std::size_t wanted = prefix.size();
  // Not enough characters left in `str` to hold the prefix.
  if (pos + wanted > str.size()) return false;
  return str.compare(pos, wanted, prefix) == 0;
}
224+
225+
// Tries to parse `arg` as `--<name>=<value>`.
//
// Returns false, leaving `flags` untouched, if the text after the first two
// characters of `arg` does not begin with `name`. Otherwise stores the value
// (possibly empty) under `name` and returns true; if `name` is already present
// in `flags`, the existing value is kept.
//
// Throws std::runtime_error if `name` matches but is not followed by '='.
bool ParseFlag(const std::string &arg, const std::string &name,
               std::unordered_map<std::string, std::string> *flags) {
  const std::size_t kDashes = 2;  // Length of the leading "--".
  const std::size_t eq_pos = kDashes + name.size();
  if (arg.size() < eq_pos || arg.compare(kDashes, name.size(), name) != 0) {
    return false;
  }
  if (eq_pos >= arg.size() || arg[eq_pos] != '=') {
    throw std::runtime_error("Invalid argument: " + arg);
  }
  flags->emplace(name, arg.substr(eq_pos + 1));
  return true;
}
234+
235+
} // namespace
236+
237+
int main(int argc, char *argv[]) {
238+
fs::path root_dir;
239+
std::unordered_map<std::string, std::string> flags;
240+
static const auto *const kFlags =
241+
new std::vector<std::string>{"root", "exts", "out", "exclude", "inline"};
242+
for (int i = 1; i < argc; ++i) {
243+
const std::string &arg = argv[i];
244+
if (!StartsWith(arg, "--")) {
245+
throw std::runtime_error("Invalid argument (must start with --): " + arg);
246+
}
247+
for (const std::string &name : *kFlags) {
248+
if (ParseFlag(arg, name, &flags)) break;
249+
}
250+
}
251+
252+
const auto &root_flag = flags.find("root");
253+
if (root_flag != flags.end() && !root_flag->second.empty()) {
254+
root_dir = root_flag->second;
255+
} else {
256+
root_dir = fs::current_path();
257+
}
258+
259+
std::vector<std::string> exts;
260+
const auto &exts_flag = flags.find("exts");
261+
if (exts_flag != flags.end() && !exts_flag->second.empty()) {
262+
exts = StrSplit(exts_flag->second, ',');
263+
} else {
264+
exts = {".h", ".c", ".hpp", ".cpp"};
265+
}
266+
267+
std::unordered_set<std::string> exclude;
268+
const auto &exclude_flag = flags.find("exclude");
269+
if (exclude_flag != flags.end()) {
270+
const auto vec = StrSplit(exclude_flag->second, ',');
271+
exclude = {vec.begin(), vec.end()};
272+
}
273+
274+
const auto &inline_flag = flags.find("inline");
275+
const bool inline_sources =
276+
!(inline_flag != flags.end() && inline_flag->second == "false");
277+
278+
Amalgamator amalgamator({root_dir.append("src")}, std::move(exts),
279+
std::move(exclude), inline_sources);
280+
const auto &out_flag = flags.find("out");
281+
if (out_flag != flags.end() && !out_flag->second.empty()) {
282+
std::ofstream out(out_flag->second);
283+
amalgamator.Amalgamate(out);
284+
} else {
285+
amalgamator.Amalgamate(std::cout);
286+
}
287+
288+
return 0;
289+
}

0 commit comments

Comments
 (0)