Skip to content

Commit b9b4080

Browse files
committed
Add amalgamation script
1 parent baba790 commit b9b4080

File tree

8 files changed

+5492
-2
lines changed

8 files changed

+5492
-2
lines changed

Makefile

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,9 +172,16 @@ ifneq (Cygwin,$(UNAME))
172172
endif
173173
endif
174174

175+
AMALGAM ?= 0
176+
AMALGAM_SOURCE := build/libsass-amalgam.cpp
175177
include Makefile.conf
176-
OBJECTS = $(addprefix src/,$(SOURCES:.cpp=.o))
177-
COBJECTS = $(addprefix src/,$(CSOURCES:.c=.o))
178+
ifeq (1,$(AMALGAM))
179+
OBJECTS = $(AMALGAM_SOURCE:.cpp=.o)
180+
COBJECTS =
181+
else
182+
OBJECTS = $(addprefix src/,$(SOURCES:.cpp=.o))
183+
COBJECTS = $(addprefix src/,$(CSOURCES:.c=.o))
184+
endif
178185
RCOBJECTS = $(RESOURCES:.rc=.o)
179186

180187
DEBUG_LVL ?= NONE
@@ -184,6 +191,7 @@ CLEANUPS += $(RCOBJECTS)
184191
CLEANUPS += $(COBJECTS)
185192
CLEANUPS += $(OBJECTS)
186193
CLEANUPS += $(LIBSASS_LIB)
194+
CLEANUPS += $(AMALGAM_SOURCE)
187195

188196
all: $(BUILD)
189197

@@ -199,6 +207,15 @@ debug-shared: CFLAGS := -g -DDEBUG -DDEBUG_LVL="$(DEBUG_LVL)" $(filter-out -O2,$
199207
debug-shared: CXXFLAGS := -g -DDEBUG -DDEBUG_LVL="$(DEBUG_LVL)" $(filter-out -O2,$(CXXFLAGS))
200208
debug-shared: shared
201209

210+
space := $(null) #
211+
comma := ,
212+
213+
script/amalgamate/build/amalgamate: script/amalgamate/amalgamate.cpp
214+
$(MAKE) -C script/amalgamate build/amalgamate
215+
216+
$(AMALGAM_SOURCE): $(addprefix src/,$(SOURCES)) $(addprefix src/,$(CSOURCES)) script/amalgamate/build/amalgamate
217+
$(MAKE) -C script/amalgamate OUT=../../$(AMALGAM_SOURCE)
218+
202219
lib:
203220
$(MKDIR) lib
204221

script/amalgamate/.clang-format

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
BasedOnStyle: Google

script/amalgamate/Makefile

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Builds the amalgamation tool and runs it against the LibSass sources.
#
# Variables (all optional, passed on the command line):
#   EXTS     comma-separated list of file extensions to include
#   EXCLUDE  comma-separated list of src-relative paths to skip
#   OUT      output file; empty means the tool writes to stdout

CXX ?= c++

CXXFLAGS := -std=c++11
CXXFLAGS_OPT := $(CXXFLAGS) -O2 -s
CXXFLAGS_DBG := $(CXXFLAGS) -fsanitize=address -g -O1 -fno-omit-frame-pointer
CXXFLAGS_FASTBUILD := $(CXXFLAGS) -O0 -s

EXTS ?=
EXCLUDE ?=
OUT ?=

# Make sure the output directory exists first: the tool opens OUT with a plain
# ofstream, which does not create missing parent directories.
amalgamate: build/amalgamate
	$(if $(OUT),@mkdir -p '$(dir $(OUT))')
	build/amalgamate --root=../../ --exts='$(EXTS)' --exclude='$(EXCLUDE)' --out='$(OUT)'

build:
	@mkdir -p build

# `build` is order-only so that touching the directory does not force a rebuild.
build/amalgamate: amalgamate.cpp | build
	$(CXX) $(CXXFLAGS_FASTBUILD) -o build/amalgamate amalgamate.cpp

# No dependency on `build`: there is no point creating the directory only to
# remove it again.
clean:
	rm -rf build

.PHONY: amalgamate clean

script/amalgamate/README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# LibSass amalgamation script
2+
3+
This script concatenates LibSass sources into a single file.
4+
5+
This reduces single-core compilation time by 50% and the output shared library size by 10%.
6+
7+
SQLite has a great writeup on amalgamation here:
8+
<https://www.sqlite.org/amalgamation.html>.
9+
10+
With amalgamation:
11+
12+
~~~bash
13+
rm -f script/amalgamate/build/amalgamate && make clean AMALGAM=1 && \
14+
time make lib/libsass.so AMALGAM=1 && du -sh lib/libsass.so
15+
~~~
16+
17+
Compilation time (1 core): 30s
18+
`lib/libsass.so` size: 3.0M
19+
20+
Without amalgamation:
21+
22+
~~~bash
23+
make clean AMALGAM=0 && time make -j`nproc` lib/libsass.so AMALGAM=0 && du -sh lib/libsass.so
24+
~~~
25+
26+
Compilation time (1 core, i.e. without `-j`): 60s
27+
Compilation time (8 cores): 16s
28+
`lib/libsass.so` size: 3.3M

script/amalgamate/amalgamate.cpp

Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,276 @@
1+
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <regex>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "filesystem-polyfill/ghc/filesystem.hpp"
11+
12+
// #define DEBUG 0
13+
14+
namespace {
15+
16+
namespace fs = ghc::filesystem;
17+
18+
// Reads the whole file at `path` into a string, byte for byte (binary mode).
//
// Throws std::runtime_error if the file cannot be opened or a read error
// occurs, so that an unreadable source is never silently amalgamated as an
// empty file.
std::string ReadFile(const fs::path &path) {
  std::ifstream ifs(path, std::ios::binary);
  if (!ifs) {
    throw std::runtime_error("Cannot open file for reading: " +
                             path.u8string());
  }
  std::string contents((std::istreambuf_iterator<char>(ifs)),
                       std::istreambuf_iterator<char>());
  if (ifs.bad()) {
    throw std::runtime_error("Error while reading file: " + path.u8string());
  }
  return contents;
}
22+
23+
// One `#include` line of a source file that resolved to another loaded file.
//
// Kept as a plain aggregate: instances are created with brace initialisation,
// so the member order below is part of the contract.
struct IncludeStatement {
  // Key (src-relative path) of the file being included.
  std::string relpath;

  // Byte offset, within the including file's contents, of the first character
  // of the matched include line.
  std::size_t line_begin;

  // Byte offset one past the last character of the matched include line.
  std::size_t line_end;

  // Set when the same file was already included earlier in the same
  // including file.
  bool duplicate;
};
29+
30+
struct FileData {
31+
std::string contents;
32+
std::vector<IncludeStatement> includes;
33+
};
34+
35+
class Amalgamator {
36+
public:
37+
Amalgamator(fs::path root_dir, fs::path src_dir,
38+
std::vector<std::string> exts,
39+
std::unordered_set<std::string> exclude, std::ostream &out)
40+
: root_dir_(std::move(root_dir)),
41+
src_dir_(src_dir),
42+
exts_(exts),
43+
exclude_(exclude),
44+
out_(out) {}
45+
46+
void Amalgamate() {
47+
LoadFiles();
48+
49+
#ifdef DEBUG
50+
const auto log_strings = [](const char *name,
51+
const std::vector<std::string> &xs) {
52+
std::cerr << name << " (" << xs.size() << "):";
53+
for (const std::string &x : xs) std::cerr << " " << x;
54+
std::cerr << std::endl;
55+
};
56+
57+
log_strings("Files", files_);
58+
#endif
59+
60+
for (auto &it : files_data_) {
61+
AnalyzeIncludes(it.first, &it.second);
62+
}
63+
64+
#ifdef DEBUG
65+
log_strings("Sorted files", files_);
66+
#endif
67+
68+
std::unordered_set<std::string> written;
69+
for (const auto &file : files_) {
70+
WriteReplaceIncludes(out_, file, /*parent=*/"", &written);
71+
}
72+
out_.flush();
73+
}
74+
75+
private:
76+
void WriteReplaceIncludes(std::ostream &out, const std::string relpath,
77+
const std::string &parent,
78+
std::unordered_set<std::string> *written) {
79+
if (written->find(relpath) != written->end()) return;
80+
written->insert(relpath);
81+
out << "/* AMALGAM: " << relpath;
82+
if (!parent.empty()) out << " included from " << parent;
83+
out << " */ \n";
84+
const auto &data = files_data_.at(relpath);
85+
std::size_t prev = 0;
86+
for (const auto &incl : data.includes) {
87+
out.write(data.contents.data() + prev, incl.line_begin - prev);
88+
WriteReplaceIncludes(out, incl.relpath, relpath, written);
89+
prev = incl.line_end;
90+
}
91+
out.write(data.contents.data() + prev, data.contents.size() - prev);
92+
if (data.contents.empty() || data.contents.back() != '\n') {
93+
out.write("\n", 1);
94+
}
95+
}
96+
97+
void LoadFiles() {
98+
for (const auto &entry : fs::recursive_directory_iterator(src_dir_)) {
99+
if (!entry.is_regular_file()) continue;
100+
const auto ext = entry.path().extension();
101+
if (std::find(exts_.begin(), exts_.end(), ext) == exts_.end()) continue;
102+
103+
std::string relpath = PathToSrcRelativeUnix(entry.path());
104+
if (exclude_.find(relpath) != exclude_.end()) continue;
105+
106+
files_.emplace_back(relpath);
107+
files_data_[std::move(relpath)] = {ReadFile(entry.path())};
108+
}
109+
std::sort(files_.begin(), files_.end());
110+
}
111+
112+
void AnalyzeIncludes(const std::string &relpath, FileData *data) {
113+
static const auto *const kIncludeRegex = new std::regex(
114+
R"([ \t]*#include (<[^"\n>]*>|"[^"\n]*")[^\n]*\n)",
115+
std::regex::optimize);
116+
const char *const s = data->contents.c_str();
117+
std::size_t pos = 0;
118+
std::unordered_set<std::string> found_includes;
119+
for (std::cmatch m; std::regex_search(s + pos, m, *kIncludeRegex);
120+
pos += m.position() + m.length()) {
121+
// Match only beginning of line:
122+
if (!(pos + m.position() == 0 || s[pos + m.position() - 1] == '\n')) {
123+
continue;
124+
}
125+
if (m[1].length() < 3) {
126+
throw std::runtime_error("Invalid include: " + m.str());
127+
}
128+
std::string include(m[1].first + 1, m[1].length() - 2);
129+
130+
auto found = ResolveInclude(relpath, include);
131+
if (found.empty()) continue;
132+
if (found == relpath) {
133+
std::cerr << "WARNING: Self-include in " << relpath << std::endl;
134+
continue;
135+
}
136+
const bool duplicate = found_includes.find(found) != found_includes.end();
137+
if (duplicate) {
138+
std::cerr << "WARNING: Duplicate #include of " << found << " in "
139+
<< relpath << std::endl;
140+
}
141+
found_includes.insert(found);
142+
data->includes.push_back({std::move(found), pos + m.position(),
143+
pos + m.position() + m.length(), duplicate});
144+
}
145+
146+
#ifdef DEBUG
147+
if (data->includes.empty()) return;
148+
std::cerr << "Includes for " << relpath << ":";
149+
for (const auto &x : data->includes)
150+
std::cerr << " " << x.relpath << " (" << x.line_begin << "," << x.line_end
151+
<< ")";
152+
std::cerr << std::endl;
153+
#endif
154+
}
155+
156+
std::string ResolveInclude(std::string from_relpath, std::string include) {
157+
std::string resolved;
158+
if (include[0] == '.' || files_data_.find(resolved) == files_data_.end()) {
159+
resolved = fs::path(from_relpath)
160+
.parent_path()
161+
.append(include)
162+
.lexically_normal();
163+
} else {
164+
resolved = include;
165+
}
166+
if (files_data_.find(resolved) == files_data_.end()) {
167+
resolved.clear();
168+
return resolved;
169+
}
170+
return resolved;
171+
}
172+
173+
std::string PathToSrcRelativeUnix(const fs::path &path) {
174+
auto relpath = fs::relative(path, src_dir_);
175+
if (fs::path::preferred_separator == '/') return relpath.u8string();
176+
std::string result = relpath.u8string();
177+
std::replace(result.begin(), result.end(), fs::path::preferred_separator,
178+
'/');
179+
return result;
180+
}
181+
182+
fs::path root_dir_;
183+
fs::path src_dir_;
184+
std::vector<std::string> exts_;
185+
std::unordered_set<std::string> exclude_;
186+
std::ostream &out_;
187+
188+
std::vector<std::string> files_;
189+
std::unordered_map<std::string, FileData> files_data_;
190+
};
191+
192+
// Splits `str` at every occurrence of `sep`.
//
// Empty pieces are kept, so the result always has exactly one more element
// than there are separators in `str`; an empty input yields one empty string.
std::vector<std::string> StrSplit(const std::string &str, char sep) {
  std::vector<std::string> pieces;
  std::size_t start = 0;
  for (std::size_t hit = str.find(sep); hit != std::string::npos;
       hit = str.find(sep, start)) {
    pieces.emplace_back(str, start, hit - start);
    start = hit + 1;
  }
  // Whatever follows the last separator (possibly nothing).
  pieces.emplace_back(str, start, std::string::npos);
  return pieces;
}
204+
205+
// Reports whether `prefix` occurs in `str` starting exactly at offset `pos`.
//
// Returns false when `str` is too short to hold `prefix` at that offset; an
// empty prefix matches at any offset up to and including `str.size()`.
bool StartsWith(const std::string &str, const std::string &prefix,
                std::size_t pos = 0) {
  const std::size_t needed = pos + prefix.size();
  if (needed > str.size()) return false;
  return str.compare(pos, prefix.size(), prefix) == 0;
}
213+
214+
// Tries to parse `arg` as `--<name>=<value>`.
//
// arg:   a command-line argument, expected to begin with two dashes.
// name:  flag name without the dashes.
// flags: receives `name -> value` on success; an entry that already exists
//        for `name` is left untouched.
//
// Returns false when `name` does not appear directly after the two leading
// characters of `arg`. Throws std::runtime_error when the name matches but is
// not followed by '=' (for example a bare `--root`).
bool ParseFlag(const std::string &arg, const std::string &name,
               std::unordered_map<std::string, std::string> *flags) {
  constexpr std::size_t kDashes = 2;
  const std::size_t name_end = kDashes + name.size();
  if (arg.size() < name_end || arg.compare(kDashes, name.size(), name) != 0) {
    return false;
  }
  if (name_end == arg.size() || arg[name_end] != '=') {
    throw std::runtime_error("Invalid argument: " + arg);
  }
  flags->emplace(name, arg.substr(name_end + 1));
  return true;
}
223+
224+
} // namespace
225+
226+
int main(int argc, char *argv[]) {
227+
fs::path root_dir;
228+
std::unordered_map<std::string, std::string> flags;
229+
for (int i = 1; i < argc; ++i) {
230+
const std::string &arg = argv[i];
231+
if (!StartsWith(arg, "--")) {
232+
throw std::runtime_error("Invalid argument (must start with --): " + arg);
233+
}
234+
ParseFlag(arg, "root", &flags) || ParseFlag(arg, "exts", &flags) ||
235+
ParseFlag(arg, "out", &flags) || ParseFlag(arg, "exclude", &flags);
236+
}
237+
238+
const auto &root_flag = flags.find("root");
239+
if (root_flag != flags.end() && !root_flag->second.empty()) {
240+
root_dir = root_flag->second;
241+
} else {
242+
root_dir = fs::current_path();
243+
}
244+
245+
std::vector<std::string> exts;
246+
const auto &exts_flag = flags.find("exts");
247+
if (exts_flag != flags.end() && !exts_flag->second.empty()) {
248+
exts = StrSplit(exts_flag->second, ',');
249+
} else {
250+
exts = {".h", ".c", ".hpp", ".cpp"};
251+
}
252+
253+
std::ostream *out;
254+
std::ofstream outfile;
255+
const auto &out_flag = flags.find("out");
256+
if (out_flag != flags.end() && !out_flag->second.empty()) {
257+
outfile = std::ofstream(out_flag->second);
258+
out = &outfile;
259+
} else {
260+
out = &std::cout;
261+
}
262+
263+
std::unordered_set<std::string> exclude;
264+
const auto &exclude_flag = flags.find("exclude");
265+
if (exclude_flag != flags.end()) {
266+
const auto vec = StrSplit(exclude_flag->second, ',');
267+
exclude = {vec.begin(), vec.end()};
268+
}
269+
270+
Amalgamator(root_dir, root_dir.append("src"), std::move(exts),
271+
std::move(exclude), *out)
272+
.Amalgamate();
273+
if (outfile.is_open()) outfile.close();
274+
275+
return 0;
276+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
DisableFormat: true
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
C++17 `std::filesystem` polyfill for C++11 from:
2+
https://github.com/gulrak/filesystem/tree/135015f20b6641140a408d3883c9c820948be1c5
3+
4+
Author: Steffen Schümann <[email protected]>
5+
License: BSD 3-Clause

0 commit comments

Comments
 (0)