Skip to content

Commit 062692f

Browse files
committed
Add amalgamation script
1 parent baba790 commit 062692f

File tree

8 files changed

+5481
-2
lines changed

8 files changed

+5481
-2
lines changed

Makefile

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,9 +172,16 @@ ifneq (Cygwin,$(UNAME))
172172
endif
173173
endif
174174

175+
AMALGAM ?= 0
176+
AMALGAM_SOURCE := build/libsass-amalgam.cpp
175177
include Makefile.conf
176-
OBJECTS = $(addprefix src/,$(SOURCES:.cpp=.o))
177-
COBJECTS = $(addprefix src/,$(CSOURCES:.c=.o))
178+
# Select what gets compiled. With AMALGAM=1 the only object is the one built
# from $(AMALGAM_SOURCE) and COBJECTS is left empty; otherwise every entry of
# SOURCES / CSOURCES under src/ gets its own object file.
# NOTE(review): presumably the C sources are folded into the amalgamated .cpp
# (the amalgamation tool picks up .c files by default) - confirm they compile
# cleanly as C++.
ifeq (1,$(AMALGAM))
179+
OBJECTS = $(AMALGAM_SOURCE:.cpp=.o)
180+
COBJECTS =
181+
else
182+
OBJECTS = $(addprefix src/,$(SOURCES:.cpp=.o))
183+
COBJECTS = $(addprefix src/,$(CSOURCES:.c=.o))
184+
endif
178185
RCOBJECTS = $(RESOURCES:.rc=.o)
179186

180187
DEBUG_LVL ?= NONE
@@ -184,6 +191,7 @@ CLEANUPS += $(RCOBJECTS)
184191
CLEANUPS += $(COBJECTS)
185192
CLEANUPS += $(OBJECTS)
186193
CLEANUPS += $(LIBSASS_LIB)
194+
CLEANUPS += $(AMALGAM_SOURCE)
187195

188196
all: $(BUILD)
189197

@@ -199,6 +207,15 @@ debug-shared: CFLAGS := -g -DDEBUG -DDEBUG_LVL="$(DEBUG_LVL)" $(filter-out -O2,$
199207
debug-shared: CXXFLAGS := -g -DDEBUG -DDEBUG_LVL="$(DEBUG_LVL)" $(filter-out -O2,$(CXXFLAGS))
200208
debug-shared: shared
201209

210+
# `space` is assigned "$(null) " - the blank before the `#` is part of the
# value - so it expands to a single space provided $(null) is empty
# (assumed; it is not defined in the visible hunks). `comma` is a literal comma.
# NOTE(review): neither variable is referenced in the visible hunks - confirm
# they are used elsewhere or drop them.
space := $(null) #
211+
comma := ,
212+
213+
# Builds the amalgamation tool by delegating to its own Makefile.
script/amalgamate/build/amalgamate: script/amalgamate/amalgamate.cpp
214+
$(MAKE) -C script/amalgamate build/amalgamate
215+
216+
# Regenerates the single-file source when any file listed in SOURCES/CSOURCES
# or the tool itself changes. The sub-make runs its default `amalgamate`
# target from script/amalgamate, hence the ../../ prefix on OUT.
# NOTE(review): headers are not prerequisites, so a header-only change does
# not trigger regeneration; and nothing visible here creates the directory of
# $(AMALGAM_SOURCE) before the tool writes into it - confirm both.
$(AMALGAM_SOURCE): $(addprefix src/,$(SOURCES)) $(addprefix src/,$(CSOURCES)) script/amalgamate/build/amalgamate
217+
$(MAKE) -C script/amalgamate OUT=../../$(AMALGAM_SOURCE)
218+
202219
lib:
203220
$(MKDIR) lib
204221

script/amalgamate/.clang-format

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
BasedOnStyle: Google

script/amalgamate/Makefile

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Builds and runs the LibSass amalgamation tool.
#
# Optional variables, forwarded to the tool by the `amalgamate` target:
#   EXTS     comma-separated file extensions (the tool applies its own
#            default list when empty)
#   EXCLUDE  comma-separated paths, relative to src/, to leave out
#   OUT      output file (the tool writes to stdout when empty)
CXX ?= c++

CXXFLAGS := -std=c++11
CXXFLAGS_OPT := $(CXXFLAGS) -O2 -s
CXXFLAGS_DBG := $(CXXFLAGS) -fsanitize=address -g -O1 -fno-omit-frame-pointer
CXXFLAGS_FASTBUILD := $(CXXFLAGS) -O0 -s

EXTS ?=
EXCLUDE ?=
OUT ?=

# Default target: run the tool against the repository root (two levels up).
# Every value is single-quoted so empty or space-containing values reach the
# tool as one argument each.
amalgamate: build/amalgamate
	build/amalgamate --root=../../ --exts='$(EXTS)' --exclude='$(EXCLUDE)' --out='$(OUT)'

# -p keeps this rule harmless if the directory already exists.
build:
	@mkdir -p build

# `build` is order-only: the directory must exist, but its timestamp must not
# trigger a rebuild of the tool.
build/amalgamate: amalgamate.cpp | build
	$(CXX) $(CXXFLAGS_FASTBUILD) -o build/amalgamate amalgamate.cpp

# No prerequisite on `build`: there is no point creating the directory just
# to delete it again.
clean:
	rm -rf build

.PHONY: amalgamate clean

script/amalgamate/README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# LibSass amalgamation script
2+
3+
This script concatenates LibSass sources into a single file.
4+
5+
Amalgamation roughly halves single-core compilation time (60s → 30s in the measurements below) and shrinks the output shared library by about 10% (3.3M → 3.0M).
6+
7+
SQLite has a great writeup on amalgamation here:
8+
<https://www.sqlite.org/amalgamation.html>.
9+
10+
With amalgamation:
11+
12+
~~~bash
13+
rm -f script/amalgamate/build/amalgamate && make clean AMALGAM=1 && \
14+
time make lib/libsass.so AMALGAM=1 && du -sh lib/libsass.so
15+
~~~
16+
17+
Compilation time (1 core): 30s
18+
`lib/libsass.so` size: 3.0M
19+
20+
Without amalgamation:
21+
22+
~~~bash
23+
make clean AMALGAM=0 && time make -j`nproc` lib/libsass.so AMALGAM=0 && du -sh lib/libsass.so
24+
~~~
25+
26+
Compilation time (1 core): 60s
27+
Compilation time (8 cores): 16s
28+
`lib/libsass.so` size: 3.3M

script/amalgamate/amalgamate.cpp

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
#include <algorithm>
2+
#include <cstdint>
3+
#include <fstream>
4+
#include <iostream>
5+
#include <regex>
6+
#include <unordered_map>
7+
#include <unordered_set>
8+
9+
#include "filesystem-polyfill/ghc/filesystem.hpp"
10+
11+
// #define DEBUG 0
12+
13+
namespace {
14+
15+
namespace fs = ghc::filesystem;
16+
17+
// Returns the complete contents of the file at `path`, read in binary mode.
//
// Throws std::runtime_error if the file cannot be opened, so an unreadable
// source is reported instead of being treated as an empty file.
std::string ReadFile(const fs::path &path) {
  std::ifstream ifs(path, std::ios::binary);
  if (!ifs) {
    throw std::runtime_error("Cannot open file for reading: " + path.string());
  }
  // Extra parentheses around the first argument avoid the most vexing parse.
  return std::string((std::istreambuf_iterator<char>(ifs)),
                     std::istreambuf_iterator<char>());
}
21+
22+
// One `#include` directive that was resolved to a file of the amalgamation.
// Kept as a plain aggregate so it can be brace-initialized in member order.
struct IncludeStatement {
  // Path of the included file, in the same form as the keys of the loaded
  // file table (relative to the source directory).
  std::string relpath;
  // Offset into the including file's contents where the matched directive
  // line starts (leading blanks included).
  std::size_t line_begin;
  // Offset one past the end of the matched directive, i.e. just after the
  // newline that terminates it.
  std::size_t line_end;
};
27+
28+
struct FileData {
29+
std::string contents;
30+
std::vector<IncludeStatement> includes;
31+
};
32+
33+
class Amalgamator {
34+
public:
35+
Amalgamator(fs::path root_dir, fs::path src_dir,
36+
std::vector<std::string> exts,
37+
std::unordered_set<std::string> exclude)
38+
: root_dir_(std::move(root_dir)),
39+
src_dir_(src_dir),
40+
exts_(exts),
41+
exclude_(exclude) {}
42+
43+
void Amalgamate(std::ostream &out) {
44+
LoadFiles();
45+
46+
#ifdef DEBUG
47+
const auto log_strings = [](const char *name,
48+
const std::vector<std::string> &xs) {
49+
std::cerr << name << " (" << xs.size() << "):";
50+
for (const std::string &x : xs) std::cerr << " " << x;
51+
std::cerr << std::endl;
52+
};
53+
54+
log_strings("Files", files_);
55+
#endif
56+
57+
for (auto &it : files_data_) {
58+
AnalyzeIncludes(it.first, &it.second);
59+
}
60+
61+
std::unordered_set<std::string> written;
62+
for (const auto &file : files_) {
63+
WriteReplaceIncludes(out, file, /*parent=*/"", &written);
64+
}
65+
out.flush();
66+
}
67+
68+
private:
69+
void WriteReplaceIncludes(std::ostream &out, const std::string relpath,
70+
const std::string &parent,
71+
std::unordered_set<std::string> *written) {
72+
if (written->find(relpath) != written->end()) return;
73+
written->insert(relpath);
74+
out << "/* AMALGAM: " << relpath;
75+
if (!parent.empty()) out << " included from " << parent;
76+
out << " */ \n";
77+
const auto &data = files_data_.at(relpath);
78+
std::size_t prev = 0;
79+
for (const auto &incl : data.includes) {
80+
out.write(data.contents.data() + prev, incl.line_begin - prev);
81+
WriteReplaceIncludes(out, incl.relpath, relpath, written);
82+
prev = incl.line_end;
83+
}
84+
out.write(data.contents.data() + prev, data.contents.size() - prev);
85+
if (data.contents.empty() || data.contents.back() != '\n') {
86+
out.write("\n", 1);
87+
}
88+
}
89+
90+
void LoadFiles() {
91+
for (const auto &entry : fs::recursive_directory_iterator(src_dir_)) {
92+
if (!entry.is_regular_file()) continue;
93+
const auto ext = entry.path().extension();
94+
if (std::find(exts_.begin(), exts_.end(), ext) == exts_.end()) continue;
95+
96+
std::string relpath = PathToSrcRelativeUnix(entry.path());
97+
if (exclude_.find(relpath) != exclude_.end()) continue;
98+
99+
files_.emplace_back(relpath);
100+
files_data_[std::move(relpath)] = {ReadFile(entry.path())};
101+
}
102+
std::sort(files_.begin(), files_.end());
103+
}
104+
105+
void AnalyzeIncludes(const std::string &relpath, FileData *data) {
106+
static const auto *const kIncludeRegex = new std::regex(
107+
R"([ \t]*#include (<[^"\n>]*>|"[^"\n]*")[^\n]*\n)",
108+
std::regex::optimize);
109+
const char *const s = data->contents.c_str();
110+
std::size_t pos = 0;
111+
std::unordered_set<std::string> found_includes;
112+
for (std::cmatch m; std::regex_search(s + pos, m, *kIncludeRegex);
113+
pos += m.position() + m.length()) {
114+
// Match only beginning of line:
115+
if (!(pos + m.position() == 0 || s[pos + m.position() - 1] == '\n')) {
116+
continue;
117+
}
118+
if (m[1].length() < 3) {
119+
throw std::runtime_error("Invalid include: " + m.str());
120+
}
121+
std::string include(m[1].first + 1, m[1].length() - 2);
122+
123+
auto found = ResolveInclude(relpath, include);
124+
if (found.empty()) continue;
125+
if (found == relpath) {
126+
std::cerr << "WARNING: Self-include in " << relpath << std::endl;
127+
continue;
128+
}
129+
if (found_includes.find(found) != found_includes.end()) {
130+
std::cerr << "WARNING: Duplicate #include of " << found << " in "
131+
<< relpath << std::endl;
132+
}
133+
found_includes.insert(found);
134+
data->includes.push_back({std::move(found), pos + m.position(),
135+
pos + m.position() + m.length()});
136+
}
137+
138+
#ifdef DEBUG
139+
if (data->includes.empty()) return;
140+
std::cerr << "Includes for " << relpath << ":";
141+
for (const auto &x : data->includes)
142+
std::cerr << " " << x.relpath << " (" << x.line_begin << "," << x.line_end
143+
<< ")";
144+
std::cerr << std::endl;
145+
#endif
146+
}
147+
148+
std::string ResolveInclude(std::string from_relpath, std::string include) {
149+
std::string resolved;
150+
if (include[0] == '.' || files_data_.find(resolved) == files_data_.end()) {
151+
resolved = fs::path(from_relpath)
152+
.parent_path()
153+
.append(include)
154+
.lexically_normal();
155+
} else {
156+
resolved = include;
157+
}
158+
if (files_data_.find(resolved) == files_data_.end()) {
159+
resolved.clear();
160+
return resolved;
161+
}
162+
return resolved;
163+
}
164+
165+
std::string PathToSrcRelativeUnix(const fs::path &path) {
166+
auto relpath = fs::relative(path, src_dir_);
167+
if (fs::path::preferred_separator == '/') return relpath.u8string();
168+
std::string result = relpath.u8string();
169+
std::replace(result.begin(), result.end(), fs::path::preferred_separator,
170+
'/');
171+
return result;
172+
}
173+
174+
fs::path root_dir_;
175+
fs::path src_dir_;
176+
std::vector<std::string> exts_;
177+
std::unordered_set<std::string> exclude_;
178+
179+
std::vector<std::string> files_;
180+
std::unordered_map<std::string, FileData> files_data_;
181+
};
182+
183+
// Splits `str` at every occurrence of `sep`. Empty pieces are kept, so the
// result always has one more element than there are separators; an empty
// input yields a single empty string.
std::vector<std::string> StrSplit(const std::string &str, char sep) {
  std::vector<std::string> pieces;
  std::size_t start = 0;
  for (std::size_t hit = str.find(sep, start); hit != std::string::npos;
       hit = str.find(sep, start)) {
    pieces.emplace_back(str, start, hit - start);
    start = hit + 1;
  }
  // Whatever follows the last separator (possibly nothing).
  pieces.emplace_back(str, start, std::string::npos);
  return pieces;
}
195+
196+
// Reports whether `prefix` occurs in `str` starting exactly at offset `pos`.
// Returns false when `str` is too short to hold `prefix` at that offset.
bool StartsWith(const std::string &str, const std::string &prefix,
                std::size_t pos = 0) {
  const std::size_t needed = pos + prefix.size();
  if (needed > str.size()) return false;
  return str.compare(pos, prefix.size(), prefix) == 0;
}
204+
205+
bool ParseFlag(const std::string &arg, const std::string &name,
206+
std::unordered_map<std::string, std::string> *flags) {
207+
if (!StartsWith(arg, name, 2)) return false;
208+
if (arg[name.size() + 2] != '=') {
209+
throw std::runtime_error("Invalid argument: " + arg + arg[name.size() + 2]);
210+
}
211+
flags->emplace(name, arg.substr(name.size() + 3));
212+
return true;
213+
}
214+
215+
} // namespace
216+
217+
int main(int argc, char *argv[]) {
218+
fs::path root_dir;
219+
std::unordered_map<std::string, std::string> flags;
220+
static const auto *const kFlags =
221+
new std::vector<std::string>{"root", "exts", "out", "exclude"};
222+
for (int i = 1; i < argc; ++i) {
223+
const std::string &arg = argv[i];
224+
if (!StartsWith(arg, "--")) {
225+
throw std::runtime_error("Invalid argument (must start with --): " + arg);
226+
}
227+
for (const std::string &name : *kFlags) {
228+
if (ParseFlag(arg, name, &flags)) break;
229+
}
230+
}
231+
232+
const auto &root_flag = flags.find("root");
233+
if (root_flag != flags.end() && !root_flag->second.empty()) {
234+
root_dir = root_flag->second;
235+
} else {
236+
root_dir = fs::current_path();
237+
}
238+
239+
std::vector<std::string> exts;
240+
const auto &exts_flag = flags.find("exts");
241+
if (exts_flag != flags.end() && !exts_flag->second.empty()) {
242+
exts = StrSplit(exts_flag->second, ',');
243+
} else {
244+
exts = {".h", ".c", ".hpp", ".cpp"};
245+
}
246+
247+
std::unordered_set<std::string> exclude;
248+
const auto &exclude_flag = flags.find("exclude");
249+
if (exclude_flag != flags.end()) {
250+
const auto vec = StrSplit(exclude_flag->second, ',');
251+
exclude = {vec.begin(), vec.end()};
252+
}
253+
254+
Amalgamator amalgamator(root_dir, root_dir.append("src"), std::move(exts),
255+
std::move(exclude));
256+
const auto &out_flag = flags.find("out");
257+
if (out_flag != flags.end() && !out_flag->second.empty()) {
258+
std::ofstream out(out_flag->second);
259+
amalgamator.Amalgamate(out);
260+
} else {
261+
amalgamator.Amalgamate(std::cout);
262+
}
263+
264+
return 0;
265+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
DisableFormat: true
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
C++17 `std::filesystem` polyfill for C++11 from:
2+
https://github.com/gulrak/filesystem/tree/135015f20b6641140a408d3883c9c820948be1c5
3+
4+
Author: Steffen Schümann <[email protected]>
5+
License: BSD 3-Clause

0 commit comments

Comments
 (0)