Skip to content

Commit 8e965e7

Browse files
authored
fix(stmt-html): Fix embedded Buffer processing performance issue. (#8748)
* fix(stmt-html): Fix embedded Buffer processing performance issue. * Improve loading performance of the assembly file when generating Stmt HTML. Move-optimized replace_all: allow for no-copy execution when target string is not found. * Avoid using inefficient std::string::append(iter, iter).
1 parent 11d8aa2 commit 8e965e7

File tree

8 files changed

+128
-107
lines changed

8 files changed

+128
-107
lines changed

src/Bounds.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -184,11 +184,12 @@ class Bounds : public IRVisitor {
184184
Bounds *const self;
185185
BoundsLogger(Bounds *self, const char *pretty_function)
186186
: self(self) {
187-
string name = replace_all(pretty_function, "(anonymous namespace)::", "");
188-
name = replace_all(name, "virtual void Halide::Internal::", "");
189-
name = replace_all(name, "(const Halide::Internal::", "(");
190-
name = replace_all(name, "::visit", "");
191-
name = replace_all(name, " *)", ")");
187+
string name = pretty_function;
188+
name = replace_all(std::move(name), "(anonymous namespace)::", "");
189+
name = replace_all(std::move(name), "virtual void Halide::Internal::", "");
190+
name = replace_all(std::move(name), "(const Halide::Internal::", "(");
191+
name = replace_all(std::move(name), "::visit", "");
192+
name = replace_all(std::move(name), " *)", ")");
192193
log_line(name, " {");
193194
self->log_indent++;
194195
}
@@ -2220,16 +2221,17 @@ class BoxesTouched : public IRGraphVisitor {
22202221

22212222
BoxesTouchedLogger(BoxesTouched *self, const char *pretty_function)
22222223
: self(self), parent_logger(self->current_logger), boxes(self->boxes) {
2223-
string name = replace_all(pretty_function, "(anonymous namespace)::", "");
2224-
name = replace_all(name, "virtual void Halide::Internal::", "");
2225-
name = replace_all(name, "(const Halide::Internal::", "(");
2226-
name = replace_all(name, "::visit", "");
2227-
name = replace_all(name, " *)", ")");
2224+
string name = pretty_function;
2225+
name = replace_all(std::move(name), "(anonymous namespace)::", "");
2226+
name = replace_all(std::move(name), "virtual void Halide::Internal::", "");
2227+
name = replace_all(std::move(name), "(const Halide::Internal::", "(");
2228+
name = replace_all(std::move(name), "::visit", "");
2229+
name = replace_all(std::move(name), " *)", ")");
22282230

22292231
if (self->consider_calls && !self->consider_provides) {
2230-
name = replace_all(name, "BoxesTouched", "BoxesRequired");
2232+
name = replace_all(std::move(name), "BoxesTouched", "BoxesRequired");
22312233
} else if (!self->consider_calls && self->consider_provides) {
2232-
name = replace_all(name, "BoxesTouched", "BoxesProvided");
2234+
name = replace_all(std::move(name), "BoxesTouched", "BoxesProvided");
22332235
}
22342236

22352237
log_line(name, " {");

src/CompilerLogger.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,8 @@ std::ostream &emit_value(std::ostream &o, const VALUE &value) {
179179
template<>
180180
std::ostream &emit_value<std::string>(std::ostream &o, const std::string &value) {
181181
std::string v = value;
182-
v = replace_all(v, "\\", "\\\\");
183-
v = replace_all(v, "\"", "\\\"");
182+
v = replace_all(std::move(v), "\\", "\\\\");
183+
v = replace_all(std::move(v), "\"", "\\\"");
184184
o << "\"" << v << "\"";
185185
return o;
186186
}

src/Module.cpp

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -223,10 +223,10 @@ static Registerer registerer;
223223
}
224224
std::string nsreg = "halide_nsreg_" + replace_all(f.name, "::", "_");
225225
std::string s = replace_all(registration_template, "$NAMESPACEOPEN$", nsopen);
226-
s = replace_all(s, "$SHORTNAME$", simple_name);
227-
s = replace_all(s, "$NAMESPACECLOSE$", nsclose);
228-
s = replace_all(s, "$FULLNAME$", f.name);
229-
s = replace_all(s, "$NREGS$", nsreg);
226+
s = replace_all(std::move(s), "$SHORTNAME$", simple_name);
227+
s = replace_all(std::move(s), "$NAMESPACECLOSE$", nsclose);
228+
s = replace_all(std::move(s), "$FULLNAME$", f.name);
229+
s = replace_all(std::move(s), "$NREGS$", nsreg);
230230
stream << s;
231231
}
232232
}
@@ -305,15 +305,15 @@ inline void apply_schedule_$SHORTNAME$(
305305
target_string += t.to_string();
306306
}
307307
std::string body_text = indent_string(body, " ");
308-
s = replace_all(s, "$SCHEDULER$", scheduler_name);
309-
s = replace_all(s, "$NAMESPACEOPEN$", nsopen);
310-
s = replace_all(s, "$SHORTNAME$", simple_name);
311-
s = replace_all(s, "$CLEANNAME$", clean_name);
312-
s = replace_all(s, "$NAMESPACECLOSE$", nsclose);
313-
s = replace_all(s, "$TARGET$", target_string);
314-
s = replace_all(s, "$BODY$", body_text);
315-
s = replace_all(s, "$MPNAME$", "autoscheduler_params");
316-
s = replace_all(s, "$MACHINEPARAMS$", autoscheduler_params_string);
308+
s = replace_all(std::move(s), "$SCHEDULER$", scheduler_name);
309+
s = replace_all(std::move(s), "$NAMESPACEOPEN$", nsopen);
310+
s = replace_all(std::move(s), "$SHORTNAME$", simple_name);
311+
s = replace_all(std::move(s), "$CLEANNAME$", clean_name);
312+
s = replace_all(std::move(s), "$NAMESPACECLOSE$", nsclose);
313+
s = replace_all(std::move(s), "$TARGET$", target_string);
314+
s = replace_all(std::move(s), "$BODY$", body_text);
315+
s = replace_all(std::move(s), "$MPNAME$", "autoscheduler_params");
316+
s = replace_all(std::move(s), "$MACHINEPARAMS$", autoscheduler_params_string);
317317
stream << s;
318318
}
319319

@@ -692,12 +692,7 @@ void Module::compile(const std::map<OutputFileType, std::string> &output_files)
692692
debug(1) << "Module.compile(): device_code " << output_files.at(OutputFileType::device_code) << "\n";
693693
Buffer<> buf = get_device_code_buffer();
694694
if (buf.defined()) {
695-
int length = buf.size_in_bytes();
696-
while (length > 0 && ((const char *)buf.data())[length - 1] == '\0') {
697-
length--;
698-
}
699-
std::string str((const char *)buf.data(), length);
700-
std::string device_code = std::string((const char *)buf.data(), buf.size_in_bytes());
695+
std::string_view device_code((const char *)buf.data(), buf.size_in_bytes());
701696
while (!device_code.empty() && device_code.back() == '\0') {
702697
device_code = device_code.substr(0, device_code.length() - 1);
703698
}

src/StmtToHTML.cpp

Lines changed: 85 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -555,28 +555,40 @@ class AssemblyInfo : public IRVisitor {
555555
m.get_conceptual_stmt().accept(this);
556556
}
557557

558-
void generate(const std::string &code) {
558+
void generate(std::string_view code) {
559559
// Find markers in asm code
560-
std::istringstream asm_stream(code);
561-
std::string line;
562560
int lno = 1;
563-
while (getline(asm_stream, line)) {
564-
// Try all markers
565-
std::vector<uint64_t> matched_nodes;
566-
for (auto const &[node, marker] : markers) {
567-
if (std::regex_search(line, marker)) {
568-
// Save line number
569-
lnos[node] = lno;
570-
// Save this node's id
571-
matched_nodes.push_back(node);
572-
}
561+
size_t start = 0;
562+
563+
std::vector<uint64_t> matched_nodes;
564+
while (start < code.size()) {
565+
size_t end = code.find('\n', start);
566+
if (end == std::string_view::npos) {
567+
end = code.size();
573568
}
574-
// We map to the first match, stop
575-
// checking matched nodes
576-
for (auto const &node : matched_nodes) {
577-
markers.erase(node);
569+
std::string_view line = code.substr(start, end - start);
570+
std::string_view marker_prefix("%\"", 2);
571+
572+
// Quick check if the line contains %".
573+
if (line.find(marker_prefix) != std::string_view::npos) {
574+
// Try all markers
575+
matched_nodes.clear();
576+
for (auto const &[node, marker] : markers) {
577+
if (std::regex_search(line.begin(), line.end(), marker)) {
578+
// Save line number
579+
lnos[node] = lno;
580+
// Save this node's id
581+
matched_nodes.push_back(node);
582+
}
583+
}
584+
// We map to the first match, stop
585+
// checking matched nodes
586+
for (auto const &node : matched_nodes) {
587+
markers.erase(node);
588+
}
578589
}
579590

591+
start = end + 1;
580592
lno++;
581593
}
582594
}
@@ -785,12 +797,12 @@ class HTMLCodePrinter : public IRVisitor {
785797
}
786798

787799
std::string escape_html(std::string src) {
788-
src = replace_all(src, "&", "&amp;");
789-
src = replace_all(src, "<", "&lt;");
790-
src = replace_all(src, ">", "&gt;");
791-
src = replace_all(src, "\"", "&quot;");
792-
src = replace_all(src, "/", "&#x2F;");
793-
src = replace_all(src, "'", "&#39;");
800+
src = replace_all(std::move(src), "&", "&amp;");
801+
src = replace_all(std::move(src), "<", "&lt;");
802+
src = replace_all(std::move(src), ">", "&gt;");
803+
src = replace_all(std::move(src), "\"", "&quot;");
804+
src = replace_all(std::move(src), "/", "&#x2F;");
805+
src = replace_all(std::move(src), "'", "&#39;");
794806
return src;
795807
}
796808

@@ -850,29 +862,29 @@ class HTMLCodePrinter : public IRVisitor {
850862
scope.pop(current_kernel);
851863
}
852864

853-
line = replace_all(line, ".f32", ".<span class='OpF32'>f32</span>");
854-
line = replace_all(line, ".f64", ".<span class='OpF64'>f64</span>");
865+
line = replace_all(std::move(line), ".f32", ".<span class='OpF32'>f32</span>");
866+
line = replace_all(std::move(line), ".f64", ".<span class='OpF64'>f64</span>");
855867

856-
line = replace_all(line, ".s8", ".<span class='OpI8'>s8</span>");
857-
line = replace_all(line, ".s16", ".<span class='OpI16'>s16</span>");
858-
line = replace_all(line, ".s32", ".<span class='OpI32'>s32</span>");
859-
line = replace_all(line, ".s64", ".<span class='OpI64'>s64</span>");
868+
line = replace_all(std::move(line), ".s8", ".<span class='OpI8'>s8</span>");
869+
line = replace_all(std::move(line), ".s16", ".<span class='OpI16'>s16</span>");
870+
line = replace_all(std::move(line), ".s32", ".<span class='OpI32'>s32</span>");
871+
line = replace_all(std::move(line), ".s64", ".<span class='OpI64'>s64</span>");
860872

861-
line = replace_all(line, ".u8", ".<span class='OpI8'>u8</span>");
862-
line = replace_all(line, ".u16", ".<span class='OpI16'>u16</span>");
863-
line = replace_all(line, ".u32", ".<span class='OpI32'>u32</span>");
864-
line = replace_all(line, ".u64", ".<span class='OpI64'>u64</span>");
873+
line = replace_all(std::move(line), ".u8", ".<span class='OpI8'>u8</span>");
874+
line = replace_all(std::move(line), ".u16", ".<span class='OpI16'>u16</span>");
875+
line = replace_all(std::move(line), ".u32", ".<span class='OpI32'>u32</span>");
876+
line = replace_all(std::move(line), ".u64", ".<span class='OpI64'>u64</span>");
865877

866-
line = replace_all(line, ".b8", ".<span class='OpB8'>b8</span>");
867-
line = replace_all(line, ".b16", ".<span class='OpB16'>b16</span>");
868-
line = replace_all(line, ".b32", ".<span class='OpB32'>b32</span>");
869-
line = replace_all(line, ".b64", ".<span class='OpB64'>b64</span>");
878+
line = replace_all(std::move(line), ".b8", ".<span class='OpB8'>b8</span>");
879+
line = replace_all(std::move(line), ".b16", ".<span class='OpB16'>b16</span>");
880+
line = replace_all(std::move(line), ".b32", ".<span class='OpB32'>b32</span>");
881+
line = replace_all(std::move(line), ".b64", ".<span class='OpB64'>b64</span>");
870882

871-
line = replace_all(line, ".v2", ".<span class='OpVec2'>v2</span>");
872-
line = replace_all(line, ".v4", ".<span class='OpVec4'>v4</span>");
883+
line = replace_all(std::move(line), ".v2", ".<span class='OpVec2'>v2</span>");
884+
line = replace_all(std::move(line), ".v4", ".<span class='OpVec4'>v4</span>");
873885

874-
line = replace_all(line, "ld.", "<span class='Memory'>ld</span>.");
875-
line = replace_all(line, "st.", "<span class='Memory'>st</span>.");
886+
line = replace_all(std::move(line), "ld.", "<span class='Memory'>ld</span>.");
887+
line = replace_all(std::move(line), "st.", "<span class='Memory'>st</span>.");
876888

877889
size_t idx;
878890
if ((idx = line.find("&#x2F;&#x2F")) != std::string::npos) {
@@ -2306,7 +2318,7 @@ class PipelineHTMLInspector {
23062318
// use comments in the generated assembly to infer association
23072319
// between Halide IR and assembly -- unclear how reliable this is.
23082320
host_asm_info.gather_nodes_from_functions(m);
2309-
host_asm_info.generate(asm_stream.str());
2321+
host_asm_info.generate(asm_buffer);
23102322

23112323
Buffer<> device_code_buf = m.get_device_code_buffer();
23122324
if (device_code_buf.defined()) {
@@ -2315,7 +2327,7 @@ class PipelineHTMLInspector {
23152327
debug(1) << "Generating device AssemblyInfo\n";
23162328
// TODO(mcourteaux): This doesn't generate anything useful, as the
23172329
// LLVM comments are only added later in the LLVM CodeGen IRVisitor.
2318-
// This conceptual Stmt hasn't seen this seen this
2330+
// This conceptual Stmt hasn't seen this pass yet.
23192331
device_asm_info.gather_nodes_from_conceptual_stmt(m);
23202332
device_asm_info.generate(device_assembly);
23212333
} else {
@@ -2460,15 +2472,18 @@ class PipelineHTMLInspector {
24602472
stream << "<div id='host-assembly-pane' class='pane'>\n";
24612473
stream << "<div id='assemblyContent' class='shj-lang-asm'>\n";
24622474
stream << "<pre>\n";
2463-
std::istringstream ss{asm_stream.str()};
2464-
for (std::string line; std::getline(ss, line);) {
2465-
if (line.length() > 500) {
2466-
// Very long lines in the assembly are typically the _gpu_kernel_sources
2467-
// as a raw ASCII block in the assembly. Let's chop that off to make
2468-
// browsers faster when dealing with this.
2469-
line = line.substr(0, 100) + "\" # omitted the remainder of the ASCII buffer";
2475+
// The loop below is preferred to avoid copying the data
2476+
// again into a new std::istringstream.
2477+
std::string_view asm_str = asm_buffer;
2478+
size_t start = 0;
2479+
while (start < asm_str.size()) {
2480+
size_t end = asm_str.find('\n', start);
2481+
if (end == std::string_view::npos) {
2482+
end = asm_str.size();
24702483
}
2471-
stream << html_code_printer.escape_html(line) << "\n";
2484+
std::string line{asm_str.substr(start, end - start)};
2485+
stream << html_code_printer.escape_html(std::move(line)) << "\n";
2486+
start = end + 1;
24722487
}
24732488
stream << "\n";
24742489
stream << "</pre>\n";
@@ -2517,7 +2532,7 @@ class PipelineHTMLInspector {
25172532
/* Misc helper methods */
25182533

25192534
// Load assembly code from file
2520-
std::ostringstream asm_stream;
2535+
std::string asm_buffer;
25212536
AssemblyInfo host_asm_info;
25222537
AssemblyInfo device_asm_info;
25232538

@@ -2528,10 +2543,26 @@ class PipelineHTMLInspector {
25282543
std::ifstream assembly;
25292544
assembly.open(asm_file.c_str());
25302545

2531-
// Slurp the code into asm_stream
2546+
// Try to get size of the file for fewer allocations
2547+
asm_buffer.clear();
2548+
size_t file_size = assembly.seekg(0, std::ios::end).tellg();
2549+
asm_buffer.reserve(file_size + 16);
2550+
assembly.seekg(0);
2551+
2552+
// Slurp the code into asm_stream...
25322553
std::string line;
2554+
line.reserve(128);
25332555
while (getline(assembly, line)) {
2534-
asm_stream << line << "\n";
2556+
if (line.length() > 500) {
2557+
// Very long lines in the assembly are typically the _gpu_kernel_sources
2558+
// or other buffers (such as static LUTs) as a raw ASCII block in the
2559+
// assembly. Let's chop that off to make browsers faster when dealing with this.
2560+
asm_buffer.append(line.data(), 200);
2561+
asm_buffer.append("\" # omitted the remainder of the buffer\n");
2562+
} else {
2563+
asm_buffer.append(line);
2564+
asm_buffer.push_back('\n');
2565+
}
25352566
}
25362567
}
25372568
};

src/Target.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1133,7 +1133,7 @@ std::string Target::to_string() const {
11331133
// Use has_feature() multiple times (rather than features_any_of())
11341134
// to avoid constructing a temporary vector for this rather-common call.
11351135
if (has_feature(Target::TraceLoads) && has_feature(Target::TraceStores) && has_feature(Target::TraceRealizations)) {
1136-
result = Internal::replace_all(result, "trace_loads-trace_realizations-trace_stores", "trace_all");
1136+
result = Internal::replace_all(std::move(result), "trace_loads-trace_realizations-trace_stores", "trace_all");
11371137
}
11381138
if (vector_bits != 0) {
11391139
result += "-vector_bits_" + std::to_string(vector_bits);

src/Util.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -262,14 +262,13 @@ bool ends_with(const string &str, const string &suffix) {
262262
return true;
263263
}
264264

265-
string replace_all(const string &str, const string &find, const string &replace) {
265+
string replace_all(string str, const string &find, const string &replace) {
266266
size_t pos = 0;
267-
string result = str;
268-
while ((pos = result.find(find, pos)) != string::npos) {
269-
result.replace(pos, find.length(), replace);
267+
while ((pos = str.find(find, pos)) != string::npos) {
268+
str.replace(pos, find.length(), replace);
270269
pos += replace.length();
271270
}
272-
return result;
271+
return str;
273272
}
274273

275274
std::vector<std::string> split_string(const std::string &source, const std::string &delim) {
@@ -388,7 +387,7 @@ std::string get_windows_tmp_dir() {
388387
std::string tmp = from_utf16(wlocal_path);
389388
CoTaskMemFree(wlocal_path);
390389

391-
tmp = replace_all(tmp, "\\", "/");
390+
tmp = replace_all(std::move(tmp), "\\", "/");
392391
if (tmp.back() != '/') tmp += '/';
393392
tmp += "Temp/";
394393
return tmp;

src/Util.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,12 @@ bool starts_with(const std::string &str, const std::string &prefix);
176176
/** Test if the first string ends with the second string */
177177
bool ends_with(const std::string &str, const std::string &suffix);
178178

179-
/** Replace all matches of the second string in the first string with the last string */
180-
std::string replace_all(const std::string &str, const std::string &find, const std::string &replace);
179+
/** Replace all matches of the second string in the first string with the last string.
180+
* The string to search-and-replace in is passed by value, offering the ability to
181+
* std::move() a string in if you're not interested in keeping the original string.
182+
* This is useful when the original string does not contain the find-string, causing
183+
* this function to return the same string without any copies being made. */
184+
std::string replace_all(std::string str, const std::string &find, const std::string &replace);
181185

182186
/** Split the source string using 'delim' as the divider. */
183187
std::vector<std::string> split_string(const std::string &source, const std::string &delim);

test/correctness/pytorch.cpp

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,6 @@ using namespace Halide;
66

77
namespace {
88

9-
std::string replace_all(const std::string &str, const std::string &find, const std::string &replace) {
10-
size_t pos = 0;
11-
std::string result = str;
12-
while ((pos = result.find(find, pos)) != std::string::npos) {
13-
result.replace(pos, find.length(), replace);
14-
pos += replace.length();
15-
}
16-
return result;
17-
}
18-
199
std::string read_entire_file(const std::string &pathname) {
2010
std::ifstream f(pathname, std::ios::in | std::ios::binary);
2111
std::string result;
@@ -31,7 +21,7 @@ std::string read_entire_file(const std::string &pathname) {
3121
}
3222
f.close();
3323
// Normalize file to Unix line endings
34-
result = replace_all(result, "\r\n", "\n");
24+
result = Internal::replace_all(result, "\r\n", "\n");
3525
return result;
3626
}
3727

0 commit comments

Comments
 (0)