@@ -555,28 +555,40 @@ class AssemblyInfo : public IRVisitor {
555555 m.get_conceptual_stmt ().accept (this );
556556 }
557557
558- void generate (const std::string & code) {
558+ void generate (std::string_view code) {
559559 // Find markers in asm code
560- std::istringstream asm_stream (code);
561- std::string line;
562560 int lno = 1 ;
563- while (getline (asm_stream, line)) {
564- // Try all markers
565- std::vector<uint64_t > matched_nodes;
566- for (auto const &[node, marker] : markers) {
567- if (std::regex_search (line, marker)) {
568- // Save line number
569- lnos[node] = lno;
570- // Save this node's id
571- matched_nodes.push_back (node);
572- }
561+ size_t start = 0 ;
562+
563+ std::vector<uint64_t > matched_nodes;
564+ while (start < code.size ()) {
565+ size_t end = code.find (' \n ' , start);
566+ if (end == std::string_view::npos) {
567+ end = code.size ();
573568 }
574- // We map to the first match, stop
575- // checking matched nodes
576- for (auto const &node : matched_nodes) {
577- markers.erase (node);
569+ std::string_view line = code.substr (start, end - start);
570+ std::string_view marker_prefix (" %\" " , 2 );
571+
572+ // Quick check if the line contains %".
573+ if (line.find (marker_prefix) != std::string_view::npos) {
574+ // Try all markers
575+ matched_nodes.clear ();
576+ for (auto const &[node, marker] : markers) {
577+ if (std::regex_search (line.begin (), line.end (), marker)) {
578+ // Save line number
579+ lnos[node] = lno;
580+ // Save this node's id
581+ matched_nodes.push_back (node);
582+ }
583+ }
584+ // We map to the first match, stop
585+ // checking matched nodes
586+ for (auto const &node : matched_nodes) {
587+ markers.erase (node);
588+ }
578589 }
579590
591+ start = end + 1 ;
580592 lno++;
581593 }
582594 }
@@ -785,12 +797,12 @@ class HTMLCodePrinter : public IRVisitor {
785797 }
786798
787799 std::string escape_html (std::string src) {
788- src = replace_all (src, " &" , " &amp;" );
789- src = replace_all (src, " <" , " &lt;" );
790- src = replace_all (src, " >" , " &gt;" );
791- src = replace_all (src, " \" " , " &quot;" );
792- src = replace_all (src, " /" , " &#x2F;" );
793- src = replace_all (src, " '" , " &#39;" );
800+ src = replace_all (std::move ( src) , " &" , " &amp;" );
801+ src = replace_all (std::move ( src) , " <" , " &lt;" );
802+ src = replace_all (std::move ( src) , " >" , " &gt;" );
803+ src = replace_all (std::move ( src) , " \" " , " &quot;" );
804+ src = replace_all (std::move ( src) , " /" , " &#x2F;" );
805+ src = replace_all (std::move ( src) , " '" , " &#39;" );
794806 return src;
795807 }
796808
@@ -850,29 +862,29 @@ class HTMLCodePrinter : public IRVisitor {
850862 scope.pop (current_kernel);
851863 }
852864
853- line = replace_all (line, " .f32" , " .<span class='OpF32'>f32</span>" );
854- line = replace_all (line, " .f64" , " .<span class='OpF64'>f64</span>" );
865+ line = replace_all (std::move ( line) , " .f32" , " .<span class='OpF32'>f32</span>" );
866+ line = replace_all (std::move ( line) , " .f64" , " .<span class='OpF64'>f64</span>" );
855867
856- line = replace_all (line, " .s8" , " .<span class='OpI8'>s8</span>" );
857- line = replace_all (line, " .s16" , " .<span class='OpI16'>s16</span>" );
858- line = replace_all (line, " .s32" , " .<span class='OpI32'>s32</span>" );
859- line = replace_all (line, " .s64" , " .<span class='OpI64'>s64</span>" );
868+ line = replace_all (std::move ( line) , " .s8" , " .<span class='OpI8'>s8</span>" );
869+ line = replace_all (std::move ( line) , " .s16" , " .<span class='OpI16'>s16</span>" );
870+ line = replace_all (std::move ( line) , " .s32" , " .<span class='OpI32'>s32</span>" );
871+ line = replace_all (std::move ( line) , " .s64" , " .<span class='OpI64'>s64</span>" );
860872
861- line = replace_all (line, " .u8" , " .<span class='OpI8'>u8</span>" );
862- line = replace_all (line, " .u16" , " .<span class='OpI16'>u16</span>" );
863- line = replace_all (line, " .u32" , " .<span class='OpI32'>u32</span>" );
864- line = replace_all (line, " .u64" , " .<span class='OpI64'>u64</span>" );
873+ line = replace_all (std::move ( line) , " .u8" , " .<span class='OpI8'>u8</span>" );
874+ line = replace_all (std::move ( line) , " .u16" , " .<span class='OpI16'>u16</span>" );
875+ line = replace_all (std::move ( line) , " .u32" , " .<span class='OpI32'>u32</span>" );
876+ line = replace_all (std::move ( line) , " .u64" , " .<span class='OpI64'>u64</span>" );
865877
866- line = replace_all (line, " .b8" , " .<span class='OpB8'>b8</span>" );
867- line = replace_all (line, " .b16" , " .<span class='OpB16'>b16</span>" );
868- line = replace_all (line, " .b32" , " .<span class='OpB32'>b32</span>" );
869- line = replace_all (line, " .b64" , " .<span class='OpB64'>b64</span>" );
878+ line = replace_all (std::move ( line) , " .b8" , " .<span class='OpB8'>b8</span>" );
879+ line = replace_all (std::move ( line) , " .b16" , " .<span class='OpB16'>b16</span>" );
880+ line = replace_all (std::move ( line) , " .b32" , " .<span class='OpB32'>b32</span>" );
881+ line = replace_all (std::move ( line) , " .b64" , " .<span class='OpB64'>b64</span>" );
870882
871- line = replace_all (line, " .v2" , " .<span class='OpVec2'>v2</span>" );
872- line = replace_all (line, " .v4" , " .<span class='OpVec4'>v4</span>" );
883+ line = replace_all (std::move ( line) , " .v2" , " .<span class='OpVec2'>v2</span>" );
884+ line = replace_all (std::move ( line) , " .v4" , " .<span class='OpVec4'>v4</span>" );
873885
874- line = replace_all (line, " ld." , " <span class='Memory'>ld</span>." );
875- line = replace_all (line, " st." , " <span class='Memory'>st</span>." );
886+ line = replace_all (std::move ( line) , " ld." , " <span class='Memory'>ld</span>." );
887+ line = replace_all (std::move ( line) , " st." , " <span class='Memory'>st</span>." );
876888
877889 size_t idx;
878890 if ((idx = line.find (" //" )) != std::string::npos) {
@@ -2306,7 +2318,7 @@ class PipelineHTMLInspector {
23062318 // use comments in the generated assembly to infer association
23072319 // between Halide IR and assembly -- unclear how reliable this is.
23082320 host_asm_info.gather_nodes_from_functions (m);
2309- host_asm_info.generate (asm_stream. str () );
2321+ host_asm_info.generate (asm_buffer );
23102322
23112323 Buffer<> device_code_buf = m.get_device_code_buffer ();
23122324 if (device_code_buf.defined ()) {
@@ -2315,7 +2327,7 @@ class PipelineHTMLInspector {
23152327 debug (1 ) << " Generating device AssemblyInfo\n " ;
23162328 // TODO(mcourteaux): This doesn't generate anything useful, as the
23172329 // LLVM comments are only added later in the LLVM CodeGen IRVisitor.
2318- // This conceptual Stmt hasn't seen this seen this
2330+ // This conceptual Stmt hasn't seen this pass yet.
23192331 device_asm_info.gather_nodes_from_conceptual_stmt (m);
23202332 device_asm_info.generate (device_assembly);
23212333 } else {
@@ -2460,15 +2472,18 @@ class PipelineHTMLInspector {
24602472 stream << " <div id='host-assembly-pane' class='pane'>\n " ;
24612473 stream << " <div id='assemblyContent' class='shj-lang-asm'>\n " ;
24622474 stream << " <pre>\n " ;
2463- std::istringstream ss{asm_stream.str ()};
2464- for (std::string line; std::getline (ss, line);) {
2465- if (line.length () > 500 ) {
2466- // Very long lines in the assembly are typically the _gpu_kernel_sources
2467- // as a raw ASCII block in the assembly. Let's chop that off to make
2468- // browsers faster when dealing with this.
2469- line = line.substr (0 , 100 ) + " \" # omitted the remainder of the ASCII buffer" ;
2475+ // The loop below is preferred to avoid copying the data
2476+ // again into a new std::istringstream.
2477+ std::string_view asm_str = asm_buffer;
2478+ size_t start = 0 ;
2479+ while (start < asm_str.size ()) {
2480+ size_t end = asm_str.find (' \n ' , start);
2481+ if (end == std::string_view::npos) {
2482+ end = asm_str.size ();
24702483 }
2471- stream << html_code_printer.escape_html (line) << " \n " ;
2484+ std::string line{asm_str.substr (start, end - start)};
2485+ stream << html_code_printer.escape_html (std::move (line)) << " \n " ;
2486+ start = end + 1 ;
24722487 }
24732488 stream << " \n " ;
24742489 stream << " </pre>\n " ;
@@ -2517,7 +2532,7 @@ class PipelineHTMLInspector {
25172532 /* Misc helper methods */
25182533
25192534 // Load assembly code from file
2520- std::ostringstream asm_stream ;
2535+ std::string asm_buffer ;
25212536 AssemblyInfo host_asm_info;
25222537 AssemblyInfo device_asm_info;
25232538
@@ -2528,10 +2543,26 @@ class PipelineHTMLInspector {
25282543 std::ifstream assembly;
25292544 assembly.open (asm_file.c_str ());
25302545
2531- // Slurp the code into asm_stream
2546+ // Try to get size of the file for fewer allocations
2547+ asm_buffer.clear ();
2548+ size_t file_size = assembly.seekg (0 , std::ios::end).tellg ();
2549+ asm_buffer.reserve (file_size + 16 );
2550+ assembly.seekg (0 );
2551+
2552+ // Slurp the code into asm_buffer...
25322553 std::string line;
2554+ line.reserve (128 );
25332555 while (getline (assembly, line)) {
2534- asm_stream << line << " \n " ;
2556+ if (line.length () > 500 ) {
2557+ // Very long lines in the assembly are typically the _gpu_kernel_sources
2558+ // or other buffers (such as static LUTs) as a raw ASCII block in the
2559+ // assembly. Let's chop that off to make browsers faster when dealing with this.
2560+ asm_buffer.append (line.data (), 200 );
2561+ asm_buffer.append (" \" # omitted the remainder of the buffer\n " );
2562+ } else {
2563+ asm_buffer.append (line);
2564+ asm_buffer.push_back (' \n ' );
2565+ }
25352566 }
25362567 }
25372568};
0 commit comments