@@ -560,6 +560,23 @@ def genOrderFile(args):
560
560
return 0
561
561
562
562
563
def filter_bolt_optimized(inputs, instrumented_outputs, readelf="llvm-readelf"):
    """Drop binaries that BOLT has already rewritten.

    Every path in ``inputs`` is inspected with ``<readelf> -WS``. A binary
    whose section listing mentions ``.bolt.org.text`` is skipped (a message
    is printed), together with the entry at the same position in
    ``instrumented_outputs``.

    Args:
        inputs: Paths of the binaries to inspect.
        instrumented_outputs: Paths paired position-by-position with
            ``inputs``; pairs are formed with ``zip``, so surplus entries
            in the longer sequence are ignored.
        readelf: readelf executable used to dump the section headers.
            NOTE(review): the default is an assumption that an
            ``llvm-readelf`` is reachable on PATH; callers that have a
            configured tool (e.g. ``opts.readelf``) should pass it.

    Returns:
        A ``(inputs, instrumented_outputs)`` tuple of two new lists holding
        only the pairs that still need processing, in their original order.

    Raises:
        subprocess.CalledProcessError: If readelf exits with a non-zero
            status.
    """
    # The tool path is a parameter because this function has no ``opts`` of
    # its own: that name is only assigned inside bolt_optimize, so looking it
    # up here would fail with NameError.
    pending_inputs = []
    pending_instrumented_outputs = []
    for binary, instrumented_output in zip(inputs, instrumented_outputs):
        section_headers = subprocess.check_output(
            [readelf, "-WS", binary], universal_newlines=True
        )

        # This binary has already been bolt-optimized, so skip further processing.
        if re.search(r"\.bolt\.org\.text", section_headers):
            print(f"Skipping {binary}, it's already instrumented")
            continue

        pending_inputs.append(binary)
        pending_instrumented_outputs.append(instrumented_output)
    return pending_inputs, pending_instrumented_outputs
578
+
579
+
563
580
def bolt_optimize (args ):
564
581
parser = argparse .ArgumentParser ("%prog [options] " )
565
582
parser .add_argument ("--method" , choices = ["INSTRUMENT" , "PERF" , "LBR" ])
@@ -574,47 +591,67 @@ def bolt_optimize(args):
574
591
575
592
opts = parser .parse_args (args )
576
593
577
- output = subprocess .check_output (
578
- [opts .readelf , "-WS" , opts .input ], universal_newlines = True
579
- )
594
+ inputs = opts .input .split (";" )
595
+ instrumented_outputs = opts .instrumented_output .split (";" )
596
+ assert len (inputs ) == len (
597
+ instrumented_outputs
598
+ ), "inconsistent --input / --instrumented-output arguments"
580
599
581
- # This binary has already been bolt-optimized, so skip further processing.
582
- if re . search ( " \\ .bolt \\ .org \\ .text" , output , re . MULTILINE ) :
600
+ inputs , instrumented_outputs = filter_bolt_optimized ( inputs , instrumented_outputs )
601
+ if not inputs :
583
602
return 0
584
603
604
+ environ = os .environ .copy ()
585
605
if opts .method == "INSTRUMENT" :
586
- process = subprocess .run (
587
- [
606
+ preloads = []
607
+ for input , instrumented_output in zip (inputs , instrumented_outputs ):
608
+ args = [
588
609
opts .bolt ,
589
- opts . input ,
610
+ input ,
590
611
"-o" ,
591
- opts . instrumented_output ,
612
+ instrumented_output ,
592
613
"-instrument" ,
593
614
"--instrumentation-file-append-pid" ,
594
615
f"--instrumentation-file={ opts .fdata } " ,
595
- ],
596
- stdout = subprocess .PIPE ,
597
- stderr = subprocess .STDOUT ,
598
- text = True ,
599
- )
616
+ ]
617
+ print ("Running: " + " " .join (args ))
618
+ process = subprocess .run (
619
+ args ,
620
+ stdout = subprocess .PIPE ,
621
+ stderr = subprocess .STDOUT ,
622
+ text = True ,
623
+ )
600
624
601
- print (process .args )
602
- for line in process .stdout :
603
- sys .stdout .write (line )
604
- process .check_returncode ()
625
+ for line in process .stdout :
626
+ sys .stdout .write (line )
627
+ process .check_returncode ()
605
628
629
+ output = subprocess .check_output (
630
+ [opts .readelf , "--file-header" , input ], universal_newlines = True
631
+ )
632
+ if re .search (r"Type:\s*((Shared)|(DYN))" , output ):
633
+ # force using the instrumented version
634
+ preloads .append (instrumented_output )
635
+
636
+ if preloads :
637
+ print ("Patching execution environment for dynamic library" )
638
+ environ ["LD_PRELOAD" ] = os .pathsep .join (preloads )
639
+
640
+ args = [
641
+ sys .executable ,
642
+ opts .lit ,
643
+ "-v" ,
644
+ os .path .join (opts .perf_training_binary_dir , f"bolt-fdata" ),
645
+ ]
646
+ print ("Running: " + " " .join (args ))
606
647
process = subprocess .run (
607
- [
608
- sys .executable ,
609
- opts .lit ,
610
- os .path .join (opts .perf_training_binary_dir , "bolt-fdata" ),
611
- ],
648
+ args ,
612
649
stdout = subprocess .PIPE ,
613
650
stderr = subprocess .STDOUT ,
614
651
text = True ,
652
+ env = environ ,
615
653
)
616
654
617
- print (process .args )
618
655
for line in process .stdout :
619
656
sys .stdout .write (line )
620
657
process .check_returncode ()
@@ -624,14 +661,14 @@ def bolt_optimize(args):
624
661
625
662
merge_fdata ([opts .merge_fdata , opts .fdata , opts .perf_training_binary_dir ])
626
663
627
- shutil .copy (opts .input , f"{ opts .input } -prebolt" )
664
+ for input in inputs :
665
+ shutil .copy (input , f"{ input } -prebolt" )
628
666
629
- process = subprocess .run (
630
- [
667
+ args = [
631
668
opts .bolt ,
632
- f"{ opts . input } -prebolt" ,
669
+ f"{ input } -prebolt" ,
633
670
"-o" ,
634
- opts . input ,
671
+ input ,
635
672
"-data" ,
636
673
opts .fdata ,
637
674
"-reorder-blocks=ext-tsp" ,
@@ -643,16 +680,18 @@ def bolt_optimize(args):
643
680
"-use-gnu-stack" ,
644
681
"-update-debug-sections" ,
645
682
"-nl" if opts .method == "PERF" else "" ,
646
- ],
647
- stdout = subprocess .PIPE ,
648
- stderr = subprocess .STDOUT ,
649
- text = True ,
650
- )
683
+ ]
684
+ print ("Running: " + " " .join (args ))
685
+ process = subprocess .run (
686
+ args ,
687
+ stdout = subprocess .PIPE ,
688
+ stderr = subprocess .STDOUT ,
689
+ text = True ,
690
+ )
651
691
652
- print (process .args )
653
- for line in process .stdout :
654
- sys .stdout .write (line )
655
- process .check_returncode ()
692
+ for line in process .stdout :
693
+ sys .stdout .write (line )
694
+ process .check_returncode ()
656
695
657
696
658
697
commands = {
0 commit comments