Merge branch 'master' into powerletrics_pynvml_integratiion

mhkarsten · web-flow · commit b98e86b99705 · 2025-05-12T14:10:19.000+02:00
diff --git a/examples/hello-world-fibonacci/README.md b/examples/hello-world-fibonacci/README.md
@@ -0,0 +1,23 @@
+
+# Hello World Fibonacci
+
+A simple, platform-independent example that runs three different Fibonacci implementations
+and measures their power consumption, runtime, and memory usage using [EnergiBridge](https://github.com/tdurieux/EnergiBridge).
+
+Note that admin permissions are needed to make use of EnergiBridge.
+
+
+## Running
+
+From the root directory of the repo, run the following command:
+
+```bash
+python experiment-runner/ examples/hello-world-fibonacci/RunnerConfig.py
+```
+
+## Results
+
+The results are generated in the `examples/hello-world-fibonacci/experiments` folder.
+
+**!!! WARNING !!!**: COLUMNS IN THE `energibridge.csv` FILES CAN BE DIFFERENT ACROSS MACHINES.
+ADJUST THE DATAFRAME COLUMN NAMES ACCORDINGLY.
diff --git a/examples/hello-world-fibonacci/RunnerConfig.py b/examples/hello-world-fibonacci/RunnerConfig.py
@@ -0,0 +1,127 @@
+from EventManager.Models.RunnerEvents import RunnerEvents
+from EventManager.EventSubscriptionController import EventSubscriptionController
+from ConfigValidator.Config.Models.RunTableModel import RunTableModel
+from ConfigValidator.Config.Models.FactorModel import FactorModel
+from ConfigValidator.Config.Models.RunnerContext import RunnerContext
+from ConfigValidator.Config.Models.OperationType import OperationType
+from ProgressManager.Output.OutputProcedure import OutputProcedure as output
+from Plugins.Profilers.EnergiBridge import EnergiBridge
+
+from typing import Dict, List, Any, Optional
+from pathlib import Path
+from os.path import dirname, realpath
+
+
+class RunnerConfig:
+    """Experiment Runner configuration for the hello-world Fibonacci example.
+
+    Every run starts one `fibonacci_<fib_type>.py` script through EnergiBridge and
+    reports the values parsed from EnergiBridge's CSV log and summary file.
+    """
+    ROOT_DIR = Path(dirname(realpath(__file__)))
+
+    # ================================ USER SPECIFIC CONFIG ================================
+    """The name of the experiment."""
+    name:                       str             = "new_runner_experiment"
+
+    """The path in which Experiment Runner will create a folder with the name `self.name`, in order to store the
+    results from this experiment. (Path does not need to exist - it will be created if necessary.)
+    Output path defaults to the config file's path, inside the folder 'experiments'"""
+    results_output_path:        Path             = ROOT_DIR / 'experiments'
+
+    """Experiment operation type. Unless you manually want to initiate each run, use `OperationType.AUTO`."""
+    operation_type:             OperationType   = OperationType.AUTO
+
+    """The time Experiment Runner will wait after a run completes.
+    This can be essential to accommodate for cooldown periods on some systems."""
+    time_between_runs_in_ms:    int             = 1000
+
+    # Dynamic configurations can be one-time satisfied here before the program takes the config as-is
+    # e.g. Setting some variable based on some criteria
+    def __init__(self):
+        """Executes immediately after program start, on config load"""
+
+        EventSubscriptionController.subscribe_to_multiple_events([
+            (RunnerEvents.BEFORE_EXPERIMENT, self.before_experiment),
+            (RunnerEvents.BEFORE_RUN       , self.before_run       ),
+            (RunnerEvents.START_RUN        , self.start_run        ),
+            (RunnerEvents.START_MEASUREMENT, self.start_measurement),
+            (RunnerEvents.INTERACT         , self.interact         ),
+            (RunnerEvents.STOP_MEASUREMENT , self.stop_measurement ),
+            (RunnerEvents.STOP_RUN         , self.stop_run         ),
+            (RunnerEvents.POPULATE_RUN_DATA, self.populate_run_data),
+            (RunnerEvents.AFTER_EXPERIMENT , self.after_experiment )
+        ])
+        self.run_table_model = None  # Initialized later
+        self.profiler = None         # Created for every run in start_measurement
+
+        output.console_log("Custom config loaded")
+
+    def create_run_table_model(self) -> RunTableModel:
+        """Create and return the run_table model here. A run_table is a List (rows) of tuples (columns),
+        representing each run performed"""
+        factor1 = FactorModel("fib_type", ['iter', 'mem', 'rec'])
+        factor2 = FactorModel("problem_size", [10, 20, 30])
+        self.run_table_model = RunTableModel(
+            factors=[factor1, factor2],
+            exclude_variations=[
+                {factor2: [10]},                    # all runs having treatment "10" will be excluded
+                {factor1: ['iter'], factor2: [30]}, # all runs having the combination ("iter", 30) will be excluded
+            ],
+            repetitions = 3,
+            # These names must match the keys returned by populate_run_data exactly
+            data_columns=["total_power (J)", "runtime (sec)", "total_mem (bytes)"]
+        )
+        return self.run_table_model
+
+    def before_experiment(self) -> None:
+        """Perform any activity required before starting the experiment here
+        Invoked only once during the lifetime of the program."""
+        pass
+
+    def before_run(self) -> None:
+        """Perform any activity required before starting a run.
+        No context is available here as the run is not yet active (BEFORE RUN)"""
+        pass
+
+    def start_run(self, context: RunnerContext) -> None:
+        """Perform any activity required for starting the run here.
+        For example, starting the target system to measure.
+        Activities after starting the run should also be performed here."""
+        pass
+
+    def start_measurement(self, context: RunnerContext) -> None:
+        """Start EnergiBridge around the Fibonacci script selected by this run's factors."""
+        fib_type = context.run_variation["fib_type"]
+        problem_size = context.run_variation["problem_size"]
+
+        # The script path is relative, so the runner has to be started from the repository root
+        self.profiler = EnergiBridge(target_program=f"python examples/hello-world-fibonacci/fibonacci_{fib_type}.py {problem_size}",
+                                     out_file=context.run_dir / "energibridge.csv")
+
+        self.profiler.start()
+
+    def interact(self, context: RunnerContext) -> None:
+        """Perform any interaction with the running target system here, or block here until the target finishes."""
+        pass
+
+    def stop_measurement(self, context: RunnerContext) -> None:
+        """Stop the profiler once the measured program has finished."""
+        # wait=True: let the target program run to completion instead of terminating it
+        self.profiler.stop(wait=True)
+
+    def stop_run(self, context: RunnerContext) -> None:
+        """Perform any activity here required for stopping the run.
+        Activities after stopping the run should also be performed here."""
+        pass
+
+    def populate_run_data(self, context: RunnerContext) -> Optional[Dict[str, Any]]:
+        """Parse and process any measurement data here.
+        You can also store the raw measurement data under `context.run_dir`
+        Returns a dictionary with keys `self.run_table_model.data_columns` and their values populated"""
+
+        eb_log, eb_summary = self.profiler.parse_log(self.profiler.logfile,
+                                                     self.profiler.summary_logfile)
+
+        return {"total_power (J)": eb_summary["total_joules"],
+                "runtime (sec)": eb_summary["runtime_seconds"],
+                # Last sampled value of the TOTAL_MEMORY column of the EnergiBridge log
+                "total_mem (bytes)": list(eb_log["TOTAL_MEMORY"].values())[-1]}
+
+    def after_experiment(self) -> None:
+        """Perform any activity required after stopping the experiment here
+        Invoked only once during the lifetime of the program."""
+        pass
+
+    # ================================ DO NOT ALTER BELOW THIS LINE ================================
+    experiment_path:            Path             = None
diff --git a/examples/hello-world-fibonacci/fibonacci_iter.py b/examples/hello-world-fibonacci/fibonacci_iter.py
@@ -0,0 +1,11 @@
+# Implementation by Mandy Wong (https://realpython.com/fibonacci-sequence-python/)
+import sys
+
+def fib(n):
+	"""Return the n-th Fibonacci number (fib(0) == 0), computed iteratively."""
+	current, following = 0, 1
+	for _ in range(n):
+		current, following = following, current + following
+	return current
+
+# Print the first N Fibonacci numbers, one per line; N is the first command line argument
+count = int(sys.argv[1])
+for n in range(count):
+	print(fib(n))
diff --git a/examples/hello-world-fibonacci/fibonacci_mem.py b/examples/hello-world-fibonacci/fibonacci_mem.py
@@ -0,0 +1,14 @@
+# Implementation by Mandy Wong (https://realpython.com/fibonacci-sequence-python/)
+import sys
+
+# Memo table shared by all calls, seeded with the two base cases
+cache = {0: 0, 1: 1}
+
+def fib(n):
+	"""Return the n-th Fibonacci number, remembering every computed value in `cache`."""
+	known = cache.get(n)
+	if known is None:
+		# Not seen before: compute recursively and store the result for later calls
+		known = cache[n] = fib(n - 1) + fib(n - 2)
+	return known
+
+# Print the first N Fibonacci numbers, one per line; N is the first command line argument
+count = int(sys.argv[1])
+for n in range(count):
+	print(fib(n))
diff --git a/examples/hello-world-fibonacci/fibonacci_rec.py b/examples/hello-world-fibonacci/fibonacci_rec.py
@@ -0,0 +1,10 @@
+# Implementation by Mandy Wong (https://realpython.com/fibonacci-sequence-python/)
+import sys
+
+def fib(n):
+	"""Return the n-th Fibonacci number using plain (uncached) recursion."""
+	# 0 and 1 are their own Fibonacci numbers; everything else is the sum of the two before it
+	return n if n in {0, 1} else fib(n - 1) + fib(n - 2)
+
+# Print the first N Fibonacci numbers, one per line; N is the first command line argument
+count = int(sys.argv[1])
+for n in range(count):
+	print(fib(n))
diff --git a/experiment-runner/Plugins/Profilers/DataSource.py b/experiment-runner/Plugins/Profilers/DataSource.py
@@ -9,6 +9,8 @@
 import ctypes
 from enum import StrEnum
 import shutil
+import ctypes
+import os
 import subprocess
 import threading
 import queue
@@ -86,6 +88,12 @@ def is_admin(self):
         except:
             return ctypes.windll.shell32.IsUserAdmin() == 1
 
+    def is_admin(self):
+        """Return True when the current process runs with root/administrator rights.
+
+        NOTE(review): an `is_admin` with the same body sits directly above this one in
+        the file; this copy looks like a merge duplicate -- confirm and drop one of them.
+        """
+        try:
+            # POSIX: the root user has uid 0
+            return os.getuid() == 0
+        except AttributeError:
+            # os.getuid is not available on Windows; ask the shell API instead.
+            # The exported function is IsUserAnAdmin and it returns a non-zero BOOL for admins.
+            return ctypes.windll.shell32.IsUserAnAdmin() != 0
+
     @property
     @abstractmethod
     def supported_platforms(self) -> list[str]:
@@ -110,11 +118,15 @@ def stop(self):
 
     @staticmethod
     @abstractmethod
-    def parse_log():
+    def parse_log(logfile):
+        """Parse the log at `logfile`; abstract, so each concrete data source supplies the parsing."""
         pass
 
 class CLISource(DataSource):
     def __init__(self):
+        super().__init__()
+        
+        self.requires_admin = False
+
         self.process = None
         self.args = None
         self._logfile = ValueRef(None)
@@ -141,7 +153,8 @@ def parameters(self) -> ParameterDict:
     def _validate_platform(self):
+        """Run the base-class platform validation, then make sure the CLI tool can be executed.
+
+        Raises:
+            RuntimeError: if `source_name` is neither found on PATH nor a path to an executable.
+        """
         super()._validate_platform()
                 
-        if shutil.which(self.source_name) is None:
+        # Accept the tool either by name on PATH or as a direct path to an executable file
+        if shutil.which(self.source_name) is None       \
+            and not os.access(self.source_name, os.X_OK):
             raise RuntimeError(f"The {self.source_name} cli tool is required for this plugin")
     
     def _validate_start(self):
@@ -179,7 +192,11 @@ def _validate_parameters(self, parameters: dict):
 
     def _format_cmd(self):
         self._validate_parameters(self.args)
+
         cmd = self.source_name
+
+        if self.requires_admin:
+            cmd = f"sudo {cmd}"
         
         # Transform the parameter dict into string format to be parsed by shlex
         for p, v in self.args.items():
@@ -219,13 +236,15 @@ def start(self):
         
         self._validate_start()
 
-    def stop(self):
+    def stop(self, wait=False):
         if not self.process:
             return
 
         try:
-            self.process.terminate()
-            stdout, stderr = self.process.communicate(timeout=5)
+            if not wait:
+                self.process.terminate()
+            
+            stdout, stderr = self.process.communicate(timeout=None if wait else 5)
 
         except Exception as e:
             self.process.kill()
diff --git a/experiment-runner/Plugins/Profilers/EnergiBridge.py b/experiment-runner/Plugins/Profilers/EnergiBridge.py
@@ -0,0 +1,117 @@
+from pathlib import Path
+import pandas as pd
+import re
+from Plugins.Profilers.DataSource import CLISource, ParameterDict
+
+# Supported parameters for the EnergiBridge plugin: accepted flag spellings -> argument type
+# (None presumably marks flags that take no argument -- TODO confirm against ParameterDict)
+ENERGIBRIDGE_PARAMETERS = {
+    ("-o","--output"): Path,
+    ("-s","--separator"): str,
+    ("-c","--output-command"): str,
+    ("-i","--interval"): int,
+    ("-m","--max-execution"): int,
+    ("-g","--gpu"): None,
+    ("--summary",): None
+}
+
+class EnergiBridge(CLISource):
+    """An integration of EnergiBridge into experiment-runner as a data source plugin"""
+
+    parameters = ParameterDict(ENERGIBRIDGE_PARAMETERS)
+    source_name = "energibridge"
+    supported_platforms = ["Linux", "Darwin", "Windows"]
+
+    def __init__(self,
+                 sample_frequency:      int                 = 200,
+                 out_file:              Path                = "energibridge.csv",
+                 summary:               bool                = True,
+                 target_program:        str                 = "sleep 1000000",
+                 additional_args:       dict | None         = None):
+        """Configure an EnergiBridge invocation.
+
+        Args:
+            sample_frequency: Value passed to EnergiBridge's `-i` (`--interval`) option.
+            out_file: Where EnergiBridge writes its CSV log (`-o`).
+            summary: Request `--summary` output; `stop()` then stores it next to the log.
+            target_program: Command line appended after `--` for EnergiBridge to run.
+            additional_args: Extra parameters merged in through `update_parameters`.
+        """
+        super().__init__()
+
+        self.requires_admin = True
+        self.target_program = target_program
+        self.logfile = out_file
+        self.args = {
+            "-o": Path(self.logfile),
+            "-i": sample_frequency,
+        }
+
+        if summary:
+            self.update_parameters(add={"--summary": None})
+
+        # Use a fresh dict per call: a `{}` default would be one object shared by all instances
+        self.update_parameters(add=additional_args if additional_args is not None else {})
+
+    @property
+    def summary(self):
+        """Whether the `--summary` flag is part of the current arguments."""
+        return "--summary" in self.args.keys()
+
+    @property
+    def summary_logfile(self):
+        """Path of the summary file stored next to the log, or None without an output file."""
+        if not self.logfile \
+           or not any(flag in self.args.keys() for flag in ("-o", "--output")):
+            return None
+
+        # The log file may have been given as a plain string (the default is one)
+        logfile = Path(self.logfile)
+        return logfile.parent / Path(logfile.name.split(".")[0] + "-summary.txt")
+
+    def _stat_delta(self, data, stat):
+        """Return the last minus the first recorded value of column `stat`."""
+        samples = list(data[stat].values())
+        return samples[-1] - samples[0]
+
+    # Less accurate than the summary from EB, but better than nothing
+    # TODO: EnergiBridge calculates this differently in a system dependent way,
+    #       this approximates using available data
+    def generate_summary(self):
+        """Build a summary line, in EnergiBridge's wording, from the CSV log itself."""
+        log_data = self.parse_log(self.logfile)
+
+        # "Time" is divided by 1000 to get seconds (presumably logged in milliseconds -- confirm)
+        elapsed_time = self._stat_delta(log_data, "Time") / 1000
+        total_joules = self._stat_delta(log_data, "PACKAGE_ENERGY (J)")
+
+        return f"Energy consumption in joules: {total_joules} for {elapsed_time} sec of execution"
+
+    # We also want to save the summary of EnergiBridge if present
+    def stop(self, wait=False):
+        """Stop the measurement and, when enabled, store the summary line next to the log.
+
+        Args:
+            wait: Forwarded to `CLISource.stop`; True waits for the process instead of terminating it.
+
+        Returns:
+            Whatever `CLISource.stop` returned (the captured stdout, or None).
+        """
+        stdout = super().stop(wait)
+
+        # Nothing was captured (e.g. no process was running), so there is no summary to store
+        if stdout is None:
+            return stdout
+
+        if self.summary and self.summary_logfile:
+            # The last line is the summary, if present
+            lines = stdout.splitlines()
+            last_line = lines[-1] if lines else ""
+
+            # If runtime was too short, energibridge doesnt provide a summary
+            # Approximate this instead
+            if not last_line.startswith("Energy consumption"):
+                last_line = self.generate_summary()
+
+            # Open the file only once the text is known, so a failure above leaves no empty file
+            with open(self.summary_logfile, "w") as f:
+                f.write(last_line)
+
+        return stdout
+
+    def _format_cmd(self):
+        """Append the measured program after `--` to the command built by the base class."""
+        cmd = super()._format_cmd()
+
+        return cmd + f" -- {self.target_program}"
+
+    @staticmethod
+    def parse_log(logfile: Path, summary_logfile: Path|None=None):
+        """Read the EnergiBridge CSV log and, optionally, the stored summary line.
+
+        Args:
+            logfile: CSV file written by EnergiBridge.
+            summary_logfile: Summary file written by `stop()`; skipped when falsy.
+
+        Returns:
+            The log as `DataFrame.to_dict()` output when no summary file is given.
+            Otherwise a tuple `(log, summary)`, where `summary` is a dict with the keys
+            "total_joules" and "runtime_seconds", or the raw summary text if it does
+            not contain exactly two numbers.
+        """
+        # Things are already in csv format here, no checks needed
+        log_data = pd.read_csv(logfile).to_dict()
+
+        if not summary_logfile:
+            return log_data
+
+        with open(summary_logfile, "r") as f:
+            summary_data = f.read()
+
+        # Extract the numbers from the string; a sign or an exponent is accepted as well
+        values = re.findall(r"[-+]?[0-9]+[.]?[0-9]*(?:[eE][-+]?[0-9]+)?", summary_data)
+
+        if len(values) == 2:
+            summary_data = {
+                "total_joules": float(values[0]),
+                "runtime_seconds": float(values[1])
+            }
+
+        return (log_data, summary_data)
+