Commit 4005b55

Author: Sunil Thaha
Merge pull request #326 from sunya-ch/entrypoint-patch
make filter pod by benchmark optional
2 parents d55c2d3 + aad89b0 commit 4005b55

File tree

2 files changed: +33 -20 lines changed


cmd/cmd_util.py

Lines changed: 13 additions & 2 deletions
@@ -103,7 +103,8 @@ def get_validate_df(data_path, benchmark_filename, query_response):
     container_queries = [query for query in query_results.keys() if "container" in query]
     print("Container Queries: ", container_queries)
     status_data = load_json(data_path, benchmark_filename)
-    if status_data is None or status_data.get("status", None) == None:
+    filter_by_benchmark = False
+    if status_data is None or "status" not in status_data:
         # select all with keyword
         for query in container_queries:
             df = query_results[query]
@@ -120,7 +121,11 @@ def get_validate_df(data_path, benchmark_filename, query_response):
                 continue
             filtered_df = df.copy()
             if "pod_name" in df.columns:
-                filtered_df = filtered_df[filtered_df["pod_name"].str.contains(benchmark_filename)]
+                # check whether the inputted benchmark name can be used to filter the stressing pods
+                podname_filtered = filtered_df[filtered_df["pod_name"].str.contains(benchmark_filename)]
+                if len(podname_filtered) > 0:
+                    filter_by_benchmark = True
+                    filtered_df = podname_filtered
             # set validate item
             item = dict()
             item["pod"] = benchmark_filename
@@ -131,6 +136,7 @@ def get_validate_df(data_path, benchmark_filename, query_response):
             item["total"] = filtered_df[query].max()
             items += [item]
     else:
+        filter_by_benchmark = True
         cpe_results = status_data["status"]["results"]
         for result in cpe_results:
             scenarioID = result["scenarioID"]
@@ -217,6 +223,11 @@ def get_validate_df(data_path, benchmark_filename, query_response):
             item["total"] = df[query].max()
             items += [item]
     validate_df = pd.DataFrame(items)
+
+    if filter_by_benchmark:
+        print("===========================================\n Use benchmark name to filter pod results: \n\n", benchmark_filename)
+    else:
+        print("============================================\n Present results from all pods: \n\n")
     if not validate_df.empty:
         print(validate_df.groupby(["scenarioID", "query"]).sum()[["count", ">0"]])
     else:
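
To make the new behavior concrete, here is a minimal standalone sketch of the optional pod filtering this change introduces. The helper name `filter_pods_by_benchmark` and the sample DataFrame are hypothetical; only the `str.contains` filter and the fallback to all pods mirror the diff above.

```python
import pandas as pd

def filter_pods_by_benchmark(df, benchmark_name):
    """Keep only pods whose name contains the benchmark name.

    Falls back to all pods when nothing matches (or there is no pod_name
    column). Returns (filtered_df, filter_by_benchmark), where the flag
    reports whether the benchmark name was actually usable as a filter.
    """
    if "pod_name" not in df.columns:
        return df, False
    matched = df[df["pod_name"].str.contains(benchmark_name)]
    if len(matched) > 0:
        return matched, True  # benchmark name matched at least one pod
    return df, False          # no match: present results from all pods

# hypothetical sample data
df = pd.DataFrame({
    "pod_name": ["stressng-worker-0", "idle-pod"],
    "kepler_container_package_joules_total": [120.5, 3.2],
})
filtered, used_filter = filter_pods_by_benchmark(df, "stressng")
print(used_filter, list(filtered["pod_name"]))  # True ['stressng-worker-0']
```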

model_training/cmd_instruction.md

Lines changed: 20 additions & 18 deletions
@@ -1,16 +1,22 @@
 # Manual Metric Collection and Training with Entrypoint
 
 ## 1. Collect metrics
-Without benchmark/pipeline automation, kepler metrics can be collected by `query` function by either one of the following options.
-### 1.1. by defining start time and end time
+Without benchmark/pipeline automation, kepler metrics can be collected by the `query` function by setting `BENCHMARK`, `PROM_URL`, `COLLECT_ID`, and one of the following time options.
+
+> It is recommended to set the BENCHMARK name as a part of the pod name, such as `stressng`, to filter the validated results. The BENCHMARK name will also be used by the TrainerIsolator to filter the target pods. If the BENCHMARK cannot be used to filter the target pods, the validated results will show results from all pods.
 
 ```bash
-# value setting
 BENCHMARK= # name of the benchmark (will generate [BENCHMARK].json to save start and end time for reference)
 PROM_URL= # e.g., http://localhost:9090
+COLLECT_ID= # any unique id e.g., machine name
+```
+
+### 1.1. by defining start time and end time
+
+```bash
+# time value setting
 START_TIME= # format date +%Y-%m-%dT%H:%M:%SZ
 END_TIME= # format date +%Y-%m-%dT%H:%M:%SZ
-COLLECT_ID= # any unique id e.g., machine name
 
 # query execution
 DATAPATH=/path/to/workspace python cmd/main.py query --benchmark $BENCHMARK --server $PROM_URL --output kepler_query --start-time $START_TIME --end-time $END_TIME --id $COLLECT_ID
@@ -19,11 +25,8 @@ DATAPATH=/path/to/workspace python cmd/main.py query --benchmark $BENCHMARK --se
 ### 1.2. by defining last interval from the execution time
 
 ```bash
-# value setting
-BENCHMARK= # name of the benchmark (will generate [BENCHMARK].json to save start and end time for reference)
-PROM_URL= # e.g., http://localhost:9090
+# time value setting
 INTERVAL= # in second
-COLLECT_ID= # any unique id e.g., machine name
 
 # query execution
 DATAPATH=/path/to/workspace python cmd/main.py query --benchmark $BENCHMARK --server $PROM_URL --output kepler_query --interval $INTERVAL --id $COLLECT_ID
@@ -47,31 +50,30 @@ DATAPATH=/path/to/workspace MODEL_PATH=/path/to/workspace python cmd/main.py tra
 ```
 
 ## 3. Export models
-Export function is to archive the model that has an error less than threshold from the trained pipeline and make a report in the format that is ready to push to kepler-model-db.
-
-### 3.1. exporting the trained pipeline with BENCHMARK
-
-The benchmark file is created by CPE operator or by step 1.1. or 1.2..
+The export function archives the models from the trained pipeline whose error is below the threshold and makes a report in the format that is ready to push to kepler-model-db. To use the export function, set `EXPORT_PATH` and `PUBLISHER`, and provide a collect date option.
 
 ```bash
-# value setting
 EXPORT_PATH= # /path/to/kepler-model-db/models
 PUBLISHER= # github account of publisher
+```
 
+### 3.1. extracting the collect date from the benchmark file
+
+The benchmark file is created by the CPE operator or by the query function from step 1.
+
+```bash
 # export execution
 # require BENCHMARK from collect step
 # require PIPELINE_NAME from train step
 DATAPATH=/path/to/workspace MODEL_PATH=/path/to/workspace python cmd/main.py export --benchmark $BENCHMARK --pipeline-name $PIPELINE_NAME -o $EXPORT_PATH --publisher $PUBLISHER --zip=true
 ```
 
-### 3.2. exporting the trained models without BENCHMARK
+### 3.2. manually setting collect-date
 
 If the data is collected by tekton, there is no benchmark file created. Need to manually set `--collect-date` instead of `--benchmark` parameter.
 
 ```bash
-# value setting
-EXPORT_PATH= # /path/to/kepler-model-db/models
-PUBLISHER= # github account of publisher
+# collect date value setting
 COLLECT_DATE= # collect date
 
 # export execution
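
Putting the revised instructions together, a hypothetical end-to-end run could look like the sketch below; the benchmark name, Prometheus URL, collect id, publisher, and paths are placeholders, and `$PIPELINE_NAME` is assumed to come from the train step (section 2), which is unchanged by this commit.

```bash
# Hypothetical walkthrough of the updated instructions; all values are placeholders.
BENCHMARK=stressng      # also part of the stressing pod names, so validated results can be filtered
PROM_URL=http://localhost:9090
COLLECT_ID=node1

# 1. collect the last hour of kepler metrics (interval option)
INTERVAL=3600
DATAPATH=/path/to/workspace python cmd/main.py query --benchmark $BENCHMARK --server $PROM_URL --output kepler_query --interval $INTERVAL --id $COLLECT_ID

# 3. export the trained pipeline, reading the collect date from the benchmark file
EXPORT_PATH=/path/to/kepler-model-db/models
PUBLISHER=my-github-account
DATAPATH=/path/to/workspace MODEL_PATH=/path/to/workspace python cmd/main.py export --benchmark $BENCHMARK --pipeline-name $PIPELINE_NAME -o $EXPORT_PATH --publisher $PUBLISHER --zip=true
```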
