Branch and filename are now always not null (#602)

ArneTR · web-flow · commit b20d32a6040f · 2023-12-20T22:09:39.000+01:00
* Branch and filename are now always not null

* Branch is always set to value in directory

* Changed test logic

* Reverted checkout branch for local directory
diff --git a/api/api_helpers.py b/api/api_helpers.py
@@ -103,17 +103,15 @@ def get_run_info(run_id):
     return DB().fetch_one(query, params=params, row_factory=psycopg_rows_dict_row)
 
 
-def get_timeline_query(uri,filename,machine_id, branch, metrics, phase, start_date=None, end_date=None, detail_name=None, limit_365=False, sorting='run'):
+def get_timeline_query(uri, filename, machine_id, branch, metrics, phase, start_date=None, end_date=None, detail_name=None, limit_365=False, sorting='run'):
 
     if filename is None or filename.strip() == '':
         filename =  'usage_scenario.yml'
 
-    params = [uri, filename, machine_id, f"%{phase}"]
+    if branch is None or branch.strip() != '':
+        branch = 'main'
 
-    branch_condition = 'AND r.branch IS NULL'
-    if branch is not None and branch.strip() != '':
-        branch_condition = 'AND r.branch = %s'
-        params.append(branch)
+    params = [uri, filename, branch, machine_id, f"%{phase}"]
 
     metrics_condition = ''
     if metrics is None or metrics.strip() == '' or metrics.strip() == 'key':
@@ -156,10 +154,10 @@ def get_timeline_query(uri,filename,machine_id, branch, metrics, phase, start_da
             WHERE
                 r.uri = %s
                 AND r.filename = %s
+                AND r.branch = %s
                 AND r.end_measurement IS NOT NULL
                 AND r.machine_id = %s
                 AND p.phase LIKE %s
-                {branch_condition}
                 {metrics_condition}
                 {start_date_condition}
                 {end_date_condition}
@@ -171,13 +169,14 @@ def get_timeline_query(uri,filename,machine_id, branch, metrics, phase, start_da
                 p.phase ASC, {sorting_condition}
 
             """
+
     return (query, params)
 
 def determine_comparison_case(ids):
 
     query = '''
             WITH uniques as (
-                SELECT uri, filename, machine_id, commit_hash, COALESCE(branch, 'main / master') as branch FROM runs
+                SELECT uri, filename, machine_id, commit_hash, branch FROM runs
                 WHERE id = ANY(%s::uuid[])
                 GROUP BY uri, filename, machine_id, commit_hash, branch
             )
@@ -277,7 +276,7 @@ def get_phase_stats(ids):
     query = """
             SELECT
                 a.phase, a.metric, a.detail_name, a.value, a.type, a.max_value, a.min_value, a.unit,
-                b.uri, c.description, b.filename, b.commit_hash, COALESCE(b.branch, 'main / master') as branch
+                b.uri, c.description, b.filename, b.commit_hash, b.branch
             FROM phase_stats as a
             LEFT JOIN runs as b on b.id = a.run_id
             LEFT JOIN machines as c on c.id = b.machine_id
diff --git a/api/main.py b/api/main.py
@@ -227,7 +227,7 @@ async def get_repositories(uri: str | None = None, branch: str | None = None, ma
 async def get_runs(uri: str | None = None, branch: str | None = None, machine_id: int | None = None, machine: str | None = None, filename: str | None = None, limit: int | None = None):
 
     query = """
-            SELECT r.id, r.name, r.uri, COALESCE(r.branch, 'main / master'), r.created_at, r.invalid_run, r.filename, m.description, r.commit_hash, r.end_measurement
+            SELECT r.id, r.name, r.uri, r.branch, r.created_at, r.invalid_run, r.filename, m.description, r.commit_hash, r.end_measurement
             FROM runs as r
             LEFT JOIN machines as m on r.machine_id = m.id
             WHERE 1=1
@@ -303,7 +303,7 @@ async def compare_in_repo(ids: str):
         machine = machines[run_info['machine_id']]
         uri = run_info['uri']
         usage_scenario = run_info['usage_scenario']['name']
-        branch = run_info['branch'] if run_info['branch'] is not None else 'main / master'
+        branch = run_info['branch']
         commit = run_info['commit_hash']
         filename = run_info['filename']
 
@@ -519,8 +519,8 @@ async def get_timeline_projects():
                 FROM runs as r
                 WHERE
                     p.url = r.uri
-                    AND COALESCE(p.branch, 'main / master') = COALESCE(r.branch, 'main / master')
-                    AND COALESCE(p.filename, 'usage_scenario.yml') = COALESCE(r.filename, 'usage_scenario.yml')
+                    AND p.branch = r.branch
+                    AND p.filename = r.filename
                     AND p.machine_id = r.machine_id
                 ORDER BY r.created_at DESC
                 LIMIT 1
@@ -1017,15 +1017,15 @@ async def software_add(software: Software):
     if software.email is None or software.email.strip() == '':
         raise RequestValidationError('E-mail is empty')
 
-    if not DB().fetch_one('SELECT id FROM machines WHERE id=%s AND available=TRUE', params=(software.machine_id,)):
-        raise RequestValidationError('Machine does not exist')
+    if software.branch is None or software.branch.strip() == '':
+        software.branch = 'main'
 
+    if software.filename is None or software.filename.strip() == '':
+        software.filename = 'usage_scenario.yml'
 
-    if software.branch.strip() == '':
-        software.branch = None
+    if not DB().fetch_one('SELECT id FROM machines WHERE id=%s AND available=TRUE', params=(software.machine_id,)):
+        raise RequestValidationError('Machine does not exist')
 
-    if software.filename.strip() == '':
-        software.filename = 'usage_scenario.yml'
 
     if software.schedule_mode not in ['one-off', 'time', 'commit', 'variance']:
         raise RequestValidationError(f"Please select a valid measurement interval. ({software.schedule_mode}) is unknown.")
diff --git a/docker/structure.sql b/docker/structure.sql
@@ -22,8 +22,8 @@ CREATE TABLE jobs (
     name text,
     email text,
     url text,
-    branch text,
-    filename text,
+    branch text NOT NULL,
+    filename text NOT NULL,
     categories int[],
     machine_id int REFERENCES machines(id) ON DELETE SET NULL ON UPDATE CASCADE,
     created_at timestamp with time zone DEFAULT now(),
@@ -39,13 +39,13 @@ CREATE TABLE runs (
     job_id integer REFERENCES jobs(id) ON DELETE SET NULL ON UPDATE CASCADE UNIQUE,
     name text,
     uri text,
-    branch text,
+    branch text NOT NULL,
     commit_hash text,
     commit_timestamp timestamp with time zone,
     email text,
     categories int[],
     usage_scenario json,
-    filename text,
+    filename text NOT NULL,
     machine_specs jsonb,
     runner_arguments json,
     machine_id int REFERENCES machines(id) ON DELETE SET NULL ON UPDATE CASCADE,
@@ -196,8 +196,8 @@ CREATE TABLE timeline_projects (
     name text,
     url text,
     categories integer[],
-    branch text DEFAULT 'NULL'::text,
-    filename text,
+    branch text NOT NULL,
+    filename text NOT NULL,
     machine_id integer REFERENCES machines(id) ON DELETE RESTRICT ON UPDATE CASCADE NOT NULL,
     schedule_mode text NOT NULL,
     last_scheduled timestamp with time zone,
diff --git a/frontend/request.html b/frontend/request.html
@@ -77,7 +77,7 @@ <h1 class="ui header float left">
                             <input type="text" placeholder="Filename (optional - default: usage_scenario.yml)" id="text-field-hero-input4" name="filename">
                         </div>
                         <div class="ui fluid icon input">
-                            <input type="text" placeholder="Branch (optional - default: main/master)" id="text-field-hero-input5" name="branch">
+                            <input type="text" placeholder="Branch (optional - default: main)" id="text-field-hero-input5" name="branch">
                         </div>
                         <div class="ui fluid icon input">
                             <select name="machine_id" class="ui fluid dropdown" required>
diff --git a/frontend/timeline.html b/frontend/timeline.html
@@ -112,7 +112,7 @@ <h4>What is Timeline View?</h4>
                                 </div>
                                 <div class="field">
                                     <label>Branch:</label>
-                                    <input type="text" name="branch" value="" placeholder="Leave empty for default (main/master)" class="ui input large">
+                                    <input type="text" name="branch" value="" placeholder="Leave empty for default (main)" class="ui input large">
                                 </div>
                             </div>
                             <div class="inline fields">
diff --git a/migrations/2023_12_17_branch_filename_not_null.sql b/migrations/2023_12_17_branch_filename_not_null.sql
@@ -0,0 +1,26 @@
+UPDATE runs SET filename = 'usage_scenario.yml' WHERE filename IS NULL;
+UPDATE jobs SET filename = 'usage_scenario.yml' WHERE filename IS NULL;
+UPDATE timeline_projects SET filename = 'usage_scenario.yml' WHERE filename IS NULL;
+
+UPDATE runs SET branch = 'main' WHERE branch IS NULL;
+UPDATE jobs SET branch = 'main' WHERE branch IS NULL;
+UPDATE timeline_projects SET branch = 'main' WHERE branch IS NULL;
+
+
+ALTER TABLE "public"."jobs"
+  ALTER COLUMN "branch" DROP DEFAULT,
+  ALTER COLUMN "branch" SET NOT NULL,
+  ALTER COLUMN "filename" DROP DEFAULT,
+  ALTER COLUMN "filename" SET NOT NULL;
+
+ALTER TABLE "public"."timeline_projects"
+  ALTER COLUMN "branch" DROP DEFAULT,
+  ALTER COLUMN "branch" SET NOT NULL,
+  ALTER COLUMN "filename" DROP DEFAULT,
+  ALTER COLUMN "filename" SET NOT NULL;
+
+ALTER TABLE "public"."runs"
+  ALTER COLUMN "branch" DROP DEFAULT,
+  ALTER COLUMN "branch" SET NOT NULL,
+  ALTER COLUMN "filename" DROP DEFAULT,
+  ALTER COLUMN "filename" SET NOT NULL;
diff --git a/runner.py b/runner.py
@@ -116,6 +116,8 @@ def __init__(self,
         self._sci = {'R_d': None, 'R': 0}
         self._job_id = job_id
         self._arguments = locals()
+        self._commit_hash = None
+        self._commit_timestamp = None
         del self._arguments['self'] # self is not needed and also cannot be serialzed. We remove it
 
 
@@ -147,12 +149,15 @@ def custom_sleep(self, sleep_time):
             time.sleep(sleep_time)
 
     def initialize_run(self):
-            # We issue a fetch_one() instead of a query() here, cause we want to get the RUN_ID
+        # We issue a fetch_one() instead of a query() here, cause we want to get the RUN_ID
+
+        # we also update the branch here again, as this might not be main in case of local filesystem
+
         self.__run_id = DB().fetch_one("""
-                INSERT INTO runs (job_id, name, uri, email, branch, runner_arguments, created_at)
-                VALUES (%s, %s, %s, 'manual', %s, %s, NOW())
+                INSERT INTO runs (job_id, name, uri, email, branch, filename, commit_hash, commit_timestamp, runner_arguments, created_at)
+                VALUES (%s, %s, %s, 'manual', %s, %s, %s, %s, %s, NOW())
                 RETURNING id
-                """, params=(self._job_id, self._name, self._uri, self._branch, json.dumps(self._arguments)))[0]
+                """, params=(self._job_id, self._name, self._uri, self._branch, self._original_filename, self._commit_hash, self._commit_timestamp, json.dumps(self._arguments)))[0]
         return self.__run_id
 
     def initialize_folder(self, path):
@@ -218,37 +223,32 @@ def checkout_repository(self):
 
         else:
             if self._branch:
+                # we never want to checkout a local directory to a different branch as this might also be the GMT directory itself and might confuse the tool
                 raise RuntimeError('Specified --branch but using local URI. Did you mean to specify a github url?')
             self.__folder = self._uri
 
+        self._branch = subprocess.check_output(['git', 'branch', '--show-current'], cwd=self.__folder, encoding='UTF-8').strip()
+
         # we can safely do this, even with problematic folders, as the folder can only be a local unsafe one when
         # running in CLI mode
-        commit_hash = subprocess.run(
+        self._commit_hash = subprocess.run(
             ['git', 'rev-parse', 'HEAD'],
             check=True,
             capture_output=True,
             encoding='UTF-8',
             cwd=self.__folder
         )
-        commit_hash = commit_hash.stdout.strip("\n")
+        self._commit_hash = self._commit_hash.stdout.strip("\n")
 
-        commit_timestamp = subprocess.run(
+        self._commit_timestamp = subprocess.run(
             ['git', 'show', '-s', '--format=%ci'],
             check=True,
             capture_output=True,
             encoding='UTF-8',
             cwd=self.__folder
         )
-        commit_timestamp = commit_timestamp.stdout.strip("\n")
-        parsed_timestamp = datetime.strptime(commit_timestamp, "%Y-%m-%d %H:%M:%S %z")
-
-        DB().query("""
-            UPDATE runs
-            SET
-                commit_hash=%s,
-                commit_timestamp=%s
-            WHERE id = %s
-            """, params=(commit_hash, parsed_timestamp, self.__run_id))
+        self._commit_timestamp = self._commit_timestamp.stdout.strip("\n")
+        self._commit_timestamp = datetime.strptime(self._commit_timestamp, "%Y-%m-%d %H:%M:%S %z")
 
     # This method loads the yml file and takes care that the includes work and are secure.
     # It uses the tagging infrastructure provided by https://pyyaml.org/wiki/PyYAMLDocumentation
@@ -464,14 +464,13 @@ def update_and_insert_specs(self):
             UPDATE runs
             SET
                 machine_id=%s, machine_specs=%s, measurement_config=%s,
-                usage_scenario = %s, filename=%s, gmt_hash=%s
+                usage_scenario = %s, gmt_hash=%s
             WHERE id = %s
             """, params=(
             config['machine']['id'],
             escape(json.dumps(machine_specs), quote=False),
             json.dumps(measurement_config),
             escape(json.dumps(self._usage_scenario), quote=False),
-            self._original_filename,
             gmt_hash,
             self.__run_id)
         )
@@ -812,8 +811,7 @@ def setup_services(self):
                     docker_run_string.append(f"{env_key}={env_value}")
 
                 if env_var_check_errors:
-                    raise RuntimeError("Docker container environment setup has problems:\n" + 
-                                       "\n".join(env_var_check_errors))
+                    raise RuntimeError('Docker container environment setup has problems:\n\n'.join(env_var_check_errors))
 
             if 'networks' in service:
                 for network in service['networks']:
@@ -1286,9 +1284,9 @@ def run(self):
         try:
             config = GlobalConfig().config
             self.check_system('start')
-            return_run_id = self.initialize_run()
             self.initialize_folder(self._tmp_folder)
             self.checkout_repository()
+            return_run_id = self.initialize_run()
             self.initial_parse()
             self.import_metric_providers()
             self.populate_image_names()
@@ -1521,7 +1519,7 @@ def run(self):
         error_helpers.log_error('Base exception occured in runner.py: ', e, successful_run_id)
     finally:
         if args.print_logs:
-            for container_id, std_out in runner.get_logs().items():
-                print(f"Container logs of '{container_id}':")
+            for container_id_outer, std_out in runner.get_logs().items():
+                print(f"Container logs of '{container_id_outer}':")
                 print(std_out)
-                print(f"\n-----------------------------\n")
+                print('\n-----------------------------\n')
diff --git a/tests/api/test_api_helpers.py b/tests/api/test_api_helpers.py
@@ -31,7 +31,7 @@ def test_escape_dict():
     assert escaped['link'] == escaped_link
 
 def test_escape_run():
-    messy_run = Run(name="test<?>", url='testURL', email='testEmail', branch='', machine_id=0)
+    messy_run = Run(name="test<?>", url='testURL', email='testEmail', branch='main', machine_id=0)
     escaped_name = 'test&lt;?&gt;'
     escaped = api_helpers.html_escape_multi(messy_run.model_copy())
 
@@ -42,7 +42,7 @@ def test_escape_measurement():
         value=123,
         unit='mJ',
         repo='link<some_place>',
-        branch='',
+        branch='main',
         cpu='',
         commit_hash='',
         workflow='',
diff --git a/tests/test_functions.py b/tests/test_functions.py
@@ -62,15 +62,13 @@ def setup_runner(usage_scenario, docker_compose=None, uri='default', uri_type='f
 def run_until(runner, step):
     try:
         config = GlobalConfig().config
-        return_run_id = runner.initialize_run()
-
-        # do a meaningless operation on return_run_id so pylint doesn't complain
-        print(return_run_id)
-
         runner.check_system('start')
         runner.initialize_folder(runner._tmp_folder)
         runner.checkout_repository()
+        runner.initialize_run()
         runner.initial_parse()
+        if step == 'import_metric_providers':
+            return
         runner.import_metric_providers()
         runner.populate_image_names()
         runner.prepare_docker()