Skip to content

Commit 90a856b

Browse files
committed
fix!: remove post processing of base_ref and base_rev parameters
BREAKING CHANGE: the base_ref and base_rev parameters will now match what was passed into the Decision task exactly. This means `base_ref` will not get reset to the repo's default branch, and `base_rev` will no longer be reset to the merge base. ### Why we were previously post-processing these parameters Github push and pull_request events almost never contain the base ref, and often don't contain the base rev. For base ref, it only ever seems to be present when pushing a new branch. For base rev it is missing in the following cases: 1. All pull_request events. The value we pass in is actually the revision that the base ref happens to be pointing at when the event is fired. 2. Force pushes. The value we pass in is actually the old unreachable head rev that used to be on the branch Really, we only ever get a proper base rev when doing normal fast-forward pushes. The reason we need a base rev in the first place, is to compute the files changed. So let's say there's a PR with the following graph: A -> B -> C -> D (main) \ E -> F (PR) Based on the Github event, `D` is being passed in as `base_rev` and `F` is being passed in as `head_rev`. The post-processing was essentially running `git merge-base` to find `B`, so that we could then use `git log` to find all files touched between `B..F`. ### Why the post-processing isn't actually necessary It turns out that `git log D..F` already does the right thing! It means, show me all commits reachable from `F`, but not reachable from `D`, which is exactly what we want. Only files changed in `E` and `F` will be included. So there's no benefit of using `merge-base` for the purposes of finding changed files. As far as the `base_ref` post-processing goes, all we were doing was setting it to `repo.default_branch` if it doesn't exist (which is almost always for pushes). While this likely works in most cases, we don't actually have any idea what the real `base_ref` is, so it's not correct to be doing this. Since we don't even need `base_ref` to determine files changed, I think it's fine to leave it as `None` in the event it wasn't passed in. ### Risks with this patch While removing this should be fine for the purposes of determining changed files, there might be other use cases that projects are using the `merge-base` for. Such projects may need to determine the merge-base on their own, therefore this patch is backwards incompatible. ### Future improvements I think we should consider removing or renaming the `base_ref` and `base_rev` parameters. As outlined, they are misleading (at least with Github based repos) and don't actually represent the "base revision". But for now, I'll save that particular change for another day.
1 parent faf6ba4 commit 90a856b

File tree

3 files changed

+5
-169
lines changed

3 files changed

+5
-169
lines changed

src/taskgraph/decision.py

Lines changed: 2 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from taskgraph.util import json
2222
from taskgraph.util.python_path import find_object
2323
from taskgraph.util.schema import Schema, validate_schema
24-
from taskgraph.util.vcs import Repository, get_repository
24+
from taskgraph.util.vcs import get_repository
2525
from taskgraph.util.yaml import load_yaml
2626

2727
logger = logging.getLogger(__name__)
@@ -192,25 +192,10 @@ def get_decision_parameters(graph_config, options):
192192
except UnicodeDecodeError:
193193
commit_message = ""
194194

195-
parameters["base_ref"] = _determine_more_accurate_base_ref(
196-
repo,
197-
candidate_base_ref=options.get("base_ref"),
198-
head_ref=options.get("head_ref"),
199-
base_rev=options.get("base_rev"),
200-
)
201-
202-
parameters["base_rev"] = _determine_more_accurate_base_rev(
203-
repo,
204-
base_ref=parameters["base_ref"],
205-
candidate_base_rev=options.get("base_rev"),
206-
head_rev=options.get("head_rev"),
207-
env_prefix=_get_env_prefix(graph_config),
208-
)
209-
210195
# Define default filter list, as most configurations shouldn't need
211196
# custom filters.
212197
parameters["files_changed"] = repo.get_changed_files(
213-
rev=parameters["head_rev"], base_rev=parameters["base_rev"]
198+
rev=parameters["head_rev"], base=parameters["base_rev"]
214199
)
215200
parameters["filters"] = [
216201
"target_tasks_method",
@@ -284,68 +269,6 @@ def get_decision_parameters(graph_config, options):
284269
return result
285270

286271

287-
def _determine_more_accurate_base_ref(repo, candidate_base_ref, head_ref, base_rev):
288-
base_ref = candidate_base_ref
289-
290-
if not candidate_base_ref:
291-
base_ref = repo.default_branch
292-
elif candidate_base_ref == head_ref and base_rev == Repository.NULL_REVISION:
293-
logger.info(
294-
"base_ref and head_ref are identical but base_rev equals the null revision. "
295-
"This is a new branch but Github didn't identify its actual base."
296-
)
297-
base_ref = repo.default_branch
298-
299-
if base_ref != candidate_base_ref:
300-
logger.info(
301-
f'base_ref has been reset from "{candidate_base_ref}" to "{base_ref}".'
302-
)
303-
304-
return base_ref
305-
306-
307-
def _determine_more_accurate_base_rev(
308-
repo, base_ref, candidate_base_rev, head_rev, env_prefix
309-
):
310-
if not candidate_base_rev:
311-
logger.info("base_rev is not set.")
312-
base_ref_or_rev = base_ref
313-
elif candidate_base_rev == Repository.NULL_REVISION:
314-
logger.info("base_rev equals the null revision. This branch is a new one.")
315-
base_ref_or_rev = base_ref
316-
elif not repo.does_revision_exist_locally(candidate_base_rev):
317-
logger.warning(
318-
"base_rev does not exist locally. It is likely because the branch was force-pushed. "
319-
"taskgraph is not able to assess how many commits were changed and assumes it is only "
320-
f"the last one. Please set the {env_prefix.upper()}_BASE_REV environment variable "
321-
"in the decision task and provide `--base-rev` to taskgraph."
322-
)
323-
base_ref_or_rev = base_ref
324-
else:
325-
base_ref_or_rev = candidate_base_rev
326-
327-
if base_ref_or_rev == base_ref:
328-
logger.info(
329-
f'Using base_ref "{base_ref}" to determine latest common revision...'
330-
)
331-
332-
base_rev = repo.find_latest_common_revision(base_ref_or_rev, head_rev)
333-
if base_rev != candidate_base_rev:
334-
if base_ref_or_rev == candidate_base_rev:
335-
logger.info("base_rev is not an ancestor of head_rev.")
336-
337-
logger.info(
338-
f'base_rev has been reset from "{candidate_base_rev}" to "{base_rev}".'
339-
)
340-
341-
return base_rev
342-
343-
344-
def _get_env_prefix(graph_config):
345-
repo_keys = list(graph_config["taskgraph"].get("repositories", {}).keys())
346-
return repo_keys[0] if repo_keys else ""
347-
348-
349272
def set_try_config(parameters, task_config_file):
350273
if os.path.isfile(task_config_file):
351274
logger.info(f"using try tasks from {task_config_file}")

src/taskgraph/parameters.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class ParameterMismatch(Exception):
3333
base_schema = Schema(
3434
{
3535
Required("base_repository"): str,
36-
Required("base_ref"): str,
36+
Optional("base_ref"): str,
3737
Required("base_rev"): str,
3838
Required("build_date"): int,
3939
Required("build_number"): int,

test/test_decision.py

Lines changed: 2 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010
import unittest
1111
from pathlib import Path
1212

13-
import pytest
14-
1513
from taskgraph import decision
1614
from taskgraph.util.vcs import GitRepository, HgRepository
1715
from taskgraph.util.yaml import load_yaml
@@ -54,6 +52,7 @@ class TestGetDecisionParameters(unittest.TestCase):
5452
def setUp(self):
5553
self.options = {
5654
"base_repository": "https://hg.mozilla.org/mozilla-unified",
55+
"base_rev": "aaaa",
5756
"head_repository": "https://hg.mozilla.org/mozilla-central",
5857
"head_rev": "bbbb",
5958
"head_ref": "default",
@@ -67,27 +66,11 @@ def setUp(self):
6766
"tasks_for": "hg-push",
6867
"level": "3",
6968
}
70-
self.old_determine_more_accurate_base_rev = (
71-
decision._determine_more_accurate_base_rev
72-
)
73-
decision._determine_more_accurate_base_rev = lambda *_, **__: "aaaa"
74-
self.old_determine_more_accurate_base_ref = (
75-
decision._determine_more_accurate_base_ref
76-
)
77-
decision._determine_more_accurate_base_ref = lambda *_, **__: "aaaa"
78-
79-
def tearDown(self):
80-
decision._determine_more_accurate_base_rev = (
81-
self.old_determine_more_accurate_base_rev
82-
)
83-
decision._determine_more_accurate_base_ref = (
84-
self.old_determine_more_accurate_base_ref
85-
)
8669

8770
def test_simple_options(self, mock_files_changed):
8871
mock_files_changed.return_value = ["foo.txt"]
8972
params = decision.get_decision_parameters(FAKE_GRAPH_CONFIG, self.options)
90-
mock_files_changed.assert_called_once_with(rev="bbbb", base_rev="aaaa")
73+
mock_files_changed.assert_called_once_with(rev="bbbb", base="aaaa")
9174
self.assertEqual(params["build_date"], 1503691511)
9275
self.assertEqual(params["head_tag"], "v0.0.1")
9376
self.assertEqual(params["pushlog_id"], "143")
@@ -154,73 +137,3 @@ def test_dontbuild_commit_message_yields_default_target_tasks_method(
154137
self.options["tasks_for"] = "hg-push"
155138
params = decision.get_decision_parameters(FAKE_GRAPH_CONFIG, self.options)
156139
self.assertEqual(params["target_tasks_method"], "nothing")
157-
158-
159-
@pytest.mark.parametrize(
160-
"candidate_base_ref, base_rev, expected_base_ref",
161-
(
162-
("", "base-rev", "default-branch"),
163-
("head-ref", "base-rev", "head-ref"),
164-
("head-ref", "0000000000000000000000000000000000000000", "default-branch"),
165-
),
166-
)
167-
def test_determine_more_accurate_base_ref(
168-
candidate_base_ref, base_rev, expected_base_ref
169-
):
170-
repo_mock = unittest.mock.MagicMock()
171-
repo_mock.default_branch = "default-branch"
172-
173-
assert (
174-
decision._determine_more_accurate_base_ref(
175-
repo_mock, candidate_base_ref, "head-ref", base_rev
176-
)
177-
== expected_base_ref
178-
)
179-
180-
181-
@pytest.mark.parametrize(
182-
"common_rev, candidate_base_rev, expected_base_ref_or_rev, expected_base_rev",
183-
(
184-
("found-rev", "", "base-ref", "found-rev"),
185-
(
186-
"found-rev",
187-
"0000000000000000000000000000000000000000",
188-
"base-ref",
189-
"found-rev",
190-
),
191-
("found-rev", "non-existing-rev", "base-ref", "found-rev"),
192-
("existing-rev", "existing-rev", "existing-rev", "existing-rev"),
193-
),
194-
)
195-
def test_determine_more_accurate_base_rev(
196-
common_rev, candidate_base_rev, expected_base_ref_or_rev, expected_base_rev
197-
):
198-
repo_mock = unittest.mock.MagicMock()
199-
repo_mock.find_latest_common_revision.return_value = common_rev
200-
repo_mock.does_revision_exist_locally = lambda rev: rev == "existing-rev"
201-
202-
assert (
203-
decision._determine_more_accurate_base_rev(
204-
repo_mock, "base-ref", candidate_base_rev, "head-rev", env_prefix="PREFIX"
205-
)
206-
== expected_base_rev
207-
)
208-
repo_mock.find_latest_common_revision.assert_called_once_with(
209-
expected_base_ref_or_rev, "head-rev"
210-
)
211-
212-
213-
@pytest.mark.parametrize(
214-
"graph_config, expected_value",
215-
(
216-
({"taskgraph": {}}, ""),
217-
({"taskgraph": {"repositories": {}}}, ""),
218-
({"taskgraph": {"repositories": {"mobile": {}}}}, "mobile"),
219-
(
220-
{"taskgraph": {"repositories": {"mobile": {}, "some-other-repo": {}}}},
221-
"mobile",
222-
),
223-
),
224-
)
225-
def test_get_env_prefix(graph_config, expected_value):
226-
assert decision._get_env_prefix(graph_config) == expected_value

0 commit comments

Comments
 (0)