Skip to content

Commit 14ef83b

Browse files
committed
revisions: add revision worker and mots integration (bug 1740107)
This is a work in progress, do not merge! - add new command (lando-cli revision-worker) to start new worker - add new `revision-worker` that pre-processes revisions - add method to parse diff and list affected files (should move to mots) - store module output in revision model - add mots to requirements file - include revision info via API endpoint
1 parent 22a4bb5 commit 14ef83b

File tree

8 files changed

+346
-3
lines changed

8 files changed

+346
-3
lines changed

.flake8

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[flake8]
22
max-line-length = 88
33
select = C,E,F,W,B,B9
4-
ignore = E203, E501, W503, B006
4+
ignore = E203, E501, W503, B006, E712
55
exclude =
66
.hg,
77
.git,

landoapi/api/stacks.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from flask import current_app, g
99
from landoapi.commit_message import format_commit_message
1010
from landoapi.decorators import require_phabricator_api_key
11+
from landoapi.models.revisions import Revision
1112
from landoapi.phabricator import PhabricatorClient, PhabricatorAPIException
1213
from landoapi.projects import (
1314
get_sec_approval_project_phid,
@@ -103,6 +104,9 @@ def get(revision_id):
103104

104105
revisions_response = []
105106
for _phid, revision in stack_data.revisions.items():
107+
lando_revision = Revision.query.filter(
108+
Revision.revision_id == revision["id"]
109+
).one_or_none()
106110
revision_phid = PhabricatorClient.expect(revision, "phid")
107111
fields = PhabricatorClient.expect(revision, "fields")
108112
diff_phid = PhabricatorClient.expect(fields, "diffPHID")
@@ -151,6 +155,9 @@ def get(revision_id):
151155
"reviewers": serialize_reviewers(reviewers, users, projects, diff_phid),
152156
"is_secure": secure,
153157
"is_using_secure_commit_message": commit_description.sanitized,
158+
"lando_revision": lando_revision.serialize()
159+
if lando_revision
160+
else None,
154161
}
155162
)
156163

landoapi/cli.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,20 @@ def landing_worker():
8787
worker.start()
8888

8989

90+
@cli.command(name="revision-worker")
def revision_worker():
    """Start the revision worker once the required subsystems are ready."""
    from landoapi.app import auth0_subsystem, lando_ui_subsystem

    # Every subsystem except the two listed here must report ready first.
    skipped = [auth0_subsystem, lando_ui_subsystem]
    for subsystem in get_subsystems(exclude=skipped):
        subsystem.ensure_ready()

    # Imported only after the subsystems have been readied.
    from landoapi.revision_worker import RevisionWorker

    RevisionWorker().start()
102+
103+
90104
@cli.command(name="run-pre-deploy-sequence")
91105
def run_pre_deploy_sequence():
92106
"""Runs the sequence of commands required before a deployment."""

landoapi/models/revisions.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,17 @@
99
Phabricator diff that is associated with a particular revision.
1010
"""
1111

12+
from datetime import datetime
1213
import enum
1314
import logging
1415

1516
from sqlalchemy.dialects.postgresql.json import JSONB
1617

1718
from landoapi.models.base import Base
1819
from landoapi.storage import db
20+
from landoapi.hgexports import build_patch_for_revision
21+
from landoapi.phabricator import PhabricatorClient
22+
import os
1923

2024
logger = logging.getLogger(__name__)
2125

@@ -32,6 +36,43 @@ class DiffWarningGroup(enum.Enum):
3236
LINT = "LINT"
3337

3438

39+
class Revision(Base):
    """A locally stored revision together with its patch and processed data."""

    revision_id = db.Column(db.Integer, nullable=False, unique=True)
    diff_id = db.Column(db.Integer, nullable=False)
    repo_name = db.Column(db.String(254), nullable=False)
    is_stale = db.Column(db.Boolean, default=True, nullable=False)

    patch = db.Column(db.Text, nullable=False, default="")
    data = db.Column(JSONB, nullable=False, default=dict)

    # TODO: Handle multiple revisions in a stack.

    def store_patch(self):
        """Fetch the raw diff for ``diff_id``, build a patch from it and commit it.

        The Phabricator URL and API key are read from the process environment.
        The result of ``build_patch_for_revision`` is stored in ``patch`` and
        this object is added to the session and committed.
        """
        client = PhabricatorClient(
            os.getenv("PHABRICATOR_URL"),
            os.getenv("PHABRICATOR_UNPRIVILEGED_API_KEY"),
        )
        raw_diff = client.call_conduit(
            "differential.getrawdiff", diffID=self.diff_id
        )
        # Placeholder author/message values; the timestamp is "now".
        commit_timestamp = int(datetime.now().timestamp())
        self.patch = build_patch_for_revision(
            raw_diff,
            "Lando",
            "lando@lando",
            "System commit",
            commit_timestamp,
        )
        db.session.add(self)
        db.session.commit()

    def serialize(self):
        """Return the API representation of this revision (the patch is omitted)."""
        exposed = ("revision_id", "diff_id", "repo_name", "is_stale", "data")
        return {field: getattr(self, field) for field in exposed}
74+
75+
3576
class DiffWarning(Base):
3677
"""Represents a warning message associated with a particular diff and revision."""
3778

landoapi/revision_worker.py

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
# This Source Code Form is subject to the terms of the Mozilla Public
2+
# License, v. 2.0. If a copy of the MPL was not distributed with this
3+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
from __future__ import annotations
5+
6+
import io
7+
import logging
8+
from pathlib import Path
9+
from time import sleep
10+
11+
from flask import current_app
12+
13+
from landoapi.hg import (
14+
HgRepo,
15+
)
16+
from landoapi.models.revisions import Revision
17+
from landoapi.repos import repo_clone_subsystem
18+
from landoapi.storage import db
19+
from landoapi.landing_worker import LandingWorker
20+
from landoapi.phabricator import PhabricatorClient
21+
22+
from mots.config import FileConfig
23+
from mots.directory import Directory
24+
25+
logger = logging.getLogger(__name__)
26+
27+
28+
def get_conduit_data(method, **kwargs):
    """Call a conduit method and return the ``data`` of every result page.

    The first request is made with ``kwargs`` only; while the response's
    ``cursor.after`` is truthy, the request is repeated with that value passed
    as ``after`` and the page's ``data`` is appended to the result list.
    """
    client = PhabricatorClient(
        current_app.config["PHABRICATOR_URL"],
        current_app.config["PHABRICATOR_UNPRIVILEGED_API_KEY"],
    )
    page = client.call_conduit(method, **kwargs)
    collected = list(page["data"])
    next_cursor = page["cursor"]["after"]
    while next_cursor:
        page = client.call_conduit(method, after=next_cursor, **kwargs)
        collected.extend(page["data"])
        next_cursor = page["cursor"]["after"]
    return collected
41+
42+
43+
def get_revisions_list(statuses=None):
    """Return revisions in the given statuses as plain dicts.

    ``statuses`` defaults to ``["needs-review", "accepted"]``. Revisions without
    a diff PHID or a repository PHID are dropped. Each returned dict has the
    keys ``revision_id``, ``diff_id`` and ``repo_name`` (the repository's
    ``shortName``).
    """
    if not statuses:
        statuses = ["needs-review", "accepted"]

    found = get_conduit_data(
        "differential.revision.search",
        constraints={"statuses": statuses},
    )

    # (revision id, diff PHID, repo PHID) for every revision that has both PHIDs.
    candidates = []
    for entry in found:
        fields = entry["fields"]
        if fields["diffPHID"] and fields["repositoryPHID"]:
            candidates.append(
                (entry["id"], fields["diffPHID"], fields["repositoryPHID"])
            )

    # Resolve diff PHIDs to numeric diff IDs.
    diffs = get_conduit_data(
        "differential.diff.search",
        constraints={"phids": [diff_phid for _, diff_phid, _ in candidates]},
    )
    diff_id_by_phid = {diff["phid"]: diff["id"] for diff in diffs}

    # Resolve repository PHIDs to repository short names.
    repos = get_conduit_data(
        "diffusion.repository.search",
        constraints={"phids": [repo_phid for _, _, repo_phid in candidates]},
    )
    repo_name_by_phid = {repo["phid"]: repo["fields"]["shortName"] for repo in repos}

    return [
        {
            "revision_id": revision_id,
            "diff_id": diff_id_by_phid[diff_phid],
            "repo_name": repo_name_by_phid[repo_phid],
        }
        for revision_id, diff_phid, repo_phid in candidates
    ]
79+
80+
81+
def _strip_leading_component(path):
    """Drop the first ``/``-separated component of ``path`` (e.g. ``a/``)."""
    return path.partition("/")[2]


def _split_header_paths(header_paths):
    """Split the path part of a ``diff --git`` header into (source, destination).

    Returns ``None`` when the text cannot be split into two paths.
    """
    space_count = header_paths.count(" ")
    if space_count == 0:
        return None
    if space_count == 1:
        source, destination = header_paths.split(" ")
        return source, destination

    # Paths contain spaces. For a plain modification both sides name the same
    # file, so look for the split point where the two halves agree once their
    # leading prefix component is removed.
    for index, char in enumerate(header_paths):
        if char != " ":
            continue
        source, destination = header_paths[:index], header_paths[index + 1 :]
        stripped = _strip_leading_component(source)
        if stripped and stripped == _strip_leading_component(destination):
            return source, destination

    # Rename/copy with spaces in the names: fall back to the conventional
    # " b/" destination prefix.
    source, separator, remainder = header_paths.partition(" b/")
    if separator:
        return source, "b/" + remainder
    return None


def parse_diff(diff):
    """Given a diff, extract the set of affected file paths.

    Only ``diff --git <source> <destination>`` header lines are considered, so
    other lines that merely begin with "diff" (for example in a commit message)
    are ignored. The first path component of each side (normally ``a/`` and
    ``b/``) is dropped. Both the source and destination paths are reported, so
    a rename contributes two entries.

    Args:
        diff: The patch text.

    Returns:
        A ``set`` of path strings.
    """
    header_prefix = "diff --git "
    file_paths = set()
    for line in diff.splitlines():
        if not line.startswith(header_prefix):
            continue
        paths = _split_header_paths(line[len(header_prefix) :])
        if paths is None:
            # Malformed header; nothing reliable to extract.
            continue
        for path in paths:
            file_paths.add(_strip_leading_component(path))
    return file_paths
93+
94+
95+
def sync_revisions():
    """Check and update the local database with available revisions.

    For every revision returned by ``get_revisions_list()``:

    * if no local row exists, one is created and its patch is stored;
    * if a local row exists with the same diff ID, nothing is done;
    * if a local row exists with a different diff ID, the row's diff ID is
      updated, the row is marked stale and its patch is fetched again so the
      stored patch matches the new diff.
    """
    revisions = get_revisions_list()
    logger.debug(f"Processing {len(revisions)} revisions...")
    for r in revisions:
        logger.debug(f"Processing {r}...")

        # Load the mapped row itself; a Query object cannot be mutated or
        # added to the session.
        revision = Revision.query.filter(
            Revision.revision_id == r["revision_id"]
        ).one_or_none()

        if revision is None:
            logger.debug(f"Creating {r} in DB.")
            revision = Revision(**r)
        elif revision.diff_id == r["diff_id"]:
            logger.debug(f"{r} already exists in DB, skipping.")
            continue
        else:
            logger.debug(f"{r} already exists in DB, updating diff ID.")
            revision.diff_id = r["diff_id"]
            # Previously parsed data was computed against the old diff.
            revision.is_stale = True

        # Download and store the patch diff in the DB.
        revision.store_patch()

        db.session.add(revision)
        db.session.commit()
        # TODO: identify stale revisions (e.g. when a repo has been updated and
        # the parsed state of the revision is no longer relevant, e.g. check
        # hash.)
126+
127+
128+
class RevisionWorker(LandingWorker):
    """A worker that pre-processes revisions.

    This worker continuously synchronises revisions with the remote Phabricator API
    and runs all applicable checks and processes on each revision, if needed.

    TODO: this should extend an abstract worker class, not landing worker.
    """

    # Names of the processes to run; each maps to a ``process_<name>`` method.
    processes = [
        "mots",
    ]

    def start(self):
        """Run the sync-and-process loop until ``self.running`` becomes falsy."""
        logger.info("Revision worker starting")
        logger.info(
            f"{len(self.applicable_repos)} applicable repos: {self.applicable_repos}"
        )
        self.running = True

        while self.running:
            sync_revisions()

            # Get stale revisions. ``== True`` is intentional: it builds a SQL
            # expression rather than a Python comparison.
            revisions = Revision.query.filter(Revision.is_stale == True)  # noqa: E712
            if not revisions.count():
                sleep(1)
            for revision in revisions:
                logger.info(
                    "Running mots checks on revision", extra={"id": revision.id}
                )
                for process in self.processes:
                    getattr(self, f"process_{process}")(revision)

    def process_mots(self, revision):
        """Query mots for the paths touched by ``revision`` and store the result.

        The repository is updated, mots is queried for the affected paths, the
        patch is applied, and mots is queried again. The combined result is
        written to ``revision.data["mots"]`` and the revision is marked as no
        longer stale. If the patch cannot be applied, the failure is logged and
        the revision is left unchanged (and therefore still stale).
        """
        repo = repo_clone_subsystem.repos[revision.repo_name]
        hgrepo = HgRepo(
            str(repo_clone_subsystem.repo_paths[revision.repo_name]),
            config=repo.config_override,
        )
        # checkout repo, pull & update
        with hgrepo.for_pull():
            hgrepo.update_repo(repo.pull_path)

        # load mots.yaml config
        wd = hgrepo.path
        mots_config = FileConfig(Path(wd) / "mots.yaml")
        mots_directory = Directory(mots_config)

        # CHECK query before applying patch, and again after.
        mots_directory.load()
        paths = parse_diff(revision.patch)
        query = {}
        query["pre"] = mots_directory.query(*paths)

        with hgrepo.for_pull():
            hgrepo.update_repo(repo.pull_path)
            try:
                hgrepo.apply_patch(io.BytesIO(revision.patch.encode("utf-8")))
            except Exception as e:
                # Possible merge conflict, skip for now. Log with traceback and
                # the revision identity so the failure can be diagnosed.
                # NOTE(review): the revision stays stale and will be retried on
                # the next loop iteration — confirm this is intended.
                logger.exception(
                    "Unable to apply patch for revision %s: %s",
                    revision.revision_id,
                    e,
                )
                return
            # TODO: inspect the applied change by revision id once available.

            # Reload directory with new patch.
            mots_directory.load(full_paths=True)

            # query mots for diff files
            query["post"] = mots_directory.query(*paths)

        query_result = query["pre"] + query["post"]
        revision.data = {}
        revision.data["mots"] = {
            "modules": [m.serialize() for m in query_result.modules],
            "owners": [o.real_name for o in query_result.owners],
            "peers": [p.real_name for p in query_result.peers],
            "paths": query_result.paths,
            "rejected_paths": query_result.rejected_paths,
        }
        revision.is_stale = False
        db.session.commit()
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
"""add_revision
2+
3+
Revision ID: fb8eacc4e2e3
4+
Revises: 53bf7bf2c798
5+
Create Date: 2021-11-26 19:46:56.223875
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
from sqlalchemy.dialects import postgresql
11+
12+
# revision identifiers, used by Alembic.
13+
revision = "fb8eacc4e2e3"
14+
down_revision = "53bf7bf2c798"
15+
branch_labels = ()
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Create the ``revision`` table.

    ``revision_id`` carries a unique constraint so the table matches the
    ``Revision`` model, which declares that column as unique.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "revision",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("revision_id", sa.Integer(), nullable=False),
        sa.Column("diff_id", sa.Integer(), nullable=False),
        sa.Column("repo_name", sa.String(length=254), nullable=False),
        sa.Column("is_stale", sa.Boolean(), nullable=False),
        sa.Column("patch", sa.Text(), nullable=False),
        sa.Column("data", postgresql.JSONB(astext_type=sa.Text()), nullable=False),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("revision_id"),
    )
    # ### end Alembic commands ###
35+
36+
37+
def downgrade():
    """Drop the ``revision`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table("revision")
    # ### end Alembic commands ###

requirements.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,4 @@ requests-mock==1.6.0
2424
requests==2.25.1
2525
rs-parsepatch==0.3.3
2626
uWSGI==2.0.18
27+
mots==0.0.1.dev2

0 commit comments

Comments
 (0)