Skip to content

Commit a170e18

Browse files
authored
Merge pull request #107 from padovan/add-watch-command
Add watch command
2 parents a7e3601 + 7b48852 commit a170e18

File tree

8 files changed

+294
-197
lines changed

8 files changed

+294
-197
lines changed

docs/_index.md

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,23 +85,39 @@ kci-dev --settings /path/to/.kci-dev.toml
8585

8686
#### results
8787

88-
Pull results from the Dashboard. See detailed [documentation](results).
88+
Pull results from the Web Dashboard. See detailed [documentation](results).
8989

9090
### Maestro Commands
9191

92-
#### checkout
92+
#### config
93+
94+
Set up the base configuration for talking to the Maestro API. See the Configuration section above.
9395

9496
- [config](config)
9597

9698
#### checkout
9799

100+
Trigger an ad-hoc test of a specific tree/branch/commit.
101+
98102
- [checkout](checkout)
99103

100104
#### testretry
101105

106+
Trigger a test retry for a given Maestro node id.
107+
102108
- [testretry](testretry)
103109

110+
111+
#### watch
112+
113+
Watch for the results of a given node id.
114+
115+
- [watch](watch)
116+
117+
104118
#### maestro-results
105119

120+
Pull Maestro results in JSON format.
121+
106122
- [maestro-results](maestro-results)
107123

docs/checkout.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ Additionally, you can use --watch option to watch the progress of the test.
111111

112112
After executing the command, you will see output similar to the following:
113113
```sh
114-
./kci-dev.py checkout --giturl https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git --branch master --tipoftree --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm --watch
114+
kci-dev checkout --giturl https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git --branch master --tipoftree --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm --watch
115115
api connect: https://staging.kernelci.org:9100/
116116
Retrieving latest commit on tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git branch: master
117117
Commit to checkout: d3d1556696c1a993eec54ac585fe5bf677e07474
@@ -171,7 +171,7 @@ Together with --watch option, you can use --test option to wait for particular t
171171

172172
For example:
173173
```sh
174-
kci-dev.py checkout --giturl https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git --branch master --tipoftree --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm --platform-filter sc7180-trogdor-kingoftown --watch --test crit
174+
kci-dev checkout --giturl https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git --branch master --tipoftree --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm --platform-filter sc7180-trogdor-kingoftown --watch --test crit
175175
```
176176

177177
This command will wait for the test results of the test with the name `crit`.

docs/watch.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
+++
2+
title = 'watch'
3+
date = 2025-01-30T07:07:07+01:00
4+
description = 'Watch for the results of a given node'
5+
+++
6+
7+
This command waits for the results of a particular node id.
8+
9+
Example:
10+
```sh
11+
kci-dev watch --nodeid 679a91b565fae3351e2fac77 --job-filter "kbuild-gcc-12-x86-chromeos-amd"
12+
```
13+
14+
`--job-filter` and `--test` work in the same manner as in the [checkout](../checkout.md) command.
15+
16+
## --nodeid
17+
18+
The Maestro node id to watch for.
19+
20+
## --job-filter
21+
22+
Pass one or more job filters:
23+
24+
```sh
25+
kci-dev watch --nodeid 679a91b565fae3351e2fac77 --job-filter "kbuild-gcc-12-x86-chromeos-amd" --job-filter baseline-nfs-arm64-qualcomm --job-filter kbuild-gcc-12-arm64-chromeos-qualcomm
26+
```
27+
28+
### --test
29+
30+
Return code of kci-dev will depend on the test result for the supplied test name:
31+
32+
- `pass` - return code 0 (test passed)
33+
- `fail` - return code 1 (test failed)
34+
- `error` - return code 2 (prior steps failed, such as compilation, test setup, etc., or an infrastructure error)
35+
- `critical error` - return code 64 (kci-dev failed to execute command, crashed, etc)
36+
37+
For example:
38+
```sh
39+
kci-dev watch --nodeid 679a91b565fae3351e2fac77 --job-filter baseline-nfs-arm64-qualcomm --test crit
40+
```
41+
42+
This command can be used for regression bisection, where you can check whether the test `crit` passes or fails on a specific commit.

kcidev/libs/maestro_common.py

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
# -*- coding: utf-8 -*-
33

44
import json
5+
import time
56

67
import click
8+
import requests
79

810
from kcidev.libs.common import *
911

@@ -23,3 +25,189 @@ def maestro_api_error(response):
2325
except Exception as e:
2426
kci_err(f"API response error: {e}: {response.text}")
2527
return
28+
29+
30+
def maestro_print_nodes(nodes, field):
    """
    Print Maestro node(s) as pretty-printed JSON through kci_msg.

    nodes: a single node or a list of nodes; anything that is not a list
           is wrapped into a one-element list.
    field: iterable of keys to keep for every node. When it is falsy the
           nodes are printed in full.
    """
    entries = nodes if isinstance(nodes, list) else [nodes]
    if field:
        # Keep only the requested keys; node.get() yields None (JSON null)
        # for keys a node does not have.
        rows = [{key: entry.get(key) for key in field} for entry in entries]
    else:
        rows = list(entries)
    kci_msg(json.dumps(rows, sort_keys=True, indent=4))
43+
44+
45+
def maestro_get_node(url, nodeid):
    """
    Fetch a single node from the Maestro API.

    url: base API URL; "latest/node/<nodeid>" is appended to it as-is.
    nodeid: id of the node to retrieve (string).

    Returns the decoded JSON body of the response.
    Raises click.Abort after reporting the problem through kci_err when the
    request cannot be performed or the API answers with an HTTP error.
    """
    headers = {
        "Content-Type": "application/json; charset=utf-8",
    }
    url = url + "latest/node/" + nodeid
    maestro_print_api_call(url)
    try:
        # The request itself is inside the try block so that connection
        # failures reach the generic handler below instead of escaping as
        # a raw traceback.
        response = requests.get(url, headers=headers)
        response.raise_for_status()
    except requests.exceptions.HTTPError as ex:
        try:
            detail = ex.response.json().get("detail")
        except (ValueError, AttributeError):
            # Error body is not JSON, or not a JSON object: show it raw
            detail = ex.response.text
        kci_err(detail)
        # click.Abort is an exception class: it has to be raised, merely
        # instantiating it does not stop execution.
        raise click.Abort() from ex
    except Exception as ex:
        kci_err(ex)
        raise click.Abort() from ex

    return response.json()
62+
63+
64+
def maestro_get_nodes(url, limit, offset, filter):
    """
    Fetch a page of nodes from the Maestro API.

    url: base API URL; "latest/nodes/fast" and the query string are
         appended to it as-is.
    limit: maximum number of nodes to request.
    offset: offset of the first node to request.
    filter: optional iterable of ready-made "key=value" query fragments,
            each appended to the URL with "&".

    Returns the decoded JSON body of the response.
    Raises click.Abort after reporting the problem through kci_err when the
    request cannot be performed or the API answers with an HTTP error.
    """
    headers = {
        "Content-Type": "application/json; charset=utf-8",
    }
    url = url + "latest/nodes/fast?limit=" + str(limit) + "&offset=" + str(offset)
    maestro_print_api_call(url)
    if filter:
        for f in filter:
            # TBD: We need to add translate filter to API
            # if we need anything more complex than eq(=)
            url = url + "&" + f

    try:
        # The request itself is inside the try block so that connection
        # failures reach the generic handler below instead of escaping as
        # a raw traceback.
        response = requests.get(url, headers=headers)
        response.raise_for_status()
    except requests.exceptions.HTTPError as ex:
        try:
            detail = ex.response.json().get("detail")
        except (ValueError, AttributeError):
            # Error body is not JSON, or not a JSON object: show it raw
            detail = ex.response.text
        kci_err(detail)
        # click.Abort is an exception class: it has to be raised, merely
        # instantiating it does not stop execution.
        raise click.Abort() from ex
    except Exception as ex:
        kci_err(ex)
        raise click.Abort() from ex

    return response.json()
87+
88+
89+
def maestro_check_node(node):
    """
    Reduce a node's state/result pair to one of RUNNING, DONE or FAIL.

    The full state model is deliberately simplified:
    - state "running" is RUNNING
    - state "done" with result "pass" is DONE
    - a "checkout" node in state "available" or "closing" is DONE as well
    - everything else is FAIL
    """
    node_name = node["name"]
    node_state = node["state"]
    node_result = node["result"]

    if node_state == "running":
        return "RUNNING"
    if node_state == "done" and node_result == "pass":
        return "DONE"
    # Only the checkout node counts as finished while still available/closing
    if node_name == "checkout" and node_state in ("available", "closing"):
        return "DONE"
    return "FAIL"
113+
114+
115+
def maestro_retrieve_treeid_nodes(baseurl, token, treeid):
    """
    Fetch every node that belongs to the given tree id.

    baseurl: base API URL; "latest/nodes/fast?treeid=<treeid>" is appended.
    token: value sent verbatim in the Authorization header.
    treeid: tree id to query (string).

    Returns the decoded JSON body, or None when the request could not be
    performed or the API answered with a status code of 400 or above.
    """
    request_url = "".join((baseurl, "latest/nodes/fast?treeid=", treeid))
    request_headers = {
        "Content-Type": "application/json; charset=utf-8",
        "Authorization": f"{token}",
    }
    try:
        reply = requests.get(request_url, headers=request_headers, timeout=30)
    except Exception as err:
        # Covers requests' RequestException too: it is a subclass of
        # Exception and is reported in exactly the same way.
        click.secho(f"API connection error: {err}, retrying...", fg="yellow")
        return None

    if reply.status_code < 400:
        return reply.json()

    maestro_api_error(reply)
    return None
135+
136+
137+
def maestro_watch_jobs(baseurl, token, treeid, job_filter, test):
    """
    Poll the Maestro API until the watched jobs of a tree are finished.

    baseurl: base API URL, passed to maestro_retrieve_treeid_nodes.
    token: API token, passed to maestro_retrieve_treeid_nodes.
    treeid: id of the tree whose nodes are watched.
    job_filter: iterable of job names to wait for; "checkout" is always
                added to it.
    test: optional name of a test node whose result decides the exit code.

    Without `test` the function returns once no watched job is pending or
    running. With `test` it terminates the process through sys.exit:
    0 when the test result is "pass", 1 for any other non-empty result and
    2 when a watched job other than the test itself failed.
    """
    # Function-scope import: sys is needed for the exit paths below and is
    # not among the imports visible at the top of this module.
    import sys

    # we need to add to job_filter "checkout" node
    job_filter = list(job_filter)
    job_filter.append("checkout")
    previous_nodes = None
    # Time of the first poll at which every watched job was finished. It has
    # to survive across polls, otherwise the 60s grace period granted to the
    # test node below would restart on every iteration and never expire.
    jobs_done_ts = None
    while True:
        inprogress = 0
        joblist = job_filter.copy()
        nodes = maestro_retrieve_treeid_nodes(baseurl, token, treeid)
        if not nodes:
            click.secho("No nodes found. Retrying...", fg="yellow")
            time.sleep(5)
            continue
        if previous_nodes == nodes:
            # Nothing changed since the last report: just show a heartbeat
            kci_msg_nonl(".")
            time.sleep(30)
            continue

        time_local = time.localtime()
        click.echo(f"\nCurrent time: {time.strftime('%Y-%m-%d %H:%M:%S', time_local)}")
        click.secho(
            f"Total tree nodes {len(nodes)} found. job_filter: {job_filter}", fg="green"
        )

        # Tricky part in watch is that we might have one item in job_filter (job, test),
        # but it might spawn multiple nodes with same name
        test_result = None
        for node in nodes:
            if node["name"] == test:
                test_result = node["result"]
            if node["name"] in job_filter:
                result = maestro_check_node(node)
                if result == "DONE":
                    if node["name"] in joblist:
                        joblist.remove(node["name"])
                    color = "green"
                elif result == "RUNNING":
                    inprogress += 1
                    color = "yellow"
                else:
                    if node["name"] in joblist:
                        joblist.remove(node["name"])
                    color = "red"
                    # if test is same as job, dont indicate infra-failure if test job fail
                    if test and test != node["name"]:
                        # if we have a test, and prior job failed, we should indicate that
                        kci_err(f"Job {node['name']} failed, test can't be executed")
                        sys.exit(2)
                nodeid = node.get("id")
                click.secho(
                    f"Node: {nodeid} job: {node['name']} State: {node['state']} Result: {node['result']}",
                    fg=color,
                )

        if not joblist and inprogress == 0:
            click.secho("All jobs completed", fg="green")
            if not test:
                return
            if not jobs_done_ts:
                jobs_done_ts = time.time()
            # if all jobs done, usually test results must be available
            # max within 60s. Safeguard in case of test node is not available
            if not test_result and time.time() - jobs_done_ts < 60:
                # Short pause before polling again so the grace period does
                # not turn into a tight loop of API requests.
                time.sleep(5)
                continue
        else:
            # Jobs are pending or running (again): restart the grace period
            jobs_done_ts = None

        if test_result and test_result == "pass":
            click.secho(f"Test {test} passed", fg="green")
            sys.exit(0)
        elif test_result:
            # ignore null, that means result not ready yet
            kci_err(f"Test {test} failed: {test_result}")
            sys.exit(1)

        kci_msg_nonl("\rRefresh every 30s...")
        previous_nodes = nodes
        time.sleep(30)

kcidev/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
patch,
1414
results,
1515
testretry,
16+
watch,
1617
)
1718

1819

@@ -56,6 +57,7 @@ def run():
5657
cli.add_command(maestro_results.maestro_results)
5758
cli.add_command(testretry.testretry)
5859
cli.add_command(results.results)
60+
cli.add_command(watch.watch)
5961
cli()
6062

6163

0 commit comments

Comments
 (0)