Skip to content

Commit f80dd20

Browse files
scottilee and facebook-github-bot
authored and committed
Script to automatically get system diagnostics (#487)
Summary: Pull Request resolved: #487 (see #154). Took PyTorch's collect_env.py script (https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py), which already collects torchx version and platform information, and modified it to be typed and to include the local Docker version and the local versions of the AWS CLI, gCloud CLI, az CLI, Slurm CLI, and kubectl CLI. Reviewed By: kurman. Differential Revision: D36416679. fbshipit-source-id: 88a63f5e68cbba59127c31945f5fe014619c59f8
1 parent aa5bd3e commit f80dd20

File tree

2 files changed

+157
-0
lines changed

2 files changed

+157
-0
lines changed

.github/ISSUE_TEMPLATE/bug-report.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,8 @@ Steps to reproduce the behavior:
3737

3838
## Environment
3939

40+
<!-- Please run and include the output from https://github.com/pytorch/torchx/blob/main/scripts/collect_env.py. -->
41+
4042
- torchx version (e.g. 0.1.0rc1):
4143
- Python version:
4244
- OS (e.g., Linux):

scripts/collect_env.py

Lines changed: 155 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,155 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
# This script uses https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py
8+
# and collects additional information on top of it to output relevant system
9+
# environment info.
10+
# Run it with `python collect_env.py`.
11+
import re
import subprocess
import sys
import tempfile
from os import getenv
from os.path import exists
from typing import Optional, Tuple, Union
from urllib import request
19+
20+
# Upstream PyTorch environment-collection script; it is downloaded and executed
# at runtime by run_pytorch_collect_env().
PYTORCH_COLLECT_ENV_URL = "https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py"

# torchx dev requirements file; get_pip_packages() reports the installed
# versions of the packages it lists.
TORCHX_PACKAGES = "https://raw.githubusercontent.com/pytorch/torchx/main/dev-requirements.txt"
24+
25+
26+
def run(
    command: str, filter_output_regexp: Optional[str] = None
) -> Union[None, str, Tuple[int, str, str]]:
    """Run ``command`` through the shell and return its decoded output.

    Args:
        command: Shell command line to execute.
        filter_output_regexp: Optional regular expression searched for in the
            command's stdout. An empty string is treated like ``None``.

    Returns:
        ``None`` when the command exits with a non-zero status, or when a
        pattern was given and did not match stdout.
        When a pattern was given and matched: the first capture group, or the
        whole match if the pattern defines no groups.
        Otherwise: ``(return-code, stdout, stderr)`` with both streams
        stripped and decoded to ``str``.
    """
    completed = subprocess.run(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
    )
    if completed.returncode != 0:
        return None

    # errors="replace": a diagnostics script should not crash because some
    # tool printed bytes that are not valid UTF-8.
    stdout = completed.stdout.strip().decode("utf-8", errors="replace")
    stderr = completed.stderr.strip().decode("utf-8", errors="replace")

    if filter_output_regexp:
        match = re.search(filter_output_regexp, stdout)
        if match is None:
            return None
        # Fall back to the whole match so a group-less pattern does not raise
        # IndexError on group(1).
        return match.group(1) if match.re.groups else match.group(0)

    return completed.returncode, stdout, stderr
48+
49+
50+
def get_pip_packages() -> str:
    """Returns versions of installed packages that match torchx dev requirements.

    Downloads the requirements file at ``TORCHX_PACKAGES``, extracts the
    project name from every requirement line, and reports the installed
    version of each of those projects as listed by
    ``pip list --format=freeze``.

    Returns:
        One ``name:version`` line per matching installed package, joined with
        newlines. Empty string when nothing matches or pip produced no output.

    Raises:
        urllib.error.URLError: if the requirements file cannot be downloaded.
    """

    def canonical(name: str) -> str:
        # PEP 503 normalization, so "Foo_Bar" and "foo-bar" compare equal.
        return re.sub(r"[-_.]+", "-", name).lower()

    # Argument list instead of a shell string so an interpreter path that
    # contains spaces still works.
    pip_list = subprocess.run(
        [sys.executable, "-mpip", "list", "--format=freeze"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # Read the response directly instead of urlretrieve(), which would leave a
    # temporary file behind.
    with request.urlopen(TORCHX_PACKAGES) as response:
        requirement_lines = response.read().decode("utf-8").splitlines()

    # A project name ends at the first character that cannot be part of it,
    # which covers extras ("ray[default]"), every version specifier
    # ("==", "~=", "===", ...), environment markers and trailing comments.
    name_pattern = re.compile(r"[A-Za-z0-9][A-Za-z0-9._-]*")
    wanted = set()
    for raw_line in requirement_lines:
        requirement = raw_line.strip()
        # Skip blanks, comments and pip options such as "-r other.txt".
        if not requirement or requirement.startswith(("#", "-")):
            continue
        name_match = name_pattern.match(requirement)
        if name_match:
            wanted.add(canonical(name_match.group(0)))

    report = []
    for line in pip_list.stdout.decode("utf-8", errors="replace").splitlines():
        name, separator, version = line.strip().partition("==")
        # Lines without "==" carry no version to report; skipping them avoids
        # indexing past the end of a one-element split.
        if separator and canonical(name) in wanted:
            report.append(f"{name}:{version}")
    return "\n".join(report)
78+
79+
80+
def get_torchx_config() -> str:
    """Return the contents of the first ``.torchxconfig`` file found.

    The current working directory is checked first, then the user's home
    directory.

    Returns:
        The file's text, or ``"N/A"`` when neither location has the file.
    """
    # Function-scope import: the file only imports ``exists`` from os.path.
    from os.path import expanduser, join

    # expanduser("~") rather than getenv("HOME"): when HOME is unset, getenv
    # returns None and the lookup would probe the relative path
    # "None/.torchxconfig".
    candidates = (".torchxconfig", join(expanduser("~"), ".torchxconfig"))
    for path in candidates:
        if exists(path):
            with open(path, "r") as config_file:
                return config_file.read()
    return "N/A"
91+
92+
93+
def run_pytorch_collect_env() -> Tuple[int, bytes]:
    """Download PyTorch's ``collect_env.py`` and run it with this interpreter.

    The script's stdout is not captured, so its report is written straight to
    this process's stdout. Only stderr is captured.

    Returns:
        ``(return code, stderr bytes)`` of the script run. If the script
        cannot be downloaded, ``(1, <error message as UTF-8 bytes>)`` so the
        caller can report the failure and keep collecting other information.
    """
    with tempfile.NamedTemporaryFile(delete=True, suffix=".py") as temp:
        try:
            request.urlretrieve(PYTORCH_COLLECT_ENV_URL, temp.name)
        except OSError as err:
            # urllib's URLError/HTTPError are OSError subclasses; this also
            # covers failures writing the temporary file.
            return 1, str(err).encode("utf-8")
        # Argument list instead of a shell string so paths containing spaces
        # are passed through intact.
        completed = subprocess.run(
            [sys.executable, temp.name], stderr=subprocess.PIPE
        )
    return completed.returncode, completed.stderr
100+
101+
102+
def get_cli_info() -> None:
    """Print one ``<label>: <version>`` line for each supported CLI tool."""
    probes = (
        ("AWS CLI", get_aws_version),
        ("gCloud CLI", get_gcp_version),
        ("AZ CLI", get_azure_version),
        ("Slurm", get_slurm_version),
        ("Docker", get_docker_version),
        ("kubectl", get_kubectl_version),
    )
    # Each probe runs immediately before its line is printed, in table order.
    for label, probe in probes:
        print(f"{label}: {probe()}")
109+
110+
111+
def get_aws_version() -> Optional[str]:
    """Return the stdout of ``aws --version``, or ``None`` if ``run`` yields nothing."""
    outcome = run("aws --version")
    if outcome is None:
        return None
    # outcome is (return-code, stdout, stderr); report stdout.
    return outcome[1]
115+
116+
117+
def get_gcp_version() -> Optional[str]:
    """Return the text following ``Google Cloud `` in ``gcloud --version`` output.

    Returns whatever ``run`` returns for that command and pattern.
    """
    version_pattern = r"Google Cloud (.*)"
    return run("gcloud --version", filter_output_regexp=version_pattern)
119+
120+
121+
def get_azure_version() -> Optional[str]:
    """Return the ``azure-cli`` version reported by ``az version``.

    ``az version`` prints JSON such as ``"azure-cli": "2.36.0",``. The pattern
    captures only the text between the value's quotes, so the result is the
    bare version rather than the quoted value with its trailing comma.

    Returns whatever ``run`` returns for that command and pattern.
    """
    return run("az version", r'"azure-cli": "([^"]+)"')
123+
124+
125+
def get_slurm_version() -> Optional[str]:
    """Return the stdout of ``slurmd --version``, or ``None`` if ``run`` yields nothing."""
    outcome = run("slurmd --version")
    if outcome is None:
        return None
    # outcome is (return-code, stdout, stderr); report stdout.
    return outcome[1]
129+
130+
131+
def get_docker_version() -> Optional[str]:
    """Return the text following ``Docker version `` in ``docker --version`` output.

    Returns whatever ``run`` returns for that command and pattern.
    """
    version_pattern = r"Docker version (.*)"
    return run("docker --version", filter_output_regexp=version_pattern)
133+
134+
135+
def get_kubectl_version() -> Optional[str]:
    """Return the text following ``Client Version: `` in ``kubectl version --client`` output.

    Returns whatever ``run`` returns for that command and pattern.
    """
    version_pattern = r"Client Version: (.*)"
    return run("kubectl version --client", filter_output_regexp=version_pattern)
137+
138+
139+
def main() -> None:
    """Print the full diagnostics report to stdout.

    Runs PyTorch's collect_env script, then prints the CLI tool versions, the
    installed versions of torchx dev packages, and the torchx config.
    """
    returncode, stderr = run_pytorch_collect_env()
    if returncode != 0:
        # stderr is bytes; decode it so the message shows readable text
        # instead of a b'...' repr with escaped newlines.
        details = stderr.decode("utf-8", errors="replace")
        print(f"Could not run Pytorch collect_env script: {details}")

    print("\nVersions of CLIs:")
    get_cli_info()

    print("\ntorchx dev package versions:")
    print(get_pip_packages())

    print("\ntorchx config:")
    print(f"{get_torchx_config()}")
152+
153+
154+
# Collect diagnostics only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)