"""The Python PyTorch testing script.
##
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
"""
23+
24+ import xml .etree .ElementTree as ET
25+ from pathlib import Path
26+ from typing import Any , Dict , Tuple
27+
# Distributed backend directory names. When one of them appears as a component
# of a report path, it is appended to that report's invoking-file name.
BACKENDS_LIST = ["dist-gloo", "dist-nccl"]

# Substring searched for in a <skipped> message to recognise reports that come
# from the rerun-disabled-tests workflow.
TARGET_WORKFLOW = "--rerun-disabled-tests"
35+
def get_job_id(report: Path) -> int:
    """Extract the job id encoded in the first component of a report path.

    [Job id in artifacts]
    In our GHA workflows, we append the job id to the end of the report
    name, so ``report`` looks like:
        unzipped-test-reports-foo_5596745227/test/test-reports/foo/TEST-foo.xml
    and we want to get ``5596745227`` out of it.

    Args:
        report: Path of the report; only its first component is inspected.

    Returns:
        The integer that follows the last ``_`` of the first path component
        (or the whole component when it contains no ``_``), or ``-1`` when
        the path has no components or that text is not an integer.
    """
    try:
        return int(report.parts[0].rpartition("_")[2])
    except (IndexError, ValueError):
        # IndexError: the path has no components (e.g. Path("")).
        # ValueError: the text after the last "_" is not an integer.
        return -1
46+
def is_rerun_disabled_tests(root: ET.ElementTree) -> bool:
    """
    Tell whether the test report was produced by the rerun_disabled_tests
    workflow.

    The decision is based on the ``message`` attribute of the first element
    matched by the ``.//*skipped`` path; a report with no such element is
    never treated as a rerun report.
    """
    first_skipped = root.find(".//*skipped")
    # Compare against None explicitly; a plain truthiness test on the result
    # does not work as expected here.
    if first_skipped is not None:
        msg = first_skipped.attrib.get("message", "")
        return TARGET_WORKFLOW in msg or "num_red" in msg
    return False
58+
def _invoking_file_name(report: Path) -> str:
    """Return the invoking-file name for ``report``, suffixed with its backend if any."""
    # [invoking file]
    # The name of the file that the test is located in is not necessarily
    # the same as the name of the file that invoked the test.
    # For example, `test_jit.py` calls into multiple other test files (e.g.
    # jit/test_dce.py). For sharding/test selection purposes, we want to
    # record the file that invoked the test.
    #
    # To do this, we leverage an implementation detail of how we write out
    # tests (https://bit.ly/3ajEV1M), which is that reports are created
    # under a folder with the same name as the invoking file.
    name = report.parent.name
    for backend in BACKENDS_LIST:
        if backend in report.parts:
            # Only the first backend found among the path components is used.
            return name + "_" + backend
    return name


def parse_xml_report(
    tag: str,
    report: Path,
    workflow_id: int,
    workflow_run_attempt: int,
    work_flow_name: str,
) -> Dict[Tuple[str, ...], Dict[str, Any]]:
    """Convert a test report xml file into a dict of JSON-serializable entries.

    Args:
        tag: Element tag to collect. ``"testcase"`` and ``"testsuite"`` are
            handled; elements of any other tag are converted but not stored.
        report: Path of the XML report to parse.
        workflow_id: Stored on every ``testcase`` entry.
        workflow_run_attempt: Stored on every ``testcase`` entry.
        work_flow_name: Stored on every entry and used as part of its key.

    Returns:
        A dict mapping an identifying tuple to the converted element:
        ``(invoking_file, classname, name, work_flow_name)`` for test cases
        and ``(invoking_file, report file name, work_flow_name)`` for test
        suites. Entries that share a key overwrite each other, the last one
        winning. The dict is empty when the report comes from the
        rerun_disabled_tests workflow.

    Raises:
        KeyError: if a ``testcase`` element lacks a ``classname`` or ``name``
            attribute, or a ``testsuite`` element lacks a ``time`` attribute.
    """
    print(f"Parsing {tag}s for test report: {report}")

    job_id = get_job_id(report)
    print(f"Found job id: {job_id}")

    test_cases: Dict[Tuple[str, ...], Dict[str, Any]] = {}

    root = ET.parse(report)
    # TODO: unlike unittest, pytest-flakefinder used by rerun disabled tests for test_ops
    # includes skipped messages multiple times (50 times by default). This slows down
    # this script too much (O(n)) because it tries to gather all the stats. This should
    # be fixed later in the way we use pytest-flakefinder. A zipped test report from rerun
    # disabled test is only few MB, but will balloon up to a much bigger XML file after
    # extracting from a dozen to few hundred MB
    if is_rerun_disabled_tests(root):
        return test_cases

    # Depends only on the report path, so compute it once for all elements.
    invoking_file = _invoking_file_name(report)

    for test_case in root.iter(tag):
        case = process_xml_element(test_case)
        if tag == "testcase":
            case["workflow_id"] = workflow_id
            case["workflow_run_attempt"] = workflow_run_attempt
            case["job_id"] = job_id
            case["work_flow_name"] = work_flow_name
            case["invoking_file"] = invoking_file
            key = (invoking_file, case["classname"], case["name"], work_flow_name)
            test_cases[key] = case
        elif tag == "testsuite":
            case["work_flow_name"] = work_flow_name
            case["invoking_xml"] = report.name
            # Keep the suite's own "time" attribute under an explicit name too.
            case["running_time_xml"] = case["time"]
            case["invoking_file"] = invoking_file
            test_cases[(invoking_file, report.name, work_flow_name)] = case

    return test_cases
123+
def _convert_attribute_value(value: str) -> Any:
    """Return ``value`` as an int if possible, else a float if possible, else unchanged."""
    try:
        return int(value)
    except ValueError:
        pass
    try:
        return float(value)
    except ValueError:
        return value


def process_xml_element(element: ET.Element) -> Dict[str, Any]:
    """Convert an XML element and its children into a JSON-serializable dict.

    Args:
        element: The element to convert; children are converted recursively.

    Returns:
        A dict holding the element's attributes (numeric strings converted
        to ``int`` or ``float``), its non-blank ``text`` and ``tail``, and
        one key per child tag.
    """
    ret: Dict[str, Any] = {}

    # Convert attributes directly into dict elements.
    # e.g.
    #     <testcase name="test_foo" classname="test_bar"></testcase>
    # becomes:
    #     {"name": "test_foo", "classname": "test_bar"}
    #
    # The XML format encodes all values as strings. Convert to ints/floats if
    # possible to make aggregation possible in Rockset. An integer string
    # stays an int; float conversion is attempted only when int conversion
    # fails.
    for key, raw in element.attrib.items():
        ret[key] = _convert_attribute_value(raw)

    # Convert inner and outer text into special dict elements.
    # e.g.
    #     <testcase>my_inner_text</testcase> my_tail
    # becomes:
    #     {"text": "my_inner_text", "tail": " my_tail"}
    # Whitespace-only text and tail are dropped.
    if element.text and element.text.strip():
        ret["text"] = element.text
    if element.tail and element.tail.strip():
        ret["tail"] = element.tail

    # Convert child elements recursively, placing them at a key:
    # e.g.
    #     <testcase>
    #       <foo>hello</foo>
    #       <foo>world</foo>
    #       <bar>another</bar>
    #     </testcase>
    # becomes
    #    {
    #       "foo": [{"text": "hello"}, {"text": "world"}],
    #       "bar": {"text": "another"}
    #    }
    for child in element:
        converted = process_xml_element(child)
        if child.tag not in ret:
            ret[child.tag] = converted
            continue
        # If there are multiple tags with the same name, they should be
        # coalesced into a list. NOTE: an attribute (or "text"/"tail") whose
        # key equals a child's tag ends up in that same list.
        if not isinstance(ret[child.tag], list):
            ret[child.tag] = [ret[child.tag]]
        ret[child.tag].append(converted)
    return ret
0 commit comments