From 75171e1d0f3ac0d18983f4c76e85aac2d025dce5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 09:12:15 +0000 Subject: [PATCH 1/2] Initial plan From 85ceaef23209941c09a8e97880265ab5829a6cf5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 09:22:07 +0000 Subject: [PATCH 2/2] Fix resource leaks and optimize nested loops for better performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add context managers for all file operations to prevent resource leaks - Optimize O(n²) nested loop in set_api_sketch() to O(n) using reverse mapping - Optimize nested loop in set_display_attr_of_apis() using any() builtin - Convert list to set for O(1) membership tests in check_api_label_cn.py - Use extend() instead of repeated append() calls for better performance - Add proper file handle cleanup in 6 files --- ci_scripts/check_api_docs_en.py | 3 +- ci_scripts/check_api_label_cn.py | 7 ++-- ci_scripts/check_api_parameters.py | 3 +- ci_scripts/hooks/post_filter_htmls.py | 3 +- docs/api/extract_api_from_docs.py | 6 ++- docs/api/gen_doc.py | 57 ++++++++++++++------------- 6 files changed, 43 insertions(+), 36 deletions(-) diff --git a/ci_scripts/check_api_docs_en.py b/ci_scripts/check_api_docs_en.py index ed7bb411b4d..a7cda7188f7 100644 --- a/ci_scripts/check_api_docs_en.py +++ b/ci_scripts/check_api_docs_en.py @@ -105,7 +105,8 @@ def check_system_message_in_doc(doc_file): for i in range(len(py_files)): if py_files[i].startswith("python/"): py_files[i] = py_files[i][6:] - api_info = json.load(open(args.api_info_file)) + with open(args.api_info_file) as f: + api_info = json.load(f) output_path = args.output_path build_source_file_to_doc_file_dict(api_info) error_files = set() diff --git a/ci_scripts/check_api_label_cn.py b/ci_scripts/check_api_label_cn.py index fe278dbaa72..8096e606fef 100644 
--- a/ci_scripts/check_api_label_cn.py +++ b/ci_scripts/check_api_label_cn.py @@ -45,8 +45,8 @@ def find_all_api_labels_in_dir(rootdir): path = str(real_path).removeprefix(rootdir) if not should_test(path): continue - for label in find_api_labels_in_one_file(real_path): - all_api_labels.append(label) + # Use extend instead of repeated append for better performance + all_api_labels.extend(find_api_labels_in_one_file(real_path)) return all_api_labels @@ -80,7 +80,8 @@ def run_cn_api_label_checking(rootdir, files): f"The first line in {rootdir}/{file} is not available, please re-check it!" ) sys.exit(1) - valid_api_labels = find_all_api_labels_in_dir(rootdir) + # Convert to set for O(1) membership tests + valid_api_labels = set(find_all_api_labels_in_dir(rootdir)) for file in files: if not file.endswith(".rst"): continue diff --git a/ci_scripts/check_api_parameters.py b/ci_scripts/check_api_parameters.py index c6662f86903..2acc6888e74 100644 --- a/ci_scripts/check_api_parameters.py +++ b/ci_scripts/check_api_parameters.py @@ -268,7 +268,8 @@ def check_api_parameters(rstfiles, apiinfo): if __name__ == "__main__": args = parse_args() rstfiles = [fn for fn in args.rst_files.split(" ") if fn] - apiinfo = json.load(open(args.api_info_file)) + with open(args.api_info_file) as f: + apiinfo = json.load(f) check_passed, check_failed, api_notfound = check_api_parameters( rstfiles=rstfiles, apiinfo=apiinfo ) diff --git a/ci_scripts/hooks/post_filter_htmls.py b/ci_scripts/hooks/post_filter_htmls.py index 74136cbd770..b886bba052f 100644 --- a/ci_scripts/hooks/post_filter_htmls.py +++ b/ci_scripts/hooks/post_filter_htmls.py @@ -28,7 +28,8 @@ def insert_header_and_anchor_for_method(htmlfile): """ insert a hide h3 tag and a anchor for every class method. 
""" - soup = BeautifulSoup(open(htmlfile, "r"), "lxml") + with open(htmlfile, "r") as f: + soup = BeautifulSoup(f, "lxml") method_title_tags = soup.find_all("dl", class_="method") for mtt in method_title_tags: dt = mtt.find("dt") diff --git a/docs/api/extract_api_from_docs.py b/docs/api/extract_api_from_docs.py index 390ea979fd3..53f8a857233 100644 --- a/docs/api/extract_api_from_docs.py +++ b/docs/api/extract_api_from_docs.py @@ -124,9 +124,11 @@ def extract_code_blocks_from_file(filename): r = os.path.splitext(filename) ext = r[1].lower() if ext == ".md": - return extract_code_blocks_from_md(open(filename, "r").read()) + with open(filename, "r") as f: + return extract_code_blocks_from_md(f.read()) elif ext == ".rst": - return extract_code_blocks_from_rst(open(filename, "r").read()) + with open(filename, "r") as f: + return extract_code_blocks_from_rst(f.read()) else: return [] diff --git a/docs/api/gen_doc.py b/docs/api/gen_doc.py index 6be58f6be89..15274261117 100755 --- a/docs/api/gen_doc.py +++ b/docs/api/gen_doc.py @@ -264,7 +264,8 @@ def parse_module_file(mod): if len(mod_name) >= 6 and mod_name[:6] == "paddle": fn_splited = os.path.splitext(src_file) if len(fn_splited) > 1 and fn_splited[1].lower() == ".py": - mod_ast = ast.parse(open(src_file, "r").read()) + with open(src_file, "r") as f: + mod_ast = ast.parse(f.read()) for node in mod_ast.body: short_names = [] if ( @@ -425,16 +426,14 @@ def set_display_attr_of_apis(): set the display attr """ if os.path.exists(NOT_DISPLAY_DOC_LIST_FILENAME): - display_none_apis = { - line.strip() for line in open(NOT_DISPLAY_DOC_LIST_FILENAME, "r") - } + with open(NOT_DISPLAY_DOC_LIST_FILENAME, "r") as f: + display_none_apis = {line.strip() for line in f} else: logger.warning("file not exists: %s", NOT_DISPLAY_DOC_LIST_FILENAME) display_none_apis = set() if os.path.exists(DISPLAY_DOC_LIST_FILENAME): - display_yes_apis = { - line.strip() for line in open(DISPLAY_DOC_LIST_FILENAME, "r") - } + with 
open(DISPLAY_DOC_LIST_FILENAME, "r") as f: + display_yes_apis = {line.strip() for line in f} else: logger.warning("file not exists: %s", DISPLAY_DOC_LIST_FILENAME) display_yes_apis = set() @@ -447,21 +446,17 @@ def set_display_attr_of_apis(): # file the same apis for id_api in api_info_dict: all_names = api_info_dict[id_api]["all_names"] - display_yes = False - for n in all_names: - if n in display_yes_apis: - display_yes = True - break + # Check if any name is in display_yes_apis (O(1) lookup with set) + display_yes = any(n in display_yes_apis for n in all_names) + if display_yes: api_info_dict[id_api]["display"] = True else: + # Check if any name starts with any display_none prefix display_yes = True for n in all_names: - for dn in display_none_apis: - if n.startswith(dn): - display_yes = False - break - if not display_yes: + if any(n.startswith(dn) for dn in display_none_apis): + display_yes = False break if not display_yes: api_info_dict[id_api]["display"] = False @@ -570,17 +565,22 @@ def set_api_sketch(): for api in apis: all_api_found[f"{m}.{api}"] = False + # Map each API name to the FIRST api_info_dict key that lists it, so the O(1) lookup picks the same entry the old loop's break did + name_to_id_map = {} + for id_api, api_info in api_info_dict.items(): + if "all_names" in api_info: + for name in api_info["all_names"]: + name_to_id_map.setdefault(name, id_api) + + # Use the reverse mapping for efficient lookups for api in all_api_found.keys(): - for id_api in api_info_dict.keys(): - if ("all_names" in api_info_dict[id_api]) and ( - api in api_info_dict[id_api]["all_names"] - ): - all_api_found[api] = True - api_info_dict[id_api]["in_api_sketch"] = True - if "api_sketch_names" not in api_info_dict[id_api]: - api_info_dict[id_api]["api_sketch_names"] = [] - api_info_dict[id_api]["api_sketch_names"].append(api) - break + if api in name_to_id_map: + id_api = name_to_id_map[api] + all_api_found[api] = True + api_info_dict[id_api]["in_api_sketch"] = True + if "api_sketch_names" not in api_info_dict[id_api]: +
api_info_dict[id_api]["api_sketch_names"] = [] + api_info_dict[id_api]["api_sketch_names"].append(api) api_not_in_dict = [api for api in all_api_found if not all_api_found[api]] if api_not_in_dict: @@ -1114,6 +1114,7 @@ def parse_args(): check_cn_en_match() filter_out_object_of_api_info_dict() - json.dump(api_info_dict, open(jsonfn, "w"), indent=4) + with open(jsonfn, "w") as f: + json.dump(api_info_dict, f, indent=4) logger.info("done")