diff --git a/py3-spdx-tools.yaml b/py3-spdx-tools.yaml index be9fdf8f10b..7d8d27ce852 100644 --- a/py3-spdx-tools.yaml +++ b/py3-spdx-tools.yaml @@ -2,7 +2,7 @@ package: name: py3-spdx-tools version: 0.8.2 - epoch: 0 + epoch: 1 description: SPDX parser and tools. copyright: - license: Apache-2.0 @@ -36,6 +36,10 @@ pipeline: tag: v${{package.version}} expected-commit: 32e74cdc8a39bc3b4119bc6e77f3804d09a05418 + - uses: patch + with: + patches: performance.patch + - name: Python Build uses: python/build-wheel diff --git a/py3-spdx-tools/performance.patch b/py3-spdx-tools/performance.patch new file mode 100644 index 00000000000..816612f245c --- /dev/null +++ b/py3-spdx-tools/performance.patch @@ -0,0 +1,334 @@ +From 30ebcf3981a1a1d29260dfefa75690842b0fb2cb Mon Sep 17 00:00:00 2001 +From: Jon Johnson +Date: Mon, 29 Jan 2024 12:12:26 -0800 +Subject: [PATCH 1/3] Fix several accidentally quadratic functions + +Signed-off-by: Jon Johnson +--- + src/spdx_tools/spdx/model/document.py | 3 +++ + src/spdx_tools/spdx/model/relationship.py | 3 +++ + .../parser/jsonlikedict/relationship_parser.py | 16 +++++++--------- + .../spdx/validation/spdx_id_validators.py | 11 +++++++++-- + 4 files changed, 22 insertions(+), 11 deletions(-) + +diff --git a/src/spdx_tools/spdx/model/document.py b/src/spdx_tools/spdx/model/document.py +index 980c59ca5..e1ec51671 100644 +--- a/src/spdx_tools/spdx/model/document.py ++++ b/src/spdx_tools/spdx/model/document.py +@@ -81,3 +81,6 @@ def __init__( + relationships = [] if relationships is None else relationships + extracted_licensing_info = [] if extracted_licensing_info is None else extracted_licensing_info + check_types_and_set_values(self, locals()) ++ ++ def __hash__(self): ++ return id(self) +diff --git a/src/spdx_tools/spdx/model/relationship.py b/src/spdx_tools/spdx/model/relationship.py +index 02b1326a9..1e6d7ae86 100644 +--- a/src/spdx_tools/spdx/model/relationship.py ++++ b/src/spdx_tools/spdx/model/relationship.py +@@ -73,3 +73,6 @@ def __init__( + comment: Optional[str] = None, + ): + check_types_and_set_values(self, locals()) ++ ++ def __hash__(self): ++ return hash("{} -> {} ({})".format(self.spdx_element_id, str(self.related_spdx_element_id), str(self.relationship_type))) +diff --git a/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py b/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py +index 17374bef5..6393470e3 100644 +--- a/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py ++++ b/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py +@@ -1,7 +1,7 @@ + # SPDX-FileCopyrightText: 2022 spdx contributors + # + # SPDX-License-Identifier: Apache-2.0 +-from beartype.typing import Dict, List, Optional ++from beartype.typing import Dict, List, Optional, Set + + from spdx_tools.common.typing.constructor_type_errors import ConstructorTypeErrors + from spdx_tools.spdx.model import Relationship, RelationshipType +@@ -35,9 +35,9 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]: + document_describes: List[str] = delete_duplicates_from_list(input_doc_dict.get("documentDescribes", [])) + doc_spdx_id: Optional[str] = input_doc_dict.get("SPDXID") + +- existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments( ++ existing_relationships_without_comments: Set[Relationship] = set(self.get_all_relationships_without_comments( + relationships +- ) ++ )) + relationships.extend( + parse_field_or_log_error( + self.logger, +@@ -52,9 +52,6 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]: + ) + + package_dicts: List[Dict] = input_doc_dict.get("packages", []) +- existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments( +- relationships +- ) + + relationships.extend( + parse_field_or_log_error( +@@ -110,7 +107,7 @@ def parse_relationship_type(relationship_type_str: str) -> RelationshipType: + return relationship_type + + def parse_document_describes( +- self, doc_spdx_id: str, described_spdx_ids: List[str], existing_relationships: List[Relationship] ++ self, doc_spdx_id: str, described_spdx_ids: List[str], existing_relationships: Set[Relationship] + ) -> List[Relationship]: + logger = Logger() + describes_relationships = [] +@@ -131,10 +128,11 @@ def parse_document_describes( + return describes_relationships + + def parse_has_files( +- self, package_dicts: List[Dict], existing_relationships: List[Relationship] ++ self, package_dicts: List[Dict], existing_relationships: Set[Relationship] + ) -> List[Relationship]: + # assume existing relationships are stripped of comments + logger = Logger() ++ + contains_relationships = [] + for package in package_dicts: + package_spdx_id: Optional[str] = package.get("SPDXID") +@@ -160,7 +158,7 @@ def parse_has_files( + return contains_relationships + + def check_if_relationship_exists( +- self, relationship: Relationship, existing_relationships: List[Relationship] ++ self, relationship: Relationship, existing_relationships: Set[Relationship] + ) -> bool: + # assume existing relationships are stripped of comments + if relationship in existing_relationships: +diff --git a/src/spdx_tools/spdx/validation/spdx_id_validators.py b/src/spdx_tools/spdx/validation/spdx_id_validators.py +index 6441236a9..2ae412ff3 100644 +--- a/src/spdx_tools/spdx/validation/spdx_id_validators.py ++++ b/src/spdx_tools/spdx/validation/spdx_id_validators.py +@@ -4,7 +4,9 @@ + + import re + +-from beartype.typing import List ++from beartype.typing import List, Set ++ ++from functools import cache + + from spdx_tools.spdx.document_utils import get_contained_spdx_element_ids + from spdx_tools.spdx.model import Document, File +@@ -22,11 +24,16 @@ def is_spdx_id_present_in_files(spdx_id: str, files: List[File]) -> bool: + return spdx_id in [file.spdx_id for file in files] + + ++@cache + def is_spdx_id_present_in_document(spdx_id: str, document: Document) -> bool: +- all_spdx_ids_in_document: List[str] = get_list_of_all_spdx_ids(document) ++ all_spdx_ids_in_document: Set[str] = get_set_of_all_spdx_ids(document) + + return spdx_id in all_spdx_ids_in_document + ++@cache ++def get_set_of_all_spdx_ids(document: Document) -> Set[str]: ++ return set(get_list_of_all_spdx_ids(document)) ++ + + def get_list_of_all_spdx_ids(document: Document) -> List[str]: + all_spdx_ids_in_document: List[str] = [document.creation_info.spdx_id] + +From 1c6cd54e52d6a3097ad9b4e9ce14c1019e8bcf8f Mon Sep 17 00:00:00 2001 +From: paulgibert +Date: Wed, 14 Feb 2024 10:55:04 -0500 +Subject: [PATCH 2/3] Created a document cache decorator and handled + Relationships as tuples in parsing to remove hash methods. + +Signed-off-by: paulgibert +--- + .../common/typing/dataclass_with_properties.py | 6 +++++- + src/spdx_tools/spdx/model/document.py | 18 +++++++++++++++--- + src/spdx_tools/spdx/model/relationship.py | 3 --- + .../parser/jsonlikedict/relationship_parser.py | 16 +++++++++++----- + .../spdx/validation/spdx_id_validators.py | 5 +++-- + 5 files changed, 34 insertions(+), 14 deletions(-) + +diff --git a/src/spdx_tools/common/typing/dataclass_with_properties.py b/src/spdx_tools/common/typing/dataclass_with_properties.py +index 3f13950d5..ba343af08 100644 +--- a/src/spdx_tools/common/typing/dataclass_with_properties.py ++++ b/src/spdx_tools/common/typing/dataclass_with_properties.py +@@ -1,12 +1,16 @@ + # SPDX-FileCopyrightText: 2022 spdx contributors + # + # SPDX-License-Identifier: Apache-2.0 +-from dataclasses import dataclass ++from dataclasses import dataclass, astuple + + from beartype import beartype + from beartype.roar import BeartypeCallHintException + + ++def freeze_dataclass_with_properties_list(items): ++ return {astuple(itm) for itm in items} ++ ++ + def dataclass_with_properties(cls): + """Decorator to generate a dataclass with properties out of the class' value:type list. + Their getters and setters will be subjected to the @typechecked decorator to ensure type conformity.""" +diff --git a/src/spdx_tools/spdx/model/document.py b/src/spdx_tools/spdx/model/document.py +index e1ec51671..4ed1b45ae 100644 +--- a/src/spdx_tools/spdx/model/document.py ++++ b/src/spdx_tools/spdx/model/document.py +@@ -1,7 +1,7 @@ + # SPDX-FileCopyrightText: 2022 spdx contributors + # + # SPDX-License-Identifier: Apache-2.0 +-from dataclasses import field ++from dataclasses import field, astuple + from datetime import datetime + + from beartype.typing import List, Optional +@@ -82,5 +82,17 @@ def __init__( + extracted_licensing_info = [] if extracted_licensing_info is None else extracted_licensing_info + check_types_and_set_values(self, locals()) + +- def __hash__(self): +- return id(self) ++ ++def document_cache(func): ++ cache = {} ++ ++ def cached_function(document: Document): ++ key = id(document) ++ if key in cache.keys(): ++ return cache[key] ++ else: ++ value = func(document) ++ cache[key] = value ++ return value ++ ++ return cached_function +\ No newline at end of file +diff --git a/src/spdx_tools/spdx/model/relationship.py b/src/spdx_tools/spdx/model/relationship.py +index 1e6d7ae86..02b1326a9 100644 +--- a/src/spdx_tools/spdx/model/relationship.py ++++ b/src/spdx_tools/spdx/model/relationship.py +@@ -73,6 +73,3 @@ def __init__( + comment: Optional[str] = None, + ): + check_types_and_set_values(self, locals()) +- +- def __hash__(self): +- return hash("{} -> {} ({})".format(self.spdx_element_id, str(self.related_spdx_element_id), str(self.relationship_type))) +diff --git a/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py b/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py +index 6393470e3..9ab8a4755 100644 +--- a/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py ++++ b/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py +@@ -1,9 +1,12 @@ + # SPDX-FileCopyrightText: 2022 spdx contributors + # + # SPDX-License-Identifier: Apache-2.0 ++from dataclasses import astuple ++ + from beartype.typing import Dict, List, Optional, Set + + from spdx_tools.common.typing.constructor_type_errors import ConstructorTypeErrors ++from spdx_tools.common.typing.dataclass_with_properties import freeze_dataclass_with_properties_list + from spdx_tools.spdx.model import Relationship, RelationshipType + from spdx_tools.spdx.parser.error import SPDXParsingError + from spdx_tools.spdx.parser.jsonlikedict.dict_parsing_functions import ( +@@ -35,9 +38,12 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]: + document_describes: List[str] = delete_duplicates_from_list(input_doc_dict.get("documentDescribes", [])) + doc_spdx_id: Optional[str] = input_doc_dict.get("SPDXID") + +- existing_relationships_without_comments: Set[Relationship] = set(self.get_all_relationships_without_comments( +- relationships +- )) ++ relationship_hash = lambda r: hash("{} -> {} ({})" \ ++ .format(r.spdx_element_id, ++ str(r.related_spdx_element_id), ++ str(r.relationship_type))) ++ existing_relationships_without_comments: Set[Relationship] = freeze_dataclass_with_properties_list( ++ self.get_all_relationships_without_comments(relationships)) + relationships.extend( + parse_field_or_log_error( + self.logger, +@@ -161,10 +167,10 @@ def check_if_relationship_exists( + self, relationship: Relationship, existing_relationships: Set[Relationship] + ) -> bool: + # assume existing relationships are stripped of comments +- if relationship in existing_relationships: ++ if astuple(relationship) in existing_relationships: + return True + relationship_inverted: Relationship = self.invert_relationship(relationship) +- if relationship_inverted in existing_relationships: ++ if astuple(relationship_inverted) in existing_relationships: + return True + + return False +diff --git a/src/spdx_tools/spdx/validation/spdx_id_validators.py b/src/spdx_tools/spdx/validation/spdx_id_validators.py +index 2ae412ff3..de3a505b4 100644 +--- a/src/spdx_tools/spdx/validation/spdx_id_validators.py ++++ b/src/spdx_tools/spdx/validation/spdx_id_validators.py +@@ -10,6 +10,7 @@ + + from spdx_tools.spdx.document_utils import get_contained_spdx_element_ids + from spdx_tools.spdx.model import Document, File ++from spdx_tools.spdx.model.document import document_cache + + + def is_valid_internal_spdx_id(spdx_id: str) -> bool: +@@ -24,13 +25,13 @@ def is_spdx_id_present_in_files(spdx_id: str, files: List[File]) -> bool: + return spdx_id in [file.spdx_id for file in files] + + +-@cache ++# @cache + def is_spdx_id_present_in_document(spdx_id: str, document: Document) -> bool: + all_spdx_ids_in_document: Set[str] = get_set_of_all_spdx_ids(document) + + return spdx_id in all_spdx_ids_in_document + +-@cache ++@document_cache + def get_set_of_all_spdx_ids(document: Document) -> Set[str]: + return set(get_list_of_all_spdx_ids(document)) + + +From f45032d642aafc46f9aa59bf2572f434f5ba7813 Mon Sep 17 00:00:00 2001 +From: Jon Johnson +Date: Wed, 14 Feb 2024 09:29:07 -0800 +Subject: [PATCH 3/3] Fix lints + +Signed-off-by: Jon Johnson +--- + src/spdx_tools/spdx/model/document.py | 2 +- + src/spdx_tools/spdx/validation/spdx_id_validators.py | 3 --- + 2 files changed, 1 insertion(+), 4 deletions(-) + +diff --git a/src/spdx_tools/spdx/model/document.py b/src/spdx_tools/spdx/model/document.py +index 4ed1b45ae..33018c896 100644 +--- a/src/spdx_tools/spdx/model/document.py ++++ b/src/spdx_tools/spdx/model/document.py +@@ -95,4 +95,4 @@ def cached_function(document: Document): + cache[key] = value + return value + +- return cached_function +\ No newline at end of file ++ return cached_function +diff --git a/src/spdx_tools/spdx/validation/spdx_id_validators.py b/src/spdx_tools/spdx/validation/spdx_id_validators.py +index de3a505b4..d70c36bb7 100644 +--- a/src/spdx_tools/spdx/validation/spdx_id_validators.py ++++ b/src/spdx_tools/spdx/validation/spdx_id_validators.py +@@ -6,8 +6,6 @@ + + from beartype.typing import List, Set + +-from functools import cache +- + from spdx_tools.spdx.document_utils import get_contained_spdx_element_ids + from spdx_tools.spdx.model import Document, File + from spdx_tools.spdx.model.document import document_cache +@@ -25,7 +23,6 @@ def is_spdx_id_present_in_files(spdx_id: str, files: List[File]) -> bool: + return spdx_id in [file.spdx_id for file in files] + + +-# @cache + def is_spdx_id_present_in_document(spdx_id: str, document: Document) -> bool: + all_spdx_ids_in_document: Set[str] = get_set_of_all_spdx_ids(document)