Skip to content
This repository was archived by the owner on Nov 23, 2024. It is now read-only.

Commit 1601f9d

Browse files
authored
feat(parser): explain why an annotation was generated automatically (#914)
* refactor(parser): improve readability * feat(parser): explain autogenerated remove annotations * feat(parser): explain autogenerated value annotations * feat(parser): store match when creating enum type * feat(parser): store match when creating boundary type * feat(parser): explain enum annotations * feat(parser): explain boundary annotations * feat(parser): improve formatting of explanation * chore(data): regenerate annotations * refactor(parse): remove Type wrapper class * fix(parser): mypy errors * fix(parser): mypy errors * fix(parser): mypy errors * style: apply automatic fixes of linters Co-authored-by: lars-reimann <[email protected]>
1 parent 6e1fd68 commit 1601f9d

File tree

16 files changed

+337
-176
lines changed

16 files changed

+337
-176
lines changed

package-parser/package_parser/processing/annotations/_generate_boundary_annotations.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1+
from typing import Optional
2+
13
from package_parser.processing.annotations.model import (
24
AnnotationStore,
35
BoundaryAnnotation,
46
Interval,
57
ValueAnnotation,
68
)
7-
from package_parser.processing.api.model import API
9+
from package_parser.processing.api.model import API, BoundaryType, UnionType
810

911
from ._constants import autogen_author
1012

@@ -25,23 +27,31 @@ def _generate_boundary_annotations(api: API, annotations: AnnotationStore) -> No
2527
):
2628
continue
2729

28-
boundary_type = parameter.type.to_json()
29-
if "kind" in boundary_type and boundary_type["kind"] == "UnionType":
30-
union_type = boundary_type
31-
for type_in_union in union_type["types"]:
32-
if type_in_union["kind"] == "BoundaryType":
30+
parameter_type = parameter.type
31+
if parameter_type is None:
32+
continue
33+
34+
boundary_type: Optional[BoundaryType] = None
35+
36+
if isinstance(parameter_type, UnionType):
37+
for type_in_union in parameter_type.types:
38+
if isinstance(type_in_union, BoundaryType):
3339
boundary_type = type_in_union
34-
if "kind" in boundary_type and boundary_type["kind"] == "BoundaryType":
35-
min_value = boundary_type["min"]
36-
max_value = boundary_type["max"]
3740

38-
is_discrete = boundary_type["base_type"] == "int"
41+
if isinstance(parameter_type, BoundaryType):
42+
boundary_type = parameter_type
43+
44+
if boundary_type is not None:
45+
min_value = boundary_type.min
46+
max_value = boundary_type.max
47+
48+
is_discrete = boundary_type.base_type == "int"
3949

4050
min_limit_type = 0
4151
max_limit_type = 0
42-
if not boundary_type["min_inclusive"]:
52+
if not boundary_type.min_inclusive:
4353
min_limit_type = 1
44-
if not boundary_type["max_inclusive"]:
54+
if not boundary_type.max_inclusive:
4555
max_limit_type = 1
4656
if min_value == "NegativeInfinity":
4757
min_value = 0
@@ -61,6 +71,7 @@ def _generate_boundary_annotations(api: API, annotations: AnnotationStore) -> No
6171
target=parameter.id,
6272
authors=[autogen_author],
6373
reviewers=[],
74+
comment=f"I turned this into a bounded number because the description contained {boundary_type.full_match}.",
6475
interval=interval,
6576
)
6677
annotations.boundaryAnnotations.append(boundary)

package-parser/package_parser/processing/annotations/_generate_enum_annotations.py

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
EnumPair,
77
ValueAnnotation,
88
)
9-
from package_parser.processing.api.model import API
9+
from package_parser.processing.api.model import API, EnumType, UnionType
1010

1111
from ._constants import autogen_author
1212

@@ -27,26 +27,21 @@ def _generate_enum_annotations(api: API, annotations: AnnotationStore) -> None:
2727
):
2828
continue
2929

30-
enum_type = parameter.type.to_json()
30+
parameter_type = parameter.type
31+
if parameter_type is None:
32+
continue
33+
3134
pairs = []
32-
if "kind" in enum_type and enum_type["kind"] == "UnionType":
33-
for type_in_union in enum_type["types"]:
34-
if type_in_union["kind"] == "EnumType":
35-
values = sorted(list(type_in_union["values"]))
36-
for string_value in values:
37-
instance_name = _enum_instance_name(string_value)
38-
pairs.append(
39-
EnumPair(
40-
stringValue=string_value, instanceName=instance_name
41-
)
42-
)
43-
elif "kind" in enum_type and enum_type["kind"] == "EnumType":
44-
values = sorted(list(enum_type["values"]))
45-
for string_value in values:
46-
instance_name = _enum_instance_name(string_value)
47-
pairs.append(
48-
EnumPair(stringValue=string_value, instanceName=instance_name)
49-
)
35+
full_match = ""
36+
if isinstance(parameter_type, UnionType):
37+
for type_in_union in parameter_type.types:
38+
if isinstance(type_in_union, EnumType):
39+
pairs = _enum_pairs(type_in_union)
40+
full_match = type_in_union.full_match
41+
42+
elif isinstance(parameter_type, EnumType):
43+
pairs = _enum_pairs(parameter_type)
44+
full_match = parameter_type.full_match
5045

5146
if len(pairs) > 0:
5247
enum_name = _enum_name(parameter.name)
@@ -55,6 +50,7 @@ def _generate_enum_annotations(api: API, annotations: AnnotationStore) -> None:
5550
target=parameter.id,
5651
authors=[autogen_author],
5752
reviewers=[],
53+
comment=f"I turned this into an enum because the type in the documentation contained {full_match}.",
5854
enumName=enum_name,
5955
pairs=pairs,
6056
)
@@ -67,6 +63,17 @@ def _enum_name(parameter_name: str) -> str:
6763
return "".join([segment.capitalize() for segment in segments if segment != ""])
6864

6965

66+
def _enum_pairs(enum_type: EnumType) -> list[EnumPair]:
    """
    Build one enum pair for every value of the given enum type.

    :param enum_type: Enum type whose ``values`` are turned into pairs
    :return: The created pairs, ordered by their sorted string value
    """

    return [
        EnumPair(stringValue=value, instanceName=_enum_instance_name(value))
        for value in sorted(enum_type.values)
    ]
75+
76+
7077
def _enum_instance_name(string_value: str) -> str:
7178
segments = re.split(r"[_\-.]", string_value)
7279

package-parser/package_parser/processing/annotations/_generate_remove_annotations.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,38 @@ def _generate_remove_annotations(
1818
:param annotations: AnnotationStore object
1919
"""
2020
for class_ in api.classes.values():
21-
if usages.n_class_usages(class_.id) == 0:
21+
n_class_usages = usages.n_class_usages(class_.id)
22+
if n_class_usages == 0:
2223
annotations.removeAnnotations.append(
2324
RemoveAnnotation(
24-
target=class_.id, authors=[autogen_author], reviewers=[]
25+
target=class_.id,
26+
authors=[autogen_author],
27+
reviewers=[],
28+
comment=_create_explanation("class", n_class_usages),
2529
)
2630
)
2731

2832
for function in api.functions.values():
29-
if usages.n_function_usages(function.id) == 0:
33+
n_function_usages = usages.n_function_usages(function.id)
34+
if n_function_usages == 0:
3035
annotations.removeAnnotations.append(
3136
RemoveAnnotation(
32-
target=function.id, authors=[autogen_author], reviewers=[]
37+
target=function.id,
38+
authors=[autogen_author],
39+
reviewers=[],
40+
comment=_create_explanation("function", n_function_usages),
3341
)
3442
)
43+
44+
45+
def _create_explanation(declaration_type: str, n_usages: int) -> str:
46+
result = f"I removed this {declaration_type} because it has"
47+
48+
if n_usages == 0:
49+
result += " no known usages."
50+
elif n_usages == 1:
51+
result += " only one known usage."
52+
else:
53+
result += f" only {n_usages} known usages."
54+
55+
return result

package-parser/package_parser/processing/annotations/_generate_value_annotations.py

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from package_parser.processing.usages.model import UsageCountStore
1212
from scipy.stats import binom
1313

14+
from ...utils import pluralize
1415
from ._constants import autogen_author
1516

1617

@@ -53,6 +54,7 @@ def _generate_constant_annotation(
5354
target=parameter.id,
5455
authors=[autogen_author],
5556
reviewers=[],
57+
comment=f"I replaced this parameter with a constant because it is always set to the same literal value ({sole_stringified_value}).",
5658
defaultValueType=default_value_type,
5759
defaultValue=default_value,
5860
)
@@ -63,6 +65,7 @@ def _generate_constant_annotation(
6365
target=parameter.id,
6466
authors=[autogen_author],
6567
reviewers=[],
68+
comment=f"I made this parameter required because, even though it is always set to the same value ({sole_stringified_value}), that value is not a literal.",
6669
)
6770
)
6871

@@ -78,7 +81,10 @@ def _generate_required_or_optional_annotation(
7881
if not _is_stringified_literal(most_common_values[0]):
7982
annotations.valueAnnotations.append(
8083
RequiredAnnotation(
81-
target=parameter.id, authors=[autogen_author], reviewers=[]
84+
target=parameter.id,
85+
authors=[autogen_author],
86+
reviewers=[],
87+
comment=f"I made this parameter required because the most common value ({most_common_values[0]}) is not a literal.",
8288
)
8389
)
8490
return
@@ -90,10 +96,19 @@ def _generate_required_or_optional_annotation(
9096
)
9197

9298
# Add appropriate annotation
93-
if _should_be_required(most_common_value_count, second_most_common_value_count):
99+
should_be_required, comment = _should_be_required(
100+
most_common_values[0],
101+
most_common_value_count,
102+
most_common_values[1],
103+
second_most_common_value_count,
104+
)
105+
if should_be_required:
94106
annotations.valueAnnotations.append(
95107
RequiredAnnotation(
96-
target=parameter.id, authors=[autogen_author], reviewers=[]
108+
target=parameter.id,
109+
authors=[autogen_author],
110+
reviewers=[],
111+
comment=comment,
97112
)
98113
)
99114
else:
@@ -107,25 +122,33 @@ def _generate_required_or_optional_annotation(
107122
target=parameter.id,
108123
authors=[autogen_author],
109124
reviewers=[],
125+
comment=comment,
110126
defaultValueType=default_value_type,
111127
defaultValue=default_value,
112128
)
113129
)
114130

115131

116132
def _should_be_required(
117-
most_common_value_count: int, second_most_common_value_count: int
118-
) -> bool:
133+
most_common_value: str,
134+
most_common_value_count: int,
135+
second_most_common_value: str,
136+
second_most_common_value_count: int,
137+
) -> tuple[bool, str]:
119138
"""
120139
This function determines how to differentiate between an optional and a required parameter
121140
:param most_common_value_count: How often the most common value is used
122141
:param second_most_common_value_count: How often the second most common value is used
123-
:return: True means the parameter should be required, False means it should be optional
142+
:return: True means the parameter should be required, False means it should be optional. The second result is an
143+
explanation.
124144
"""
125145

126146
# Shortcut to speed up the check
127147
if most_common_value_count == second_most_common_value_count:
128-
return True
148+
return (
149+
True,
150+
f"I made this parameter required because there is no single most common value ({most_common_value} and {second_most_common_value} are both used {pluralize(most_common_value_count, 'time')}).",
151+
)
129152

130153
# Precaution to ensure proper order of most_common_value_count and second_most_common_value_count
131154
if second_most_common_value_count > most_common_value_count:
@@ -140,13 +163,21 @@ def _should_be_required(
140163
# toss. Unless this hypothesis is rejected, we make the parameter required. We reject the hypothesis if the p-value
141164
# is less than or equal to 5%. The p-value is the probability that we observe results that are at least as extreme
142165
# as the values we observed, assuming the null hypothesis is true.
143-
return (
144-
2
145-
* sum(
146-
binom.pmf(i, total, 0.5) for i in range(most_common_value_count, total + 1)
147-
)
148-
> 0.05
166+
p_value = 2 * sum(
167+
binom.pmf(i, total, 0.5) for i in range(most_common_value_count, total + 1)
149168
)
169+
significance_level = 0.05
170+
171+
if p_value <= significance_level:
172+
return (
173+
False,
174+
f"I made this parameter optional because there is a statistically significant most common value (p-value {p_value:.2%} <= significance level {significance_level:.0%}).",
175+
)
176+
else:
177+
return (
178+
True,
179+
f"I made this parameter required because there is no statistically significant most common value (p-value {p_value:.2%} > significance level {significance_level:.0%}).",
180+
)
150181

151182

152183
def _is_stringified_literal(stringified_value: str) -> bool:

package-parser/package_parser/processing/annotations/model/_annotations.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from abc import ABC
22
from dataclasses import asdict, dataclass
33
from enum import Enum
4-
from typing import Any
4+
from typing import Any, Union
55

66
ANNOTATION_SCHEMA_VERSION = 2
77

@@ -11,6 +11,7 @@ class AbstractAnnotation(ABC):
1111
target: str
1212
authors: list[str]
1313
reviewers: list[str]
14+
comment: str
1415

1516
def to_json(self) -> dict:
1617
return asdict(self)
@@ -24,9 +25,9 @@ class RemoveAnnotation(AbstractAnnotation):
2425
@dataclass
2526
class Interval:
2627
isDiscrete: bool
27-
lowerIntervalLimit: int
28+
lowerIntervalLimit: Union[int, float, str]
2829
lowerLimitType: int
29-
upperIntervalLimit: int
30+
upperIntervalLimit: Union[int, float, str]
3031
upperLimitType: int
3132

3233
def to_json(self) -> dict:
@@ -79,6 +80,7 @@ def to_json(self) -> dict:
7980
"target": self.target,
8081
"authors": self.authors,
8182
"reviewers": self.reviewers,
83+
"comment": self.comment,
8284
"variant": self.variant.value,
8385
"defaultValueType": self.defaultValueType.value,
8486
"defaultValue": self.defaultValue,
@@ -96,6 +98,7 @@ def to_json(self) -> dict:
9698
"target": self.target,
9799
"authors": self.authors,
98100
"reviewers": self.reviewers,
101+
"comment": self.comment,
99102
"variant": self.variant.value,
100103
"defaultValueType": self.defaultValueType.value,
101104
"defaultValue": self.defaultValue,
@@ -111,6 +114,7 @@ def to_json(self) -> dict:
111114
"target": self.target,
112115
"authors": self.authors,
113116
"reviewers": self.reviewers,
117+
"comment": self.comment,
114118
"variant": self.variant.value,
115119
}
116120

package-parser/package_parser/processing/api/model/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,11 @@
1515
ParameterDocumentation,
1616
)
1717
from ._parameters import Parameter, ParameterAssignment
18-
from ._types import AbstractType, BoundaryType, EnumType, NamedType, Type, UnionType
18+
from ._types import (
19+
AbstractType,
20+
BoundaryType,
21+
EnumType,
22+
NamedType,
23+
UnionType,
24+
create_type,
25+
)

0 commit comments

Comments
 (0)