Skip to content

Commit d04daf2

Browse files
SONARPY-944 Use precomputed Typeshed symbols for third-party libraries in the Python analyzer
1 parent 9ee5d18 commit d04daf2

File tree

8 files changed

+76
-239
lines changed

8 files changed

+76
-239
lines changed

python-frontend/src/main/resources/org/sonar/python/types/protobuf/annoy.protobuf

Lines changed: 0 additions & 215 deletions
This file was deleted.

python-frontend/typeshed_serializer/serializer/symbols.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,13 @@ def __init__(self, _type: mpt.Type):
9191
elif isinstance(_type, mpt.TupleType):
9292
self.kind = TypeKind.TUPLE
9393
items = [TypeDescriptor(t) for t in _type.items]
94-
item_names = [i.pretty_printed_name for i in items]
95-
self.args.extend(items)
96-
self.pretty_printed_name = f"Tuple[{','.join(item_names)}]"
94+
if any(item.is_unknown for item in items):
95+
self.kind = None
96+
self.is_unknown = True
97+
else:
98+
item_names = [i.pretty_printed_name for i in items]
99+
self.args.extend(items)
100+
self.pretty_printed_name = f"Tuple[{','.join(item_names)}]"
97101
elif isinstance(_type, mpt.TypeVarType):
98102
self.kind = TypeKind.TYPE_VAR
99103
self.pretty_printed_name = _type.fullname
@@ -524,9 +528,10 @@ def extract_return_type(func_def: mpn.FuncDef):
524528
return TypeDescriptor(func_type.ret_type)
525529

526530

527-
def save_module(ms: Union[ModuleSymbol, MergedModuleSymbol], is_debug=False, debug_dir="output"):
531+
def save_module(ms: Union[ModuleSymbol, MergedModuleSymbol], is_debug=False, debug_dir="output", is_stdlib=True):
528532
ms_pb = ms.to_proto()
529533
save_dir = "../../src/main/resources/org/sonar/python/types/protobuf" if not is_debug else f"../{debug_dir}"
534+
save_dir = save_dir if is_stdlib else os.path.join(save_dir, "stubs")
530535
save_string = ms_pb.SerializeToString() if not is_debug else str(ms_pb)
531536
open_mode = "wb" if not is_debug else "w"
532537
save_dir_path = os.path.join(CURRENT_PATH, save_dir)

python-frontend/typeshed_serializer/serializer/symbols_merger.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,25 +23,29 @@
2323
from serializer.symbols import ModuleSymbol, MergedFunctionSymbol, MergedClassSymbol, MergedOverloadedFunctionSymbol, \
2424
MergedModuleSymbol, MergedVarSymbol
2525
from serializer import typeshed_serializer as ts
26-
from serializer.proto_out import symbols_pb2
2726

2827
SUPPORTED_PYTHON_VERSIONS = ((2, 7), (3, 5), (3, 6), (3, 7), (3, 8), (3, 9), (3, 10))
2928

3029

31-
def build_multiple_python_version(is_third_parties=False) -> Dict[str, Dict[str, ModuleSymbol]]:
    """Build the module symbols of every supported Python version.

    For each ``(major, minor)`` pair in SUPPORTED_PYTHON_VERSIONS the typeshed
    stubs are built (third-party stubs when ``is_third_parties`` is true, the
    stdlib otherwise) and every retained file is wrapped in a ModuleSymbol.

    :param is_third_parties: select the third-party stubs instead of the stdlib.
    :return: a dict keyed by ``f"{major}{minor}"`` (e.g. ``"38"``) whose values
        map a module's ``fullname`` to its ModuleSymbol.
    """
    if is_third_parties:
        walk_typeshed = ts.walk_typeshed_third_parties
    else:
        walk_typeshed = ts.walk_typeshed_stdlib

    model_by_version: Dict[str, Dict[str, ModuleSymbol]] = {}
    for major, minor in SUPPORTED_PYTHON_VERSIONS:
        build_result, source_paths = walk_typeshed(ts.get_options((major, minor)))
        modules = {}
        for mypy_file in build_result.files.values():
            # A third-party build also contains stdlib modules that were not
            # part of the requested sources; only the requested ones are kept.
            if not is_third_parties or mypy_file.path in source_paths:
                module_symbol = ModuleSymbol(mypy_file)
                modules[module_symbol.fullname] = module_symbol
        model_by_version[f"{major}{minor}"] = modules
    return model_by_version
4145

4246

43-
def merge_multiple_python_versions():
44-
model_by_version = build_multiple_python_version()
47+
def merge_multiple_python_versions(is_third_parties=False):
48+
model_by_version = build_multiple_python_version(is_third_parties)
4549
all_python_modules: Set[str] = set()
4650
for version in model_by_version:
4751
model = model_by_version[version]

python-frontend/typeshed_serializer/serializer/typeshed_serializer.py

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@
2525
from serializer import symbols_merger, symbols
2626

2727
STDLIB_PATH = "../resources/typeshed/stdlib"
28+
STUBS_PATH = "../resources/typeshed/stubs"
2829
CURRENT_PATH = os.path.dirname(__file__)
30+
THIRD_PARTIES_STUBS = os.listdir(os.path.join(CURRENT_PATH, STUBS_PATH))
2931

3032

3133
def get_options(python_version=(3, 8)):
@@ -60,13 +62,34 @@ def load_single_module(module_fqn: str, category="stdlib"):
6062

6163

6264
def walk_typeshed_stdlib(opt: options.Options = get_options()):
    """Build the typeshed stdlib stubs with mypy.

    When ``opt.python_version`` is below (3, 0) the sources are taken from the
    ``@python2`` subdirectory of STDLIB_PATH, otherwise from STDLIB_PATH itself.

    :param opt: mypy options used for the build.
        NOTE(review): the default is evaluated once, when the function is
        defined, so calls that omit ``opt`` share the same Options object.
    :return: a ``(build_result, source_paths)`` pair: the value returned by
        ``build.build`` and the set of source paths collected by ``get_sources``.
    """
    is_python2 = opt.python_version < (3, 0)
    if is_python2:
        stdlib_dir = f"{STDLIB_PATH}/@python2"
    else:
        stdlib_dir = STDLIB_PATH
    sources, paths = get_sources(stdlib_dir, is_python2)
    return build.build(sources, opt), paths
70+
71+
72+
def walk_typeshed_third_parties(opt: options.Options = get_options()):
    """Build the typeshed stubs of every package listed in THIRD_PARTIES_STUBS.

    Sources are gathered package by package through ``get_sources``. When
    ``opt.python_version`` is below (3, 0) the package's ``@python2``
    subdirectory is scanned instead of the package root.

    :param opt: mypy options used for the build.
        NOTE(review): the default is evaluated once, when the function is
        defined, so calls that omit ``opt`` share the same Options object.
    :return: a ``(build_result, source_paths)`` pair: the value returned by
        ``build.build`` over all collected sources, and the set of their paths.
    """
    targets_python2 = opt.python_version < (3, 0)
    all_sources = []
    all_paths = set()
    for package in THIRD_PARTIES_STUBS:
        package_dir = os.path.join(STUBS_PATH, package)
        if targets_python2:
            package_dir = f"{package_dir}/@python2"
        package_sources, package_paths = get_sources(package_dir, targets_python2)
        all_sources.extend(package_sources)
        all_paths.update(package_paths)
    return build.build(all_sources, opt), all_paths
84+
85+
86+
def get_sources(relative_path: str, generate_python2: bool):
87+
source_list = []
88+
source_paths = set()
6689
path = os.path.join(CURRENT_PATH, relative_path)
6790
for root, dirs, files in os.walk(path):
6891
package_name = root.replace(path, "").replace("\\", ".").replace("/", ".").lstrip(".")
69-
if not generate_python2_stdlib and "python2" in package_name:
92+
if not generate_python2 and "python2" in package_name:
7093
# Avoid python2 stubs
7194
continue
7295
for file in files:
@@ -80,8 +103,8 @@ def walk_typeshed_stdlib(opt: options.Options = get_options()):
80103
file_path = f"{root}/{file}"
81104
source = build.BuildSource(file_path, module=fq_module_name)
82105
source_list.append(source)
83-
build_result = build.build(source_list, opt)
84-
return build_result
106+
source_paths.add(source.path)
107+
return source_list, source_paths
85108

86109

87110
def serialize_typeshed_stdlib(output_dir_name="output", python_version=(3, 8), is_debug=False):
@@ -92,7 +115,7 @@ def serialize_typeshed_stdlib(output_dir_name="output", python_version=(3, 8), i
92115
"""
93116
output_dir_name = output_dir_name if python_version >= (3, 0) else f"{output_dir_name}@python2"
94117
opt = get_options(python_version)
95-
build_result = walk_typeshed_stdlib(opt)
118+
build_result, _ = walk_typeshed_stdlib(opt)
96119
for file in build_result.files:
97120
module_symbol = symbols.ModuleSymbol(build_result.files.get(file))
98121
symbols.save_module(module_symbol, is_debug=is_debug, debug_dir=output_dir_name)
@@ -105,14 +128,15 @@ def serialize_typeshed_stdlib_multiple_python_version():
105128
serialize_typeshed_stdlib(f"output3{minor}", (3, minor), is_debug=True)
106129

107130

108-
def save_merged_symbols(is_debug=False, is_third_parties=False):
    """Merge the symbols of all Python versions and save each merged module.

    :param is_debug: forwarded to ``symbols.save_module``.
    :param is_third_parties: merge the third-party stubs instead of the stdlib;
        ``save_module`` is then called with ``is_stdlib=False``.
    """
    merged_modules = symbols_merger.merge_multiple_python_versions(is_third_parties)
    is_stdlib = not is_third_parties
    for merged_module in merged_modules.values():
        symbols.save_module(
            merged_module,
            is_debug=is_debug,
            debug_dir="output_merge",
            is_stdlib=is_stdlib,
        )
112135

113136

114137
def main():
    """Save the merged symbols of the stdlib first, then of the third-party stubs."""
    for is_third_parties in (False, True):
        save_merged_symbols(is_third_parties=is_third_parties)
116140

117141

118142
if __name__ == '__main__':

python-frontend/typeshed_serializer/tests/conftest.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,13 @@
2424
import pytest
2525
from mypy import build
2626

27-
from serializer import typeshed_serializer
27+
from serializer import typeshed_serializer, symbols_merger
2828

2929

3030
@pytest.fixture(scope="session")
def typeshed_stdlib():
    """Session-wide mypy build result of the typeshed stdlib stubs."""
    # walk_typeshed_stdlib returns (build_result, source_paths); the tests
    # using this fixture only need the build result.
    stdlib_build, _source_paths = typeshed_serializer.walk_typeshed_stdlib()
    return stdlib_build
3334

3435

3536
@pytest.fixture(scope="session")
@@ -39,3 +40,8 @@ def fake_module_36_38():
3940
fake_module_36 = typeshed_serializer.build_single_module('fakemodule', python_version=(3, 6))
4041
fake_module_38 = typeshed_serializer.build_single_module('fakemodule', python_version=(3, 8))
4142
return [fake_module_36, fake_module_38]
43+
44+
45+
@pytest.fixture(scope="session")
def typeshed_third_parties():
    """Session-wide merged symbols of the third-party typeshed stubs."""
    merged_third_parties = symbols_merger.merge_multiple_python_versions(is_third_parties=True)
    return merged_third_parties

0 commit comments

Comments
 (0)