Skip to content

Commit 8facad7

Browse files
SONARPY-874 Serialize merged symbols for various Python version (#944)
1 parent 5b471db commit 8facad7

File tree

9 files changed

+741
-63
lines changed

9 files changed

+741
-63
lines changed

python-frontend/typeshed_serializer/serializer/symbols.py

Lines changed: 117 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
from enum import Enum
3+
from typing import List, Union
34

45
import mypy.types as mpt
56
import mypy.nodes as mpn
@@ -158,6 +159,9 @@ def __init__(self, overloaded_func_def: mpn.OverloadedFuncDef):
158159
if isinstance(item, mpn.Decorator):
159160
self.definitions.append(FunctionSymbol(item.func, decorators=item.original_decorators))
160161

162+
def __eq__(self, other):
    """Compare two overloaded function symbols through their protobuf form.

    Anything that is not an ``OverloadedFunctionSymbol`` is never equal.
    """
    if not isinstance(other, OverloadedFunctionSymbol):
        return False
    return self.to_proto() == other.to_proto()
164+
161165
def to_proto(self) -> symbols_pb2.OverloadedFunctionSymbol:
162166
pb_overloaded_func = symbols_pb2.OverloadedFunctionSymbol()
163167
pb_overloaded_func.name = self.name
@@ -188,6 +192,9 @@ def __init__(self, func_def: mpn.FuncDef, decorators=None):
188192
if decorator_name is not None:
189193
self.resolved_decorator_names.append(decorator_name)
190194

195+
def __eq__(self, other):
    """Compare two function symbols through their protobuf form.

    Anything that is not a ``FunctionSymbol`` is never equal.
    """
    if not isinstance(other, FunctionSymbol):
        return False
    return self.to_proto() == other.to_proto()
197+
191198
def to_proto(self) -> symbols_pb2.FunctionSymbol:
192199
pb_func = symbols_pb2.FunctionSymbol()
193200
pb_func.name = self.name
@@ -246,6 +253,19 @@ def __init__(self, type_info: mpn.TypeInfo):
246253
self.metaclass_name = class_def.metaclass.fullname
247254
self.has_decorators = len(class_def.decorators) > 0
248255

256+
def __eq__(self, other):
    """Return True when ``other`` is a ``ClassSymbol`` with the same class-level attributes.

    Only the attributes listed in ``compared_attributes`` take part in the
    comparison; nothing else stored on the symbol is looked at here.
    """
    if not isinstance(other, ClassSymbol):
        return False
    compared_attributes = (
        "name",
        "fullname",
        "super_classes",
        "mro",
        "is_enum",
        "is_generic",
        "is_protocol",
        "metaclass_name",
        "has_decorators",
    )
    # all() stops at the first differing attribute, in the listed order.
    return all(getattr(self, attr) == getattr(other, attr) for attr in compared_attributes)
268+
249269
def to_proto(self) -> symbols_pb2.ClassSymbol:
250270
pb_class = symbols_pb2.ClassSymbol()
251271
pb_class.name = self.name
@@ -266,6 +286,86 @@ def to_proto(self) -> symbols_pb2.ClassSymbol:
266286
return pb_class
267287

268288

289+
class MergedFunctionSymbol:
    """A function symbol together with the version identifiers it applies to."""

    def __init__(self, function_symbol: FunctionSymbol, valid_for: List[str]):
        self.valid_for = valid_for
        self.function_symbol = function_symbol

    def to_proto(self) -> symbols_pb2.FunctionSymbol:
        """Serialize the wrapped function and add every ``valid_for`` entry to the message."""
        serialized = self.function_symbol.to_proto()
        serialized.valid_for.extend(self.valid_for)
        return serialized
299+
300+
301+
class MergedOverloadedFunctionSymbol:
    """An overloaded function symbol together with the version identifiers it applies to."""

    def __init__(self, overloaded_function_symbol: OverloadedFunctionSymbol, valid_for: List[str]):
        self.overloaded_function_symbol = overloaded_function_symbol
        self.valid_for = valid_for

    def to_proto(self) -> symbols_pb2.OverloadedFunctionSymbol:
        """Serialize the wrapped overloaded function and add every ``valid_for`` entry.

        The returned message is the one produced by
        ``OverloadedFunctionSymbol.to_proto``, hence the
        ``symbols_pb2.OverloadedFunctionSymbol`` return annotation
        (it was previously annotated as ``symbols_pb2.FunctionSymbol``).
        """
        pb_func = self.overloaded_function_symbol.to_proto()
        pb_func.valid_for.extend(self.valid_for)
        return pb_func
311+
312+
313+
class MergedClassSymbol:
    """A class description plus its merged methods and the version identifiers it applies to.

    ``merged_methods`` and ``merged_overloaded_methods`` are mappings whose
    values are lists of objects exposing ``to_proto()``.
    """

    def __init__(self, reference_class_symbols: ClassSymbol, merged_methods, merged_overloaded_methods,
                 valid_for: List[str]):
        # The methods held by the reference class symbol itself are not used
        # any more: only the merged collections below get serialized.
        self.class_symbol = reference_class_symbols
        self.methods = merged_methods
        self.overloaded_methods = merged_overloaded_methods
        self.valid_for = valid_for

    def to_proto(self) -> symbols_pb2.ClassSymbol:
        """Build the protobuf message for the class, its merged methods and ``valid_for``."""
        reference = self.class_symbol
        pb_class = symbols_pb2.ClassSymbol()

        # Class-level description, copied from the reference class symbol.
        pb_class.name = reference.name
        pb_class.fully_qualified_name = reference.fullname
        pb_class.super_classes.extend(reference.super_classes)
        pb_class.mro.extend(reference.mro)
        pb_class.has_decorators = reference.has_decorators
        pb_class.has_metaclass = reference.has_metaclass
        pb_class.is_enum = reference.is_enum
        pb_class.is_generic = reference.is_generic
        pb_class.is_protocol = reference.is_protocol
        if reference.metaclass_name is not None:
            pb_class.metaclass_name = reference.metaclass_name

        # Every variant of every merged method is serialized.
        for method_variants in self.methods.values():
            for variant in method_variants:
                pb_class.methods.append(variant.to_proto())
        for overloaded_variants in self.overloaded_methods.values():
            for variant in overloaded_variants:
                pb_class.overloaded_methods.append(variant.to_proto())

        pb_class.valid_for.extend(self.valid_for)
        return pb_class
344+
345+
346+
class MergedModuleSymbol:
    """A module described by its merged classes, functions and overloaded functions.

    Each of the three collections is a mapping whose values are lists of
    objects exposing ``to_proto()``.
    """

    def __init__(self, fullname, classes, functions, overloaded_functions):
        self.fullname = fullname
        self.classes = classes
        self.functions = functions
        self.overloaded_functions = overloaded_functions

    def to_proto(self):
        """Build the ``symbols_pb2.ModuleSymbol`` message holding every merged variant."""
        pb_module = symbols_pb2.ModuleSymbol()
        pb_module.name = self.fullname  # FIXME: is it even useful to have name?
        pb_module.fully_qualified_name = self.fullname
        for class_variants in self.classes.values():
            for variant in class_variants:
                pb_module.classes.append(variant.to_proto())
        for function_variants in self.functions.values():
            for variant in function_variants:
                pb_module.functions.append(variant.to_proto())
        for overloaded_variants in self.overloaded_functions.values():
            for variant in overloaded_variants:
                pb_module.overloaded_functions.append(variant.to_proto())
        return pb_module
367+
368+
269369
class ModuleSymbol:
270370
def __init__(self, mypy_file: mpn.MypyFile):
271371
self.name = mypy_file.name
@@ -338,15 +438,25 @@ def extract_return_type(func_def: mpn.FuncDef):
338438
return TypeDescriptor(func_type.ret_type)
339439

340440

341-
def save_module(mypy_file: mpn.MypyFile, save_as_text=True, output_dir_name="output",
342-
save_location="../../src/main/resources/org/sonar/python/types"):
343-
ms = ModuleSymbol(mypy_file)
441+
def save_module(ms: Union[ModuleSymbol, MergedModuleSymbol], is_debug=False, debug_dir="output"):
344442
ms_pb = ms.to_proto()
345-
save_dir = f"{save_location}/{output_dir_name}"
346-
save_string = str(ms_pb) if save_as_text else ms_pb.SerializeToString()
347-
open_mode = "w" if save_as_text else "wb"
443+
save_dir = "../../src/main/resources/org/sonar/python/types/protobuf" if not is_debug else f"../{debug_dir}"
444+
save_string = ms_pb.SerializeToString() if not is_debug else str(ms_pb)
445+
open_mode = "wb" if not is_debug else "w"
348446
save_dir_path = os.path.join(CURRENT_PATH, save_dir)
349447
if not os.path.exists(save_dir_path):
350448
os.makedirs(save_dir_path)
351-
with open(f"{save_dir_path}/{ms.fullname}.protobuf", open_mode) as f:
449+
save_name = ms.fullname if not is_python_2_only_exception(ms) else f"2@{ms.fullname}"
450+
with open(f"{save_dir_path}/{save_name}.protobuf", open_mode) as f:
352451
f.write(save_string)
452+
453+
454+
def is_python_2_only_exception(ms) -> bool:
    """Flag Python 2 modules whose names differ from their Python 3 counterparts by capitalization only.

    This avoids conflicts between saved files on operating systems that are
    not case sensitive (e.g. Windows and macOS). Only ``MergedModuleSymbol``
    instances can be flagged.
    """
    if not isinstance(ms, MergedModuleSymbol):
        return False
    return ms.fullname in ('ConfigParser', 'Queue', 'SocketServer')
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
from typing import Dict, Set, List
2+
3+
from serializer.symbols import ModuleSymbol, MergedFunctionSymbol, MergedClassSymbol, MergedOverloadedFunctionSymbol, \
4+
MergedModuleSymbol
5+
from serializer import typeshed_serializer as ts
6+
7+
SUPPORTED_PYTHON_VERSIONS = ((2, 7), (3, 5), (3, 6), (3, 7), (3, 8), (3, 9))
8+
9+
10+
def build_multiple_python_version() -> Dict[str, Dict[str, ModuleSymbol]]:
    """Build the module symbols of the typeshed stdlib for every supported Python version.

    :return: a mapping from version key (major and minor concatenated, e.g. ``"38"``)
             to a mapping from module fullname to its ``ModuleSymbol``
    """
    model_by_version: Dict[str, Dict[str, ModuleSymbol]] = {}
    for major, minor in SUPPORTED_PYTHON_VERSIONS:
        build_result = ts.walk_typeshed_stdlib(ts.get_options((major, minor)))
        module_symbols = (ModuleSymbol(mypy_file) for mypy_file in build_result.files.values())
        model_by_version[f"{major}{minor}"] = {symbol.fullname: symbol for symbol in module_symbols}
    return model_by_version
20+
21+
22+
def merge_multiple_python_versions():
    """Build the symbols for every supported version and merge them module by module.

    :return: the mapping produced by ``merge_modules``
    """
    model_by_version = build_multiple_python_version()
    # Union of the module fullnames seen in any version.
    all_python_modules: Set[str] = {
        module.fullname
        for model in model_by_version.values()
        for module in model.values()
    }
    return merge_modules(all_python_modules, model_by_version)
32+
33+
34+
def merge_modules(all_python_modules: Set[str], model_by_version: Dict[str, Dict[str, ModuleSymbol]]):
    """Create one ``MergedModuleSymbol`` per module name, merging its content across versions.

    :param all_python_modules: fullnames of the modules to merge
    :param model_by_version: for each version key, the module symbols indexed by fullname
    :return: a mapping from module fullname to its ``MergedModuleSymbol``
    """
    merged_modules: Dict[str, MergedModuleSymbol] = {}
    for module_name in all_python_modules:
        # These dictionaries are shared with the MergedModuleSymbol and are
        # filled in place by the merge_* helpers below.
        classes: Dict[str, List[MergedClassSymbol]] = {}
        funcs: Dict[str, List[MergedFunctionSymbol]] = {}
        overloaded_funcs: Dict[str, List[MergedOverloadedFunctionSymbol]] = {}
        merged_modules[module_name] = MergedModuleSymbol(module_name, classes, funcs, overloaded_funcs)
        for version, model in model_by_version.items():
            if module_name in model:
                current_module = model[module_name]
                merge_classes(current_module, classes, version)
                merge_functions(current_module, funcs, version)
                merge_overloaded_functions(current_module, overloaded_funcs, version)
    return merged_modules
52+
53+
54+
def merge_classes(current_module, handled_classes, version):
    """Merge the classes of ``current_module`` into ``handled_classes`` for ``version``.

    ``handled_classes`` maps a class fullname to the list of its known variants
    and is updated in place. A class equal to an existing variant extends that
    variant (its methods are merged and ``version`` is recorded); otherwise a
    new variant valid for ``version`` only is added.
    """
    for class_symbol in current_module.classes:
        variants = handled_classes.get(class_symbol.fullname)
        matching = None
        if variants is not None:
            matching = next((variant for variant in variants if variant.class_symbol == class_symbol), None)

        if matching is not None:
            # Same class description as a known variant: merge into it.
            merge_functions(class_symbol, matching.methods, version)
            merge_overloaded_functions(class_symbol, matching.overloaded_methods, version)
            matching.valid_for.append(version)
            continue

        # Unknown class, or no equivalent variant yet: start a new variant.
        methods = {}
        overloaded_methods = {}
        merge_functions(class_symbol, methods, version)
        merge_overloaded_functions(class_symbol, overloaded_methods, version)
        new_variant = MergedClassSymbol(class_symbol, methods, overloaded_methods, [version])
        if variants is None:
            handled_classes[class_symbol.fullname] = [new_variant]
        else:
            variants.append(new_variant)
80+
81+
82+
def merge_overloaded_functions(module_or_class, handled_overloaded_funcs, version):
    """Merge the overloaded functions of a module (or overloaded methods of a class) for ``version``.

    ``handled_overloaded_funcs`` maps a fullname to the list of its known
    variants and is updated in place.
    """
    if isinstance(module_or_class, ModuleSymbol):
        candidates = module_or_class.overloaded_functions
    else:
        candidates = module_or_class.overloaded_methods

    for candidate in candidates:
        variants = handled_overloaded_funcs.get(candidate.fullname)
        if variants is None:
            # First time this name is seen.
            handled_overloaded_funcs[candidate.fullname] = [MergedOverloadedFunctionSymbol(candidate, [version])]
            continue
        matching = next((variant for variant in variants
                         if variant.overloaded_function_symbol == candidate), None)
        if matching is None:
            # No equivalent yet among the variations: add a new one.
            variants.append(MergedOverloadedFunctionSymbol(candidate, [version]))
        else:
            matching.valid_for.append(version)
98+
99+
100+
def merge_functions(module_or_class, handled_funcs, version):
    """Merge the functions of a module (or methods of a class) into ``handled_funcs`` for ``version``.

    ``handled_funcs`` maps a fullname to the list of its known variants and is
    updated in place.
    """
    if isinstance(module_or_class, ModuleSymbol):
        candidates = module_or_class.functions
    else:
        candidates = module_or_class.methods

    for candidate in candidates:
        variants = handled_funcs.get(candidate.fullname)
        if variants is None:
            # First time this name is seen.
            handled_funcs[candidate.fullname] = [MergedFunctionSymbol(candidate, [version])]
            continue
        for variant in variants:
            if variant.function_symbol == candidate:
                variant.valid_for.append(version)
                break
        else:
            # No equivalent yet among the variations: add a new one.
            variants.append(MergedFunctionSymbol(candidate, [version]))

python-frontend/typeshed_serializer/serializer/typeshed_serializer.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,23 @@
11
import os
2-
import sys
3-
import collections
2+
43
from mypy import build, options
54

6-
from serializer.symbols import save_module
5+
from serializer import symbols_merger, symbols
76

8-
VersionInfoTuple = collections.namedtuple('version_info', ['major', 'minor', 'micro', 'releaselevel', 'serial'])
97
STDLIB_PATH = "../resources/typeshed/stdlib"
108
CURRENT_PATH = os.path.dirname(__file__)
119

1210

13-
def get_options(python_version=None):
11+
def get_options(python_version=(3, 8)):
1412
opt = options.Options()
1513
# Setting incremental to false to avoid issues with mypy caching
1614
opt.incremental = False
17-
if python_version is not None:
18-
opt.python_version = python_version
15+
opt.python_version = python_version
1916
return opt
2017

2118

22-
def build_single_module(module_fqn: str, category="stdlib", opt=get_options()):
19+
def build_single_module(module_fqn: str, category="stdlib", python_version=(3, 8)):
20+
opt = get_options(python_version)
2321
module_source = load_single_module(module_fqn, category)
2422
build_result = build.build([module_source], opt)
2523
built_file = build_result.files.get(module_fqn)
@@ -66,24 +64,31 @@ def walk_typeshed_stdlib(opt: options.Options = get_options()):
6664
return build_result
6765

6866

69-
def serialize_typeshed_stdlib(output_dir_name="output", python_version=(3, 8)):
67+
def serialize_typeshed_stdlib(output_dir_name="output", python_version=(3, 8), is_debug=False):
7068
""" Serialize semantic model for Python standard library
7169
:param output_dir_name: Optional output directory name
7270
:param python_version: Optional version of Python to use for serialization
71+
:param is_debug: debug flag
7372
"""
7473
output_dir_name = output_dir_name if python_version >= (3, 0) else f"{output_dir_name}@python2"
7574
opt = get_options(python_version)
7675
build_result = walk_typeshed_stdlib(opt)
7776
for file in build_result.files:
78-
save_module(build_result.files.get(file), save_as_text=True, output_dir_name=output_dir_name)
77+
module_symbol = symbols.ModuleSymbol(build_result.files.get(file))
78+
symbols.save_module(module_symbol, is_debug=is_debug, debug_dir=output_dir_name)
7979

8080

8181
def serialize_typeshed_stdlib_multiple_python_version():
8282
""" Serialize semantic model for Python stdlib versions from 3.5 to 3.9
8383
"""
8484
for minor in range(5, 10):
85-
sys.version_info = VersionInfoTuple(3, minor, 0, 'final', 0)
86-
serialize_typeshed_stdlib(f"output3{minor}", (3, minor))
85+
serialize_typeshed_stdlib(f"output3{minor}", (3, minor), is_debug=True)
86+
87+
88+
def save_merged_symbols(is_debug=False):
    """Merge the symbols of the supported Python versions and save each merged module.

    :param is_debug: forwarded to ``symbols.save_module``
    """
    merged_modules = symbols_merger.merge_multiple_python_versions()
    for merged_module in merged_modules.values():
        symbols.save_module(merged_module, is_debug=is_debug, debug_dir="output_merge")
8792

8893

8994
def main():
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import os
2+
from unittest.mock import Mock
3+
4+
import pytest
5+
from mypy import build
6+
7+
from serializer import typeshed_serializer
8+
9+
10+
@pytest.fixture(scope="session")
def typeshed_stdlib():
    """Session-scoped result of walking the typeshed stdlib with the default options."""
    build_result = typeshed_serializer.walk_typeshed_stdlib()
    return build_result
13+
14+
15+
@pytest.fixture(scope="session")
def fake_module_36_38():
    """Build the ``fakemodule`` test resource for Python 3.6 and 3.8.

    :return: a two-element list: the 3.6 build first, then the 3.8 build
    """
    fake_module_path = os.path.join(os.path.dirname(__file__), "resources/fakemodule.pyi")
    fake_source = build.BuildSource(fake_module_path, "fakemodule")
    # NOTE: this replaces typeshed_serializer.load_single_module at module level
    # and the original function is not restored afterwards.
    typeshed_serializer.load_single_module = Mock(return_value=fake_source)
    return [
        typeshed_serializer.build_single_module('fakemodule', python_version=python_version)
        for python_version in ((3, 6), (3, 8))
    ]

0 commit comments

Comments
 (0)