Skip to content

Commit 95cc0de

Browse files
committed
Generalize Skylark parser for Bazel and Buck #1678
* Update tests Signed-off-by: Jono Yang <[email protected]>
1 parent 107b642 commit 95cc0de

File tree

4 files changed

+93
-261
lines changed

4 files changed

+93
-261
lines changed

src/packagedcode/bazel.py

Lines changed: 2 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -25,98 +25,16 @@
2525
from __future__ import print_function
2626
from __future__ import unicode_literals
2727

28-
from collections import defaultdict
29-
from collections import OrderedDict
30-
import ast
3128
import os
3229

3330
import attr
3431

35-
from commoncode import fileutils
36-
from packagedcode.build import BaseBuildManifestPackage
32+
from packagedcode.build import StarlarkManifestPackage
3733
from packagedcode.utils import combine_expressions
3834
from scancode.api import get_licenses
3935

4036

4137
@attr.s()
42-
class BazelPackage(BaseBuildManifestPackage):
38+
class BazelPackage(StarlarkManifestPackage):
4339
metafiles = ('BUILD',)
4440
default_type = 'bazel'
45-
46-
@classmethod
47-
def recognize(cls, location):
48-
if not cls._is_build_manifest(location):
49-
return
50-
for package in bazel_parse(location):
51-
yield package
52-
53-
def compute_normalized_license(self):
54-
return compute_normalized_license(
55-
self.declared_license,
56-
manifest_parent_path=self.root_path
57-
)
58-
59-
60-
def bazel_parse(location):
61-
build_rules = defaultdict(list)
62-
# Thanks to the Skylark language being a Python DSL, we can use the `ast`
63-
# library to parse Bazel BUILD files
64-
with open(location, 'rb') as f:
65-
tree = ast.parse(f.read())
66-
for statement in tree.body:
67-
if (isinstance(statement, ast.Expr)
68-
or isinstance(statement, ast.Call)
69-
or isinstance(statement, ast.Assign)
70-
and isinstance(statement.value, ast.Call)
71-
and isinstance(statement.value.func, ast.Name)):
72-
rule_name = statement.value.func.id
73-
# Process the rule arguments
74-
args = OrderedDict()
75-
for kw in statement.value.keywords:
76-
arg_name = kw.arg
77-
if isinstance(kw.value, ast.Str):
78-
args[arg_name] = kw.value.s
79-
if isinstance(kw.value, ast.List):
80-
# We collect the elements of a list if the element is not a function call
81-
args[arg_name] = [elt.s for elt in kw.value.elts if not isinstance(elt, ast.Call)]
82-
if args:
83-
build_rules[rule_name].append(args)
84-
85-
if build_rules:
86-
for rule_name, rule_instances_args in build_rules.items():
87-
for args in rule_instances_args:
88-
name = args.get('name')
89-
if not name:
90-
continue
91-
license_files = args.get('licenses')
92-
yield BazelPackage(
93-
name=name,
94-
declared_license=license_files,
95-
root_path=fileutils.parent_directory(location)
96-
)
97-
else:
98-
# If we don't find anything in the BUCK file, we yield a Package with
99-
# the parent directory as the name, like the default implementation of
100-
# `recognize()` for `BaseBuildManifestPackage`
101-
yield BazelPackage(
102-
# we use the parent directory as a name
103-
name=fileutils.file_name(fileutils.parent_directory(location))
104-
)
105-
106-
107-
def compute_normalized_license(declared_license, manifest_parent_path):
108-
"""
109-
Return a normalized license expression string detected from a list of
110-
declared license items.
111-
"""
112-
if not declared_license or not manifest_parent_path:
113-
return
114-
115-
license_expressions = []
116-
for license_file in declared_license:
117-
license_file_path = os.path.join(manifest_parent_path, license_file)
118-
if os.path.exists(license_file_path) and os.path.isfile(license_file_path):
119-
licenses = get_licenses(license_file_path)
120-
license_expressions.extend(licenses.get('license_expressions', []))
121-
122-
return combine_expressions(license_expressions)

src/packagedcode/buck.py

Lines changed: 2 additions & 173 deletions
Original file line numberDiff line numberDiff line change
@@ -25,185 +25,14 @@
2525
from __future__ import print_function
2626
from __future__ import unicode_literals
2727

28-
from collections import defaultdict
29-
from collections import OrderedDict
30-
import ast
3128
import os
3229

3330
import attr
3431

35-
from commoncode import fileutils
36-
from packagedcode.build import BaseBuildManifestPackage
37-
from packagedcode.utils import combine_expressions
38-
from scancode.api import get_licenses
32+
from packagedcode.build import StarlarkManifestPackage
3933

4034

4135
@attr.s()
42-
class BuckPackage(BaseBuildManifestPackage):
36+
class BuckPackage(StarlarkManifestPackage):
4337
metafiles = ('BUCK',)
4438
default_type = 'buck'
45-
46-
@classmethod
47-
def recognize(cls, location):
48-
if not cls._is_build_manifest(location):
49-
return
50-
for package in buck_parse(location):
51-
yield package
52-
53-
def compute_normalized_license(self):
54-
return compute_normalized_license(
55-
self.declared_license,
56-
manifest_parent_path=self.root_path
57-
)
58-
59-
60-
# TODO: Prune rule names that do not create things we do not care about like
61-
# `robolectric_test` or `genrule`
62-
buck_rule_names = [
63-
'command_alias',
64-
'export_file',
65-
'filegroup',
66-
'genrule',
67-
'http_archive',
68-
'http_file',
69-
'remote_file',
70-
'test_suite',
71-
'worker_tool',
72-
'zip_file',
73-
'android_aar',
74-
'android_binary',
75-
'android_build_config',
76-
'android_instrumentation_apk',
77-
'android_instrumentation_test',
78-
'android_library',
79-
'android_manifest',
80-
'android_prebuilt_aar',
81-
'android_resource',
82-
'apk_genrule',
83-
'gen_aidl',
84-
'keystore',
85-
'ndk_library',
86-
'prebuilt_jar',
87-
'prebuilt_native_library',
88-
'robolectric_test',
89-
'cxx_binary',
90-
'cxx_library',
91-
'cxx_genrule',
92-
'cxx_precompiled_header',
93-
'cxx_test',
94-
'prebuilt_cxx_library',
95-
'prebuilt_cxx_library_group',
96-
'd_binary',
97-
'd_library',
98-
'd_test',
99-
'go_binary',
100-
'go_library',
101-
'go_test',
102-
'cgo_library',
103-
'groovy_library',
104-
'halide_library',
105-
'haskell_binary',
106-
'haskell_library',
107-
'prebuilt_haskell_library',
108-
'apple_asset_catalog',
109-
'apple_binary',
110-
'apple_bundle',
111-
'apple_library',
112-
'apple_package',
113-
'apple_resource',
114-
'apple_test',
115-
'core_data_model',
116-
'prebuilt_apple_framework',
117-
'java_binary',
118-
'java_library',
119-
'java_test',
120-
'prebuilt_jar',
121-
'prebuilt_native_library',
122-
'kotlin_library',
123-
'kotlin_test',
124-
'cxx_lua_extension',
125-
'lua_binary',
126-
'lua_library',
127-
'ocaml_binary',
128-
'ocaml_library',
129-
'prebuilt_python_library',
130-
'python_binary',
131-
'python_library',
132-
'python_test',
133-
'rust_binary',
134-
'rust_library',
135-
'rust_test',
136-
'prebuilt_rust_library',
137-
'sh_binary',
138-
'sh_test',
139-
'csharp_library',
140-
'prebuilt_dotnet_library'
141-
]
142-
143-
144-
def buck_parse(location):
145-
build_rules = defaultdict(list)
146-
# Thanks to the BUCK language being a Python DSL, we can use the `ast`
147-
# library to parse BUCK files
148-
with open(location, 'rb') as f:
149-
tree = ast.parse(f.read())
150-
for statement in tree.body:
151-
# We only care about function calls or assignments to functions whose
152-
# names are in `buck_rule_names`
153-
if (isinstance(statement, ast.Expr)
154-
or isinstance(statement, ast.Call)
155-
or isinstance(statement, ast.Assign)
156-
and isinstance(statement.value, ast.Call)
157-
and isinstance(statement.value.func, ast.Name)
158-
and statement.value.func.id in buck_rule_names):
159-
rule_name = statement.value.func.id
160-
# Process the rule arguments
161-
args = OrderedDict()
162-
for kw in statement.value.keywords:
163-
arg_name = kw.arg
164-
if isinstance(kw.value, ast.Str):
165-
args[arg_name] = kw.value.s
166-
if isinstance(kw.value, ast.List):
167-
# We collect the elements of a list if the element is not a function call
168-
args[arg_name] = [elt.s for elt in kw.value.elts if not isinstance(elt, ast.Call)]
169-
if args:
170-
build_rules[rule_name].append(args)
171-
172-
if build_rules:
173-
for rule_name, rule_instances_args in build_rules.items():
174-
for args in rule_instances_args:
175-
name = args.get('name')
176-
if not name:
177-
continue
178-
license_files = args.get('licenses')
179-
yield BuckPackage(
180-
name=name,
181-
declared_license=license_files,
182-
root_path=fileutils.parent_directory(location)
183-
)
184-
else:
185-
# If we don't find anything in the BUCK file, we yield a Package with
186-
# the parent directory as the name, like the default implementation of
187-
# `recognize()` for `BaseBuildManifestPackage`
188-
yield BuckPackage(
189-
# we use the parent directory as a name
190-
name=fileutils.file_name(fileutils.parent_directory(location))
191-
)
192-
193-
194-
def compute_normalized_license(declared_license, manifest_parent_path):
195-
"""
196-
Return a normalized license expression string detected from a list of
197-
declared license items.
198-
"""
199-
if not declared_license or not manifest_parent_path:
200-
return
201-
202-
license_expressions = []
203-
for license_file in declared_license:
204-
license_file_path = os.path.join(manifest_parent_path, license_file)
205-
if os.path.exists(license_file_path) and os.path.isfile(license_file_path):
206-
licenses = get_licenses(license_file_path)
207-
license_expressions.extend(licenses.get('license_expressions', []))
208-
209-
return combine_expressions(license_expressions)

src/packagedcode/build.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,19 @@
2626
from __future__ import print_function
2727
from __future__ import unicode_literals
2828

29+
from collections import defaultdict
30+
from collections import OrderedDict
31+
import ast
2932
import logging
33+
import os
3034

3135
import attr
3236

3337
from commoncode import filetype
3438
from commoncode import fileutils
3539
from packagedcode import models
40+
from packagedcode.utils import combine_expressions
41+
from scancode.api import get_licenses
3642

3743

3844
TRACE = False
@@ -89,3 +95,82 @@ def _is_build_manifest(cls, location):
8995
class AutotoolsPackage(BaseBuildManifestPackage):
9096
metafiles = ('configure', 'configure.ac',)
9197
default_type = 'autotools'
98+
99+
100+
# Suffixes matched against Starlark rule (function) names with
# `str.endswith()`. This is a tuple on purpose: `str.endswith()` accepts a str
# or a tuple of str and raises TypeError when given a list.
starlark_rule_types = (
    'binary',
    'library',
)
104+
105+
106+
def _starlark_string(node):
    """
    Return the text of the string literal AST `node`, or None if `node` is not
    a string literal.
    """
    import sys

    # Python 3.8+ parses every literal as `ast.Constant`; `ast.Str` is
    # deprecated there, so it is only consulted on older interpreters.
    constant_type = getattr(ast, 'Constant', None)
    if constant_type is not None and isinstance(node, constant_type):
        return node.value if isinstance(node.value, str) else None
    if sys.version_info < (3, 8) and isinstance(node, ast.Str):
        return node.s
    return None


def _starlark_call(statement):
    """
    Return the `ast.Call` node of `statement` if it is a bare call or an
    assignment of a call to a plainly named function, or None otherwise.
    """
    if not isinstance(statement, (ast.Expr, ast.Assign)):
        return None
    call = statement.value
    if isinstance(call, ast.Call) and isinstance(call.func, ast.Name):
        return call
    # Docstrings, plain assignments, attribute calls such as `native.rule()`...
    return None


def _starlark_call_args(call):
    """
    Return an OrderedDict of the keyword arguments of the `call` AST node
    whose values are string literals or lists. Lists keep only their string
    literal elements; keywords with any other kind of value are ignored.
    """
    args = OrderedDict()
    for kw in call.keywords:
        text = _starlark_string(kw.value)
        if text is not None:
            args[kw.arg] = text
        elif isinstance(kw.value, ast.List):
            # Non-string elements (function calls, names, numbers) are skipped
            elements = (_starlark_string(elt) for elt in kw.value.elts)
            args[kw.arg] = [elt for elt in elements if elt is not None]
    return args


@attr.s()
class StarlarkManifestPackage(BaseBuildManifestPackage):
    """
    Base class for packages declared in Starlark build manifests. Subclasses
    set `metafiles` and `default_type`.
    """

    @classmethod
    def recognize(cls, location):
        """
        Yield one `cls` package for each named rule found in the manifest at
        `location`. If no rule with arguments is found at all, yield a single
        package named after the parent directory of `location`.
        """
        if not cls._is_build_manifest(location):
            return

        # Thanks to Starlark being a Python dialect, we can use the `ast`
        # library to parse it
        with open(location, 'rb') as f:
            tree = ast.parse(f.read())

        # `str.endswith()` only accepts a str or a tuple of str
        rule_suffixes = tuple(starlark_rule_types)

        build_rules = defaultdict(list)
        for statement in tree.body:
            call = _starlark_call(statement)
            if call is None:
                continue
            rule_name = call.func.id
            # NOTE(review): the rule name suffix filter has only ever applied
            # to assignments; bare calls are accepted whatever their name.
            # Confirm whether bare calls should be filtered too.
            if (isinstance(statement, ast.Assign)
                    and not rule_name.endswith(rule_suffixes)):
                continue
            args = _starlark_call_args(call)
            if args:
                build_rules[rule_name].append(args)

        if build_rules:
            for rule_name, rule_instances_args in build_rules.items():
                for args in rule_instances_args:
                    name = args.get('name')
                    if not name:
                        continue
                    license_files = args.get('licenses')
                    yield cls(
                        name=name,
                        declared_license=license_files,
                        root_path=fileutils.parent_directory(location)
                    )
        else:
            # If we don't find anything in the manifest file, we yield a
            # Package with the parent directory as the name
            yield cls(
                name=fileutils.file_name(fileutils.parent_directory(location))
            )

    def compute_normalized_license(self):
        """
        Return a normalized license expression string detected from the
        declared license items, each taken as a file path relative to the
        manifest parent directory. Return None if there is no declared
        license or no root path.
        """
        declared_license = self.declared_license
        manifest_parent_path = self.root_path

        if not declared_license or not manifest_parent_path:
            return

        # A manifest may declare `licenses` as one string rather than a list;
        # iterating over it directly would yield single characters.
        if not isinstance(declared_license, (list, tuple)):
            declared_license = [declared_license]

        license_expressions = []
        for license_file in declared_license:
            license_file_path = os.path.join(manifest_parent_path, license_file)
            # isfile() is already False for a missing path
            if os.path.isfile(license_file_path):
                licenses = get_licenses(license_file_path)
                license_expressions.extend(licenses.get('license_expressions', []))

        return combine_expressions(license_expressions)

0 commit comments

Comments
 (0)