-
-
Notifications
You must be signed in to change notification settings - Fork 38
Add support to mine maven Package-URLs #660 #678
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
b12150b
e4fdae9
8f1f9ac
837d0d1
b92a8bd
6b3372d
4ff859b
44f4363
b0a3d6e
d9c8b8d
ac7f6d8
1bfb2d8
dda2bca
74bd21c
810956f
0d441f9
e5ed41a
eb4a4fb
289c336
88956bf
9449ee2
a35a351
81b1243
a225502
d126ea0
113bce5
78045ef
644fbc0
77295a3
bc240f2
fabe276
0b36220
16adc5c
ac3032a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| # http://nexb.com and https://github.com/aboutcode-org/scancode.io | ||
| # The ScanCode.io software is licensed under the Apache License version 2.0. | ||
| # Data generated with ScanCode.io is provided as-is without warranties. | ||
| # ScanCode is a trademark of nexB Inc. | ||
| # | ||
| # You may not use this software except in compliance with the License. | ||
| # You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
| # Unless required by applicable law or agreed to in writing, software distributed | ||
| # under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
| # CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| # specific language governing permissions and limitations under the License. | ||
| # | ||
| # Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES | ||
| # OR CONDITIONS OF ANY KIND, either express or implied. No content created from | ||
| # ScanCode.io should be considered or used as legal advice. Consult an Attorney | ||
| # for any legal advice. | ||
| # | ||
| # ScanCode.io is a free software code scanning tool from nexB Inc. and others. | ||
| # Visit https://github.com/aboutcode-org/scancode.io for support and download. | ||
|
|
||
|
|
||
| from minecode_pipeline.pipes import maven | ||
| from scanpipe.pipelines.publish_to_federatedcode import PublishToFederatedCode | ||
|
|
||
|
|
||
| class MineMaven(PublishToFederatedCode): | ||
| """ | ||
| Create DiscoveredPackages for packages found on maven: | ||
| - input: url of maven repo | ||
| - process index | ||
| - collect purls, grouped by package | ||
| - write to files | ||
| - publish to federatedcode | ||
| - loop | ||
|
|
||
| """ | ||
|
|
||
| @classmethod | ||
| def steps(cls): | ||
| return ( | ||
| cls.check_federatedcode_eligibility, | ||
| cls.collect_packages_from_maven, | ||
| ) | ||
|
|
||
| def collect_packages_from_maven(self): | ||
| maven.collect_packages_from_maven(self.project, self.log) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,11 +12,12 @@ | |
|
|
||
| from pathlib import Path | ||
|
|
||
| from aboutcode.hashid import PURLS_FILENAME | ||
| from aboutcode import hashid | ||
| from scanpipe.pipes import federatedcode | ||
|
|
||
|
|
||
| def write_packageurls_to_file(repo, base_dir, packageurls): | ||
| purl_file_rel_path = os.path.join(base_dir, PURLS_FILENAME) | ||
| purl_file_rel_path = os.path.join(base_dir, hashid.PURLS_FILENAME) | ||
| purl_file_full_path = Path(repo.working_dir) / purl_file_rel_path | ||
| write_data_to_file(path=purl_file_full_path, data=packageurls) | ||
| return purl_file_rel_path | ||
|
|
@@ -26,3 +27,28 @@ def write_data_to_file(path, data): | |
| path.parent.mkdir(parents=True, exist_ok=True) | ||
| with open(path, encoding="utf-8", mode="w") as f: | ||
| f.write(saneyaml.dump(data)) | ||
|
|
||
|
|
||
| def write_purls_to_repo(repo, package, packages, push_commit=False): | ||
|
||
| # save purls to yaml | ||
| ppath = hashid.get_package_purls_yml_file_path(package) | ||
| purls = [p.purl for p in packages] | ||
| federatedcode.write_data_as_yaml( | ||
| base_path=repo.working_dir, | ||
| file_path=ppath, | ||
| data=purls, | ||
| ) | ||
|
|
||
| change_type = "Add" if ppath in repo.untracked_files else "Update" | ||
| commit_message = f"""\ | ||
| {change_type} list of available {package} versions | ||
| """ | ||
| federatedcode.commit_changes( | ||
| repo=repo, | ||
| files_to_commit=[ppath], | ||
| commit_message=commit_message, | ||
| ) | ||
|
|
||
| # see if we should push | ||
| if push_commit: | ||
| federatedcode.push_changes(repo=repo) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| # The MIT License (MIT) | ||
| # | ||
| # Copyright (c) 2014 Gustav Arngården | ||
| # | ||
| # Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| # of this software and associated documentation files (the "Software"), to deal | ||
| # in the Software without restriction, including without limitation the rights | ||
| # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| # copies of the Software, and to permit persons to whom the Software is | ||
| # furnished to do so, subject to the following conditions: | ||
| # | ||
| # The above copyright notice and this permission notice shall be included in all | ||
| # copies or substantial portions of the Software. | ||
| # | ||
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
| # SOFTWARE. | ||
|
|
||
|
|
||
| """Reading from Java DataInputStream format.""" | ||
|
|
||
| import struct | ||
|
|
||
|
|
||
| class DataInputStream: | ||
| def __init__(self, stream): | ||
| self.stream = stream | ||
|
|
||
| def read(self, n=1): | ||
| data = self.stream.read(n) | ||
| if len(data) != n: | ||
| # this is a problem but in most cases we have reached EOF | ||
| raise EOFError | ||
| return data | ||
|
|
||
| def read_byte(self): | ||
| return struct.unpack("b", self.read(1))[0] | ||
|
|
||
| def read_long(self): | ||
| return struct.unpack(">q", self.read(8))[0] | ||
|
|
||
| def read_utf(self): | ||
| utf_length = struct.unpack(">H", self.read(2))[0] | ||
| return self.read(utf_length) | ||
|
|
||
| def read_int(self): | ||
| return struct.unpack(">i", self.read(4))[0] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| about_resource: java_stream.py | ||
| name: java_stream.py | ||
| version: 7d118ceef9746981e6bc198861125ca2bb6f920f | ||
| homepage_url: https://github.com/arngarden/python_java_datastream | ||
| owner: Gustav Arngården | ||
| copyright: Copyright (c) 2014 Gustav Arngården | ||
| download_url: https://raw.githubusercontent.com/arngarden/python_java_datastream/7d118ceef9746981e6bc198861125ca2bb6f920f/data_input_stream.py | ||
| license_text_file: license_expression: mit | ||
| licenses: | ||
| - key: mit | ||
| file: java_stream.py.LICENSE | ||
|
|
||
| vcs_tool: git | ||
| vcs_repo: https://github.com/arngarden/python_java_datastream |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| The MIT License (MIT) | ||
|
|
||
| Copyright (c) 2014 Gustav Arngården | ||
|
|
||
| Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| of this software and associated documentation files (the "Software"), to deal | ||
| in the Software without restriction, including without limitation the rights | ||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| copies of the Software, and to permit persons to whom the Software is | ||
| furnished to do so, subject to the following conditions: | ||
|
|
||
| The above copyright notice and this permission notice shall be included in all | ||
| copies or substantial portions of the Software. | ||
|
|
||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
| SOFTWARE. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@JonoYang @AyanSinhaMahapatra I think we should avoid computing the package URL path and just use this function.
ppath = hashid.get_package_purls_yml_file_path(package)