Skip to content

Commit 721488d

Browse files
committed
Updated manifest detection in classify.py to use dynamic handler-based patterns
Replaced the hardcoded _MANIFEST_ENDS list of extensions in summarycode/classify.py with a dynamically generated set of manifest file extensions derived from APPLICATION_PACKAGE_DATAFILE_HANDLERS. This ensures that the is_manifest classification flag uses the latest and most comprehensive patterns defined across all supported package handlers.
1 parent 4b57a7f commit 721488d

File tree

1 file changed

+26
-45
lines changed

1 file changed

+26
-45
lines changed

src/summarycode/classify.py

Lines changed: 26 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,31 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10+
from packagedcode import APPLICATION_PACKAGE_DATAFILE_HANDLERS
11+
12+
def get_dynamic_manifestends(handlers=None):
    """
    Return a set of lowercased manifest path endings built from the
    ``path_patterns`` attribute of package datafile handler classes.

    ``handlers`` is an optional iterable of handler classes. When it is None
    (the default), APPLICATION_PACKAGE_DATAFILE_HANDLERS is used.

    Only patterns that start with a wildcard contribute an ending:
    - '**/*.csproj' contributes '.csproj'
    - '*.json' contributes '.json' and '*/pom.xml' contributes '/pom.xml'
    Patterns that do not start with '*' are ignored, and so are patterns that
    leave nothing after the wildcard prefix is removed.
    """
    if handlers is None:
        handlers = APPLICATION_PACKAGE_DATAFILE_HANDLERS

    manifest_ends = set()
    for handler_class in handlers:
        # A handler may have no ``path_patterns`` attribute, or have it set to
        # None: treat both as "no patterns".
        patterns = getattr(handler_class, 'path_patterns', None) or ()
        for pattern in patterns:
            # Check the more specific '**/*.' prefix first. Such a pattern also
            # starts with '*', so testing the generic prefix first would
            # shadow this branch and produce an ending like '*/*.csproj'.
            if pattern.startswith('**/*.'):
                # Handles glob patterns like '**/*.csproj' -> '.csproj'
                ext = pattern[5:]
                if ext:
                    manifest_ends.add('.' + ext.lower())
            elif pattern.startswith('*'):
                # Drop the leading wildcard, e.g., '*.json' -> '.json'
                ext = pattern[1:]
                if ext:
                    manifest_ends.add(ext.lower())

    return manifest_ends
1035

1136
def get_relative_path(root_path, path):
1237
"""
@@ -39,51 +64,7 @@ def get_relative_path(root_path, path):
3964
'patents',
4065
)
4166

42-
_MANIFEST_ENDS = {
43-
'.about': 'ABOUT file',
44-
'/bower.json': 'bower',
45-
'/project.clj': 'clojure',
46-
'.podspec': 'cocoapod',
47-
'/composer.json': 'composer',
48-
'/description': 'cran',
49-
'/elm-package.json': 'elm',
50-
'/+compact_manifest': 'freebsd',
51-
'+manifest': 'freebsd',
52-
'.gemspec': 'gem',
53-
'/metadata': 'gem',
54-
# the extracted metadata of a gem archive
55-
'/metadata.gz-extract': 'gem',
56-
'/build.gradle': 'gradle',
57-
'/project.clj': 'clojure',
58-
'.pom': 'maven',
59-
'/pom.xml': 'maven',
60-
61-
'.cabal': 'haskell',
62-
'/haxelib.json': 'haxe',
63-
'/package.json': 'npm',
64-
'.nuspec': 'nuget',
65-
'.pod': 'perl',
66-
'/meta.yml': 'perl',
67-
'/dist.ini': 'perl',
68-
69-
'/pipfile': 'pypi',
70-
'/setup.cfg': 'pypi',
71-
'/setup.py': 'pypi',
72-
'/PKG-INFO': 'pypi',
73-
'/pyproject.toml': 'pypi',
74-
'.spec': 'rpm',
75-
'/cargo.toml': 'rust',
76-
'.spdx': 'spdx',
77-
'/dependencies': 'generic',
78-
79-
# note that these two cannot be top-level for now
80-
'debian/copyright': 'deb',
81-
'meta-inf/manifest.mf': 'maven',
82-
83-
# TODO: Maven also has sometimes a pom under META-INF/
84-
# 'META-INF/manifest.mf': 'JAR and OSGI',
85-
86-
}
67+
_MANIFEST_ENDS = get_dynamic_manifestends()
8768

8869
MANIFEST_ENDS = tuple(_MANIFEST_ENDS)
8970

0 commit comments

Comments
 (0)