1818import tempfile
1919import zipfile
2020from configparser import ConfigParser
21+ from fnmatch import fnmatchcase
2122from pathlib import Path
2223from typing import NamedTuple
2324
@@ -83,6 +84,10 @@ class PythonEggPkgInfoFile(models.DatafileHandler):
8384
8485 @classmethod
8586 def parse (cls , location , package_only = False ):
87+ """
88+ Parse package data from a PKG-INFO file and other manifests present in
89+ neighboring files as needed when an installed layout is found.
90+ """
8691 yield parse_metadata (
8792 location = location ,
8893 datasource_id = cls .datasource_id ,
@@ -108,6 +113,10 @@ class PythonEditableInstallationPkgInfoFile(models.DatafileHandler):
108113
109114 @classmethod
110115 def parse (cls , location , package_only = False ):
116+ """
117+ Parse package data from a PKG-INFO file and other manifests present in
118+ neighboring files as needed when an installed layout is found.
119+ """
111120 yield parse_metadata (
112121 location = location ,
113122 datasource_id = cls .datasource_id ,
@@ -150,12 +159,11 @@ class BaseExtractedPythonLayout(models.DatafileHandler):
150159 def assemble (cls , package_data , resource , codebase , package_adder ):
151160 # a source distribution can have many manifests
152161 datafile_name_patterns = (
153- 'Pipfile.lock' ,
154- 'Pipfile' ,
155- ) + PipRequirementsFileHandler . path_patterns + PyprojectTomlHandler . path_patterns
162+ PipfileHandler . path_patterns + PipfileLockHandler . path_patterns
163+ + PipRequirementsFileHandler . path_patterns + PyprojectTomlHandler . path_patterns
164+ )
156165
157- # TODO: we want PKG-INFO first, then (setup.py, setup.cfg), then pyproject.toml for poetry
158- # then we have the rest of the lock files (pipfile, pipfile.lock, etc.)
166+ is_datafile_pypi = any (fnmatchcase (resource .path , pat ) for pat in datafile_name_patterns )
159167
160168 package_resource = None
161169 if resource .name == 'PKG-INFO' :
@@ -186,18 +194,21 @@ def assemble(cls, package_data, resource, codebase, package_adder):
186194 continue
187195 package_resource = child
188196 break
189- elif resource .name in datafile_name_patterns :
197+
198+ elif is_datafile_pypi :
190199 if resource .has_parent ():
191200 siblings = resource .siblings (codebase )
192- package_resource = [r for r in siblings if r .name == 'PKG-INFO' ]
201+ package_resources = [r for r in siblings if r .name == 'PKG-INFO' ]
193202 if package_resource :
194- package_resource = package_resource [0 ]
203+ package_resource = package_resources [0 ]
195204
196205 package = None
197206 if package_resource :
198207 pkg_data = package_resource .package_data [0 ]
199208 pkg_data = models .PackageData .from_dict (pkg_data )
200209 if pkg_data .purl :
210+ # We yield only the package and the resource here, not the dependencies, because
211+ # PKG-INFO also carries the dependencies gathered from its sibling files.
201212 package = create_package_from_package_data (
202213 package_data = pkg_data ,
203214 datafile_path = package_resource .path
@@ -207,11 +218,6 @@ def assemble(cls, package_data, resource, codebase, package_adder):
207218 package_adder (package .package_uid , package_resource , codebase )
208219 yield package_resource
209220
210- yield from yield_dependencies_from_package_data (
211- package_data = pkg_data ,
212- datafile_path = package_resource .path ,
213- package_uid = package .package_uid
214- )
215221 else :
216222 setup_resources = []
217223 if resource .has_parent ():
@@ -221,31 +227,50 @@ def assemble(cls, package_data, resource, codebase, package_adder):
221227 if r .name in ('setup.py' , 'setup.cfg' )
222228 and r .package_data
223229 ]
224-
225- setup_package_data = [
226- (setup_resource , models .PackageData .from_dict (setup_resource .package_data [0 ]))
227- for setup_resource in setup_resources
228- ]
229- setup_package_data = sorted (setup_package_data , key = lambda s : bool (s [1 ].purl ), reverse = True )
230- for setup_resource , setup_pkg_data in setup_package_data :
231- if setup_pkg_data .purl :
232- if not package :
233- package = create_package_from_package_data (
230+ if setup_resources :
231+ setup_package_data = [
232+ (setup_resource , models .PackageData .from_dict (setup_resource .package_data [0 ]))
233+ for setup_resource in setup_resources
234+ ]
235+ setup_package_data = sorted (setup_package_data , key = lambda s : bool (s [1 ].purl ), reverse = True )
236+ for setup_resource , setup_pkg_data in setup_package_data :
237+ if setup_pkg_data .purl :
238+ if not package :
239+ package = create_package_from_package_data (
240+ package_data = setup_pkg_data ,
241+ datafile_path = setup_resource .path ,
242+ )
243+ yield package
244+ package_resource = setup_resource
245+ else :
246+ package .update (setup_pkg_data , setup_resource .path )
247+ if package :
248+ for setup_resource , setup_pkg_data in setup_package_data :
249+ package_adder (package .package_uid , setup_resource , codebase )
250+ yield setup_resource
251+
252+ yield from yield_dependencies_from_package_data (
234253 package_data = setup_pkg_data ,
235254 datafile_path = setup_resource .path ,
255+ package_uid = package .package_uid
236256 )
237- yield package
238- package_resource = setup_resource
239- else :
240- package .update (setup_pkg_data , setup_resource .path )
241- if package :
242- for setup_resource , setup_pkg_data in setup_package_data :
243- package_adder (package .package_uid , setup_resource , codebase )
244- yield setup_resource
257+ else :
258+ package_resource = resource
259+ pkg_data = package_resource .package_data [0 ]
260+ pkg_data = models .PackageData .from_dict (pkg_data )
261+ if pkg_data .purl :
262+ package = create_package_from_package_data (
263+ package_data = pkg_data ,
264+ datafile_path = package_resource .path
265+ )
266+ yield package
267+
268+ package_adder (package .package_uid , package_resource , codebase )
269+ yield package_resource
245270
246271 yield from yield_dependencies_from_package_data (
247- package_data = setup_pkg_data ,
248- datafile_path = setup_resource .path ,
272+ package_data = pkg_data ,
273+ datafile_path = package_resource .path ,
249274 package_uid = package .package_uid
250275 )
251276
@@ -275,12 +300,20 @@ def assemble(cls, package_data, resource, codebase, package_adder):
275300 else :
276301 package_uid = None
277302
303+ # Yield dependencies from sibling manifests
278304 if package_resource :
279305 for sibling in package_resource .siblings (codebase ):
280- if sibling and sibling .name in datafile_name_patterns :
306+ if not sibling :
307+ continue
308+
309+ is_sibling_pypi_manifest = any (
310+ fnmatchcase (sibling .path , pat )
311+ for pat in datafile_name_patterns
312+ )
313+ if is_sibling_pypi_manifest :
281314 yield from yield_dependencies_from_package_resource (
282315 resource = sibling ,
283- package_uid = package_uid
316+ package_uid = package_uid ,
284317 )
285318
286319 if package_uid and package_uid not in sibling .for_packages :
@@ -981,6 +1014,10 @@ def parse_metadata(location, datasource_id, package_type, package_only=False):
9811014 if license_file :
9821015 extra_data ['license_file' ] = license_file
9831016
1017+ # FIXME: We are getting dependencies from other sibling files, which duplicates
1018+ # data at the package_data level. Is this necessary? The entire dependency
1019+ # relationships are also available in the requires.txt present in ``.egg-info``;
1020+ # should we store these nicely?
9841021 dependencies = get_dist_dependencies (dist )
9851022 file_references = list (get_file_references (dist ))
9861023
@@ -1240,6 +1277,8 @@ def parse(cls, location, package_only=False):
12401277 with open (location ) as f :
12411278 parser .read_file (f )
12421279
1280+ extra_data = {}
1281+
12431282 for section in parser .values ():
12441283 if section .name == 'options' :
12451284 scope_by_sub_section = {
@@ -1255,22 +1294,10 @@ def parse(cls, location, package_only=False):
12551294 reqs = list (get_requirement_from_section (section = section , sub_section = sub_section ))
12561295 dependent_packages .extend (cls .parse_reqs (reqs , scope ))
12571296 continue
1297+
1298+ # This is not a dependency, merely a required python version
12581299 python_requires_specifier = section [sub_section ]
1259- purl = PackageURL (
1260- type = "generic" ,
1261- name = "python" ,
1262- )
1263- resolved_purl = get_resolved_purl (purl = purl , specifiers = SpecifierSet (python_requires_specifier ))
1264- dependent_packages .append (
1265- models .DependentPackage (
1266- purl = str (resolved_purl .purl ),
1267- scope = scope ,
1268- is_runtime = True ,
1269- is_optional = False ,
1270- is_resolved = resolved_purl .is_resolved ,
1271- extracted_requirement = f"python_requires{ python_requires_specifier } " ,
1272- )
1273- )
1300+ extra_data ["python_requires" ] = python_requires_specifier
12741301
12751302 if section .name == "options.extras_require" :
12761303 for sub_section in section :
0 commit comments