@@ -914,6 +914,12 @@ def assemble(cls, package_data, resource, codebase, package_adder=add_to_package
914914 not be further processed,
915915 - a Dependency to add to top-level dependencies
916916
917+ Package items must be yielded before Dependency or Resource items. This
918+ is to ensure that a Package is created before we associate a Resource or
919+ Dependency to a Package. This is particularly important in the case where
920+ we are calling the `assemble()` method outside of the scancode-toolkit
921+ context.
922+
917923 The approach is to find and process all the neighboring related datafiles
918924 to this datafile at once.
919925
@@ -938,14 +944,14 @@ def assemble(cls, package_data, resource, codebase, package_adder=add_to_package
938944 if not package .license_expression :
939945 package .license_expression = cls .compute_normalized_license (package )
940946
947+ yield package
948+
941949 cls .assign_package_to_resources (
942950 package = package ,
943951 resource = resource ,
944952 codebase = codebase ,
945953 package_adder = package_adder ,
946954 )
947-
948- yield package
949955 else :
950956 # we have no package, so deps are not for a specific package uid
951957 package_uid = None
@@ -1038,6 +1044,13 @@ def assemble_from_many(cls, pkgdata_resources, codebase, package_adder=add_to_pa
10381044 This is a convenience method that subclasses can reuse when overriding
10391045 `assemble()`
10401046
1047+ Like in ``DatafileHandler.assemble()``, Package items must be yielded
1048+ before Dependency or Resource items. This is to ensure that a Package is
1049+ created before we associate a Resource or Dependency to a Package. This
1050+ is particularly important in the case where we are calling the
1051+ ``assemble()`` method outside of the scancode-toolkit context, as
1052+ ``assemble()`` can call ``assemble_from_many()``.
1053+
10411054 NOTE: ATTENTION!: this may not work well for datafile that yield
10421055 multiple PackageData for unrelated Packages
10431056 """
@@ -1047,6 +1060,12 @@ def assemble_from_many(cls, pkgdata_resources, codebase, package_adder=add_to_pa
10471060
10481061 # process each package in sequence. The first item creates a package and
10491062 # the other only update
1063+ # We are saving the Packages, Dependencies, and Resources in lists until
1064+ # after we go through `pkgdata_resources` for all Package data, then we
1065+ # yield Packages, then Dependencies, then Resources.
1066+ dependencies = []
1067+ resources = []
1068+ resources_from_package = []
10501069 for package_data , resource in pkgdata_resources :
10511070 if not base_resource :
10521071 base_resource = resource
@@ -1059,8 +1078,6 @@ def assemble_from_many(cls, pkgdata_resources, codebase, package_adder=add_to_pa
10591078 datafile_path = resource .path ,
10601079 )
10611080 package_uid = package .package_uid
1062- if package_uid :
1063- package_adder (package_uid , resource , codebase )
10641081 else :
10651082 # FIXME: What if the package_data is NOT for the same package as package?
10661083 # FIXME: What if the update did not do anything? (it does return True or False)
@@ -1069,31 +1086,40 @@ def assemble_from_many(cls, pkgdata_resources, codebase, package_adder=add_to_pa
10691086 package_data = package_data ,
10701087 datafile_path = resource .path ,
10711088 )
1072- if package_uid :
1073- package_adder (package_uid , resource , codebase )
1089+
1090+ if package_uid :
1091+ resources_from_package .append ((package_uid , resource ,))
10741092
10751093 # in all cases yield possible dependencies
10761094 dependent_packages = package_data .dependencies
10771095 if dependent_packages :
1078- yield from Dependency .from_dependent_packages (
1096+ p_deps = Dependency .from_dependent_packages (
10791097 dependent_packages = dependent_packages ,
10801098 datafile_path = resource .path ,
10811099 datasource_id = package_data .datasource_id ,
10821100 package_uid = package_uid ,
10831101 )
1102+ dependencies .extend (list (p_deps ))
10841103
10851104 # we save this resource to yield it later, as we do not want it further processed
1086- yield resource
1087-
1088- # the whole parent subtree of the base_resource is for this package
1089- if package_uid :
1090- for res in base_resource .walk (codebase ):
1091- package_adder (package_uid , res , codebase )
1105+ resources .append (resource )
10921106
1107+ # Yield Packages, Dependencies, and Resources
10931108 if package :
10941109 if not package .license_expression :
10951110 package .license_expression = cls .compute_normalized_license (package )
10961111 yield package
1112+ yield from dependencies
1113+ yield from resources
1114+
1115+ # Associate Package to Resources once they have been yielded
1116+ for package_uid , resource in resources_from_package :
1117+ package_adder (package_uid , resource , codebase )
1118+
1119+ # the whole parent subtree of the base_resource is for this package
1120+ if package_uid :
1121+ for res in base_resource .walk (codebase ):
1122+ package_adder (package_uid , res , codebase )
10971123
10981124 @classmethod
10991125 def assemble_from_many_datafiles (
0 commit comments