@@ -17,7 +17,9 @@ limitations under the License.
1717package differs
1818
1919import (
20+ "bufio"
2021 "io/ioutil"
22+ "os"
2123 "path/filepath"
2224 "regexp"
2325 "strings"
@@ -92,18 +94,42 @@ func (a PipAnalyzer) getPackages(image pkgutil.Image) (map[string]map[string]uti
9294 packageName := packageMatch [1 ]
9395 version := packageMatch [2 ][:len (packageMatch [2 ])- 1 ]
9496
95- // Retrieves size for actual package/script corresponding to each dist-info metadata directory
96- // by taking the file entry alphabetically before it (for a package) or after it (for a script)
97+ // First, try to use "top_level.txt".
98+ // Many egg packages contain a "top_level.txt" file describing the directories containing the
99+ // required code. Combining the sizes of each of these directories should give the total size.
97100 var size int64
98- if i - 1 >= 0 && contents [i - 1 ].Name () == packageName {
99- packagePath := filepath .Join (pythonPath , packageName )
100- size = pkgutil .GetSize (packagePath )
101- } else if i + 1 < len (contents ) && contents [i + 1 ].Name () == packageName + ".py" {
102- size = contents [i + 1 ].Size ()
101+ topLevelReader , err := os .Open (filepath .Join (pythonPath , fileName , "top_level.txt" ))
102+ if err == nil {
103+ scanner := bufio .NewScanner (topLevelReader )
104+ scanner .Split (bufio .ScanLines )
105+ for scanner .Scan () {
106+ // check if directory exists first, then retrieve size
107+ contentPath := filepath .Join (pythonPath , scanner .Text ())
108+ if _ , err := os .Stat (contentPath ); err == nil {
109+ size = size + pkgutil .GetSize (contentPath )
110+ } else if _ , err := os .Stat (contentPath + ".py" ); err == nil {
111+ // sometimes the top level content is just a single python file; try this too
112+ size = size + pkgutil .GetSize (contentPath + ".py" )
113+ }
114+ }
103115 } else {
104- logrus .Errorf ("Could not find Python package %s for corresponding metadata info" , packageName )
105- continue
116+ // if we didn't find a top_level.txt, we'll try the previous alphabetical directory entry heuristic
117+ logrus .Infof ("unable to use top_level.txt: falling back to previous alphabetical directory entry heuristic..." )
118+
119+ // Retrieves size for actual package/script corresponding to each dist-info metadata directory
120+ // by taking the file entry alphabetically before it (for a package) or after it (for a script)
121+ // var size int64
122+ if i - 1 >= 0 && contents [i - 1 ].Name () == packageName {
123+ packagePath := filepath .Join (pythonPath , packageName )
124+ size = pkgutil .GetSize (packagePath )
125+ } else if i + 1 < len (contents ) && contents [i + 1 ].Name () == packageName + ".py" {
126+ size = contents [i + 1 ].Size ()
127+ } else {
128+ logrus .Errorf ("Could not find Python package %s for corresponding metadata info" , packageName )
129+ continue
130+ }
106131 }
132+
107133 currPackage := util.PackageInfo {Version : version , Size : size }
108134 mapPath := strings .Replace (pythonPath , path , "" , 1 )
109135 addToMap (packages , packageName , mapPath , currPackage )
0 commit comments