Skip to content

Commit 2c32af2

Browse files
committed
Switch from using jar to using zip
That way users don't need to install Java to run this tool. A jar is just a zip file, so the change is purely cosmetic. The OCaml side may be more of a rabbit hole, which may then extend to the other analyzers.
There are still some things using the .jar extension:
- `bchsummaries.jar`, since that file is a copy of what we have in the OCaml analyzer repo: https://github.com/static-analysis-engineering/codehawk/tree/master/CodeHawk/CHB/bchsummaries https://github.com/static-analysis-engineering/codehawk/blob/master/CodeHawk/CHB/bchsummaries/makejar.sh
- `*functions.jar` in the analysis folder, since the OCaml analyzer reads in this file and expects that filename: https://github.com/static-analysis-engineering/codehawk/blob/master/CodeHawk/CHB/bchlib/bCHPreFileIO.ml#L183-L191
1 parent e0a9f73 commit 2c32af2

File tree

9 files changed

+56
-55
lines changed

9 files changed

+56
-55
lines changed

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ development and thus somewhat experimental.
2020

2121
### Requirements
2222

23-
The command-line interface requires python3.5 or higher. The analyzer
24-
requires a Java development kit (to provide the `jar` tool).
23+
The command-line interface requires python3.5 or higher.
2524

2625
Build instructions for the CodeHawk Binary Analyzer are available
2726
[here](https://github.com/static-analysis-engineering/codehawk/tree/master/CodeHawk).

chb/app/AppAccess.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def __init__(
103103
"""Initializes access to analysis results."""
104104
self._path = path
105105
self._filename = filename
106-
self._deps = deps # list of summary jars registered as dependencies
106+
self._deps = deps # list of summary zips registered as dependencies
107107
self._header_ty: Type[HeaderTy] = fileformat # currently supported: elf, pe
108108

109109
self._userdata: Optional[UserData] = None

chb/cmdline/AnalysisManager.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def __init__(
8888
Arguments:
8989
- path: path of the directory that holds the target executable
9090
- filename: filename of the target executable
91-
- deps: list of summary jars
91+
- deps: list of summary zips
9292
- hints: Dictionary with items to add to the userdata file
9393
- elf/mips/arm: modifiers (default is x86 PE)
9494
"""
@@ -455,7 +455,7 @@ def _analyze_until_stable(
455455
preamble_cutoff: int = 12) -> int:
456456
cwd = os.getcwd()
457457
os.chdir(self.path) # temporary change in directory
458-
functionsjarfile = UF.get_functionsjar_filename(self.path, self.filename)
458+
functionszipfile = UF.get_functionszip_filename(self.path, self.filename)
459459
analysisdir = UF.get_analysis_dir(self.path, self.filename)
460460
cmd = [self.chx86_analyze, "-summaries", self.chsummaries]
461461
cmd.extend(["-preamble_cutoff", str(preamble_cutoff)])
@@ -510,7 +510,7 @@ def _analyze_until_stable(
510510
cmd.append("-fail_on_function_failure")
511511

512512
cmd.extend(["-analyze", self.filename])
513-
jarcmd = ["jar", "cf", functionsjarfile, "-C", analysisdir, "functions"]
513+
zipcmd = ["zip", "-r", functionszipfile, "functions"]
514514
print_progress_update("Analyzing "
515515
+ self.filename
516516
+ " (max "
@@ -543,17 +543,17 @@ def _analyze_until_stable(
543543
or (count > iterations))
544544

545545
if isfinished:
546-
chklogger.logger.debug("execute command %s", " ".join(jarcmd))
547-
subprocess.call(jarcmd, stderr=subprocess.STDOUT)
546+
chklogger.logger.debug("execute zip command %s", " ".join(zipcmd))
547+
subprocess.call(zipcmd, stderr=subprocess.STDOUT, cwd=analysisdir)
548548
fincmd = cmd + ["-collectdata"]
549549
if self.use_ssa:
550550
fincmd = fincmd + ["-ssa"]
551551
if self.no_varinvs:
552552
fincmd = fincmd + ["-no_varinvs"]
553553
chklogger.logger.debug("execute command %s", " ".join(fincmd))
554554
result = self._call_analysis(fincmd, timeout=timeout)
555-
chklogger.logger.debug("execute command %s", " ".join(jarcmd))
556-
subprocess.call(jarcmd, stderr=subprocess.STDOUT)
555+
chklogger.logger.debug("execute zip command %s", " ".join(zipcmd))
556+
subprocess.call(zipcmd, stderr=subprocess.STDOUT, cwd=analysisdir)
557557
count += 1
558558
(stable, results, r_update) = self._get_results()
559559
print_progress_update(r_update + " " + self.filename)
@@ -563,11 +563,12 @@ def _analyze_until_stable(
563563
print("\n".join(lines))
564564
return isstable == "yes"
565565

566-
chklogger.logger.debug("execute command %s", " ".join(jarcmd))
567-
subprocess.call(jarcmd, stderr=subprocess.STDOUT)
566+
chklogger.logger.debug("execute zip command %s", " ".join(zipcmd))
567+
subprocess.call(zipcmd, stderr=subprocess.STDOUT, cwd=analysisdir)
568568
result = self._call_analysis(cmd, timeout=timeout)
569569
if result != 0:
570-
chklogger.logger.debug("return cwd %s", cwd)
570+
chklogger.logger.error("zip command failed with return code %s, "
571+
"changing back to folder %s", result, cwd)
571572
os.chdir(cwd) # return to original directory
572573
print("\n".join(lines))
573574
return result

chb/cmdline/chkx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ def summariescommand(args: argparse.Namespace) -> NoReturn:
245245
def summarieslistcommand(args: argparse.Namespace) -> NoReturn:
246246
print("The summaries list command provides access to function summaries.")
247247
print("It can be followed by the following subcommands:")
248-
print(" dlls output a list of dlls provided (per jarfile)")
248+
print(" dlls output a list of dlls provided (per zipfile)")
249249
print(" dll-functions <dll-1> ... <dll-n> output a list of functions for each dll listed")
250250
print(" so-functions output a list of shared-object functions (ELF)")
251251
exit(0)
@@ -433,7 +433,7 @@ def parse() -> argparse.Namespace:
433433
'--thirdpartysummaries',
434434
nargs="*",
435435
default=[],
436-
help='summary jars for third party libraries')
436+
help='summary zips for third party libraries')
437437
analyzecmd.add_argument(
438438
"--so_libraries",
439439
nargs="*",

chb/cmdline/summariescmds.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,10 @@ def summaries_dlls_cmd(args: argparse.Namespace) -> NoReturn:
7676
models = ModelsAccess()
7777

7878
modeldlls = models.dlls()
79-
for jar in modeldlls:
80-
print(jar)
79+
for zip_f in modeldlls:
80+
print(zip_f)
8181
print("-" * 80)
82-
for dll in sorted(modeldlls[jar]):
82+
for dll in sorted(modeldlls[zip_f]):
8383
print(" " + dll)
8484
print("-" * 80)
8585
exit(0)
@@ -132,18 +132,18 @@ def summaries_so_functions_cmd(args: argparse.Namespace) -> NoReturn:
132132

133133
models = ModelsAccess()
134134

135-
# returns a dictionary with so-functions for different jars
135+
# returns a dictionary with so-functions for different zips
136136
sofunctions = models.all_so_function_summaries()
137-
for jar in sorted(sofunctions):
137+
for zip_f in sorted(sofunctions):
138138
print("\nShared object functions from "
139-
+ jar
139+
+ zip_f
140140
+ " ("
141-
+ str(len(sofunctions[jar]))
141+
+ str(len(sofunctions[zip_f]))
142142
+ ")")
143143
print("=" * 80)
144144
pdrcounter = 0
145145
pdwcounter = 0
146-
for f in sorted(sofunctions[jar], key=lambda f: f.name):
146+
for f in sorted(sofunctions[zip_f], key=lambda f: f.name):
147147
summary = models.so_function_summary(f.name)
148148
prec = summary.semantics.preconditions
149149
pdread = len([p for p in prec if p.is_deref_read])
@@ -155,7 +155,7 @@ def summaries_so_functions_cmd(args: argparse.Namespace) -> NoReturn:
155155
pdwcounter += 1
156156
print("=" * 80)
157157

158-
total = sum(len(sofunctions[jar]) for jar in sofunctions)
158+
total = sum(len(sofunctions[zip_f]) for zip_f in sofunctions)
159159
print(
160160
"\nTotal: "
161161
+ str(total)

chb/models/JniFunctionSummaryLibrary.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class JniFunctionSummaryLibrary(L.FunctionSummaryLibrary):
4040
4141
Native methods are indexed by numbers, roughly through 231. Many of these
4242
methods are similar, differing only in the type to which they are
43-
applicable. The summaries in bchsummaries.jar make use of templates that
43+
applicable. The summaries in the bchsummaries archive make use of templates that
4444
can be instantiated for these different types.
4545
4646
For example, for jni_190.xml:

chb/models/ModelsAccess.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -45,56 +45,56 @@ class ModelsAccess(object):
4545
"""Main entry point for library function summaries.
4646
4747
The main summary collection is obtained from the configured
48-
bchummaries.jar. Other summary collections may be added via
49-
additional jarfiles, specified with depjars.
48+
bchsummaries archive. Other summary collections may be added via
49+
additional zipfiles, specified with depzips.
5050
"""
5151

5252
def __init__(self,
53-
depjars: Sequence[str] = []) -> None:
54-
"""Initialize library models access with jarfile."""
55-
self._bchsummariesjarfilename = Config().summaries
56-
self._depjars = depjars
53+
depzips: Sequence[str] = []) -> None:
54+
"""Initialize library models access with zipfile."""
55+
self._bchsummarieszipfilename = Config().summaries
56+
self._depzips = depzips
5757
self._bchsummaries: Optional[SummaryCollection] = None
5858
self._dependencies: Sequence[SummaryCollection] = []
5959
self._dlls: Dict[str, Sequence[str]] = {}
6060
self._sofunctionsummaries: Dict[str, Sequence[FunctionSummary]] = {}
6161

6262
@property
63-
def depjars(self) -> Sequence[str]:
64-
return self._depjars
63+
def depzips(self) -> Sequence[str]:
64+
return self._depzips
6565

6666
@property
67-
def bchsummariesjarfilename(self) -> str:
68-
return self._bchsummariesjarfilename
67+
def bchsummarieszipfilename(self) -> str:
68+
return self._bchsummarieszipfilename
6969

7070
@property
7171
def bchsummaries(self) -> SummaryCollection:
7272
if self._bchsummaries is None:
7373
self._bchsummaries = SummaryCollection(
74-
self, self.bchsummariesjarfilename)
74+
self, self.bchsummarieszipfilename)
7575
return self._bchsummaries
7676

7777
@property
7878
def dependencies(self) -> Sequence[SummaryCollection]:
7979
if len(self._dependencies) == 0:
80-
self._dependencies = [SummaryCollection(self, j) for j in self.depjars]
80+
self._dependencies = [SummaryCollection(self, j) for j in self.depzips]
8181
return self._dependencies
8282

8383
@property
8484
def stats(self) -> str:
8585
lines: List[str] = []
8686
dlls = self.dlls()
87-
for jar in dlls:
88-
lines.append(jar.ljust(20) + str(len(dlls[jar])) + " dlls")
87+
for zip_f in dlls:
88+
lines.append(zip_f.ljust(20) + str(len(dlls[zip_f])) + " dlls")
8989
return "\n".join(lines)
9090

9191
def dlls(self) -> Mapping[str, Sequence[str]]:
92-
"""Return a mapping from jarfilename to list of function names."""
92+
"""Return a mapping from zipfilename to list of function names."""
9393

9494
if len(self._dlls) == 0:
9595
self._dlls["bchsummaries"] = self.bchsummaries.dlls
9696
for d in self.dependencies:
97-
self._dlls[d.jarfilename] = d.dlls
97+
self._dlls[d.zipfilename] = d.dlls
9898
return self._dlls
9999

100100
def has_dll_function_summary(self, dll: str, fname: str) -> bool:
@@ -140,15 +140,15 @@ def so_function_summary(self, fname: str) -> FunctionSummary:
140140
return self.bchsummaries.so_function_summary(fname)
141141

142142
def all_so_function_summaries(self) -> Mapping[str, Sequence[FunctionSummary]]:
143-
"""Return a mapping from jarfilename to list of function summaries."""
143+
"""Return a mapping from zipfilename to list of function summaries."""
144144

145145
if len(self._sofunctionsummaries) == 0:
146146
sosummaries = self.bchsummaries.all_so_function_summaries()
147147
self._sofunctionsummaries["bchsummaries"] = sosummaries
148148
for d in self.dependencies:
149149
if d.has_so_functions():
150150
self._sofunctionsummaries[
151-
d.jarfilename] = d.all_so_function_summaries()
151+
d.zipfilename] = d.all_so_function_summaries()
152152
return self._sofunctionsummaries
153153

154154
def enum_definitions(self) -> Mapping[str, DllEnumDefinitions]:

chb/models/SummaryCollection.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,15 @@
5151

5252

5353
class SummaryCollection:
54-
"""Represents all summary entities in a single jar file."""
54+
"""Represents all summary entities in a single zip file."""
5555

5656
def __init__(
5757
self,
5858
models: "ModelsAccess",
59-
jarfilename: str) -> None:
59+
zipfilename: str) -> None:
6060
self._models = models
61-
self._jarfilename = jarfilename
62-
self._jarfile = zipfile.ZipFile(self.jarfilename, "r")
61+
self._zipfilename = zipfilename
62+
self._zipfile = zipfile.ZipFile(self.zipfilename, "r")
6363
self._filenames: List[str] = []
6464
self._directorynames: List[str] = []
6565
self._dlls: List[str] = []
@@ -73,17 +73,17 @@ def models(self) -> "ModelsAccess":
7373
return self._models
7474

7575
@property
76-
def jarfile(self) -> zipfile.ZipFile:
77-
return self._jarfile
76+
def zipfile(self) -> zipfile.ZipFile:
77+
return self._zipfile
7878

7979
@property
80-
def jarfilename(self) -> str:
81-
return self._jarfilename
80+
def zipfilename(self) -> str:
81+
return self._zipfilename
8282

8383
@property
8484
def filenames(self) -> List[str]:
8585
if len(self._filenames) == 0:
86-
for info in self.jarfile.infolist():
86+
for info in self.zipfile.infolist():
8787
self._filenames.append(info.filename)
8888
return self._filenames
8989

@@ -336,7 +336,7 @@ def retrieve_ref_jni_function_summary_xnode(
336336
raise UF.CHBError("Retrieval of jni references not implemented yet")
337337

338338
def _get_summary_xnode(self, filename: str, tag: str) -> ET.Element:
339-
zfile = self.jarfile.read(filename).decode('utf-8')
339+
zfile = self.zipfile.read(filename).decode('utf-8')
340340
try:
341341
xnode = ET.fromstring(str(zfile)).find(tag)
342342
except ET.ParseError as e:

chb/util/fileutil.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
x_global_state.xml
5151
x_global_locations.xml
5252
x_system_info.xml
53-
x_functions.jar
53+
x_functions.zip
5454
x_asm.log
5555
x_orphan.log
5656
x_bdict.log
@@ -637,8 +637,9 @@ def get_interface_dictionary_xnode(path: str, xfile: str) -> ET.Element:
637637
return get_chb_xnode(filename, "interface-dictionary")
638638

639639

640-
def get_functionsjar_filename(path: str, xfile: str) -> str:
640+
def get_functionszip_filename(path: str, xfile: str) -> str:
641641
fdir = get_analysis_dir(path, xfile)
642+
# For now we keep the .jar extension until we update the ocaml analyzer
642643
return get_chb_filename(fdir, xfile, "functions.jar")
643644

644645

0 commit comments

Comments
 (0)