Skip to content

Commit 6bff37c

Browse files
waskyosipma
authored and committed
Switch from using jar to using zip
That way users don't need to install Java to run us. A jar is just a zip file, so the change is purely cosmetic. The OCaml side may be more of a rabbit hole, which may then extend to the other analyzers.

There are still some things using the `.jar` extension:

- `bchsummaries.jar`, since that file is a copy of what we have in the OCaml analyzer repo:
  https://github.com/static-analysis-engineering/codehawk/tree/master/CodeHawk/CHB/bchsummaries
  https://github.com/static-analysis-engineering/codehawk/blob/master/CodeHawk/CHB/bchsummaries/makejar.sh
- `*functions.jar` in the analysis folder: the OCaml analyzer reads in this file and expects that filename:
  https://github.com/static-analysis-engineering/codehawk/blob/master/CodeHawk/CHB/bchlib/bCHPreFileIO.ml#L183-L191
1 parent e0a9f73 commit 6bff37c

File tree

9 files changed

+56
-55
lines changed

9 files changed

+56
-55
lines changed

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ development and thus somewhat experimental.
2020

2121
### Requirements
2222

23-
The command-line interface requires python3.5 or higher. The analyzer
24-
requires a Java development kit (to provide the `jar` tool).
23+
The command-line interface requires python3.5 or higher.
2524

2625
Build instructions for the CodeHawk Binary Analyzer are available
2726
[here](https://github.com/static-analysis-engineering/codehawk/tree/master/CodeHawk).

chb/app/AppAccess.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def __init__(
103103
"""Initializes access to analysis results."""
104104
self._path = path
105105
self._filename = filename
106-
self._deps = deps # list of summary jars registered as dependencies
106+
self._deps = deps # list of summary zips registered as dependencies
107107
self._header_ty: Type[HeaderTy] = fileformat # currently supported: elf, pe
108108

109109
self._userdata: Optional[UserData] = None

chb/cmdline/AnalysisManager.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def __init__(
8888
Arguments:
8989
- path: path of the directory that holds the target executable
9090
- filename: filename of the target executable
91-
- deps: list of summary jars
91+
- deps: list of summary zips
9292
- hints: Dictionary with items to add to the userdata file
9393
- elf/mips/arm: modifiers (default is x86 PE)
9494
"""
@@ -455,7 +455,7 @@ def _analyze_until_stable(
455455
preamble_cutoff: int = 12) -> int:
456456
cwd = os.getcwd()
457457
os.chdir(self.path) # temporary change in directory
458-
functionsjarfile = UF.get_functionsjar_filename(self.path, self.filename)
458+
functionszipfile = UF.get_functionszip_filename(self.path, self.filename)
459459
analysisdir = UF.get_analysis_dir(self.path, self.filename)
460460
cmd = [self.chx86_analyze, "-summaries", self.chsummaries]
461461
cmd.extend(["-preamble_cutoff", str(preamble_cutoff)])
@@ -510,7 +510,7 @@ def _analyze_until_stable(
510510
cmd.append("-fail_on_function_failure")
511511

512512
cmd.extend(["-analyze", self.filename])
513-
jarcmd = ["jar", "cf", functionsjarfile, "-C", analysisdir, "functions"]
513+
zipcmd = ["zip", "-r", functionszipfile, "functions"]
514514
print_progress_update("Analyzing "
515515
+ self.filename
516516
+ " (max "
@@ -543,17 +543,17 @@ def _analyze_until_stable(
543543
or (count > iterations))
544544

545545
if isfinished:
546-
chklogger.logger.debug("execute command %s", " ".join(jarcmd))
547-
subprocess.call(jarcmd, stderr=subprocess.STDOUT)
546+
chklogger.logger.debug("execute zip command %s", " ".join(zipcmd))
547+
subprocess.call(zipcmd, stderr=subprocess.STDOUT, cwd=analysisdir)
548548
fincmd = cmd + ["-collectdata"]
549549
if self.use_ssa:
550550
fincmd = fincmd + ["-ssa"]
551551
if self.no_varinvs:
552552
fincmd = fincmd + ["-no_varinvs"]
553553
chklogger.logger.debug("execute command %s", " ".join(fincmd))
554554
result = self._call_analysis(fincmd, timeout=timeout)
555-
chklogger.logger.debug("execute command %s", " ".join(jarcmd))
556-
subprocess.call(jarcmd, stderr=subprocess.STDOUT)
555+
chklogger.logger.debug("execute zip command %s", " ".join(zipcmd))
556+
subprocess.call(zipcmd, stderr=subprocess.STDOUT, cwd=analysisdir)
557557
count += 1
558558
(stable, results, r_update) = self._get_results()
559559
print_progress_update(r_update + " " + self.filename)
@@ -563,11 +563,12 @@ def _analyze_until_stable(
563563
print("\n".join(lines))
564564
return isstable == "yes"
565565

566-
chklogger.logger.debug("execute command %s", " ".join(jarcmd))
567-
subprocess.call(jarcmd, stderr=subprocess.STDOUT)
566+
chklogger.logger.debug("execute zip command %s", " ".join(zipcmd))
567+
subprocess.call(zipcmd, stderr=subprocess.STDOUT, cwd=analysisdir)
568568
result = self._call_analysis(cmd, timeout=timeout)
569569
if result != 0:
570-
chklogger.logger.debug("return cwd %s", cwd)
570+
chklogger.logger.error("zip command failed with return code %s, "
571+
"changing back to folder %s", result, cwd)
571572
os.chdir(cwd) # return to original directory
572573
print("\n".join(lines))
573574
return result

chb/cmdline/chkx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ def summariescommand(args: argparse.Namespace) -> NoReturn:
245245
def summarieslistcommand(args: argparse.Namespace) -> NoReturn:
246246
print("The summaries list command provides access to function summaries.")
247247
print("It can be followed by the following subcommands:")
248-
print(" dlls output a list of dlls provided (per jarfile)")
248+
print(" dlls output a list of dlls provided (per zipfile)")
249249
print(" dll-functions <dll-1> ... <dll-n> output a list of functions for each dll listed")
250250
print(" so-functions output a list of shared-object functions (ELF)")
251251
exit(0)
@@ -433,7 +433,7 @@ def parse() -> argparse.Namespace:
433433
'--thirdpartysummaries',
434434
nargs="*",
435435
default=[],
436-
help='summary jars for third party libraries')
436+
help='summary zips for third party libraries')
437437
analyzecmd.add_argument(
438438
"--so_libraries",
439439
nargs="*",

chb/cmdline/summariescmds.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,10 @@ def summaries_dlls_cmd(args: argparse.Namespace) -> NoReturn:
7676
models = ModelsAccess()
7777

7878
modeldlls = models.dlls()
79-
for jar in modeldlls:
80-
print(jar)
79+
for zip_f in modeldlls:
80+
print(zip_f)
8181
print("-" * 80)
82-
for dll in sorted(modeldlls[jar]):
82+
for dll in sorted(modeldlls[zip_f]):
8383
print(" " + dll)
8484
print("-" * 80)
8585
exit(0)
@@ -132,18 +132,18 @@ def summaries_so_functions_cmd(args: argparse.Namespace) -> NoReturn:
132132

133133
models = ModelsAccess()
134134

135-
# returns a dictionary with so-functions for different jars
135+
# returns a dictionary with so-functions for different zips
136136
sofunctions = models.all_so_function_summaries()
137-
for jar in sorted(sofunctions):
137+
for zip_f in sorted(sofunctions):
138138
print("\nShared object functions from "
139-
+ jar
139+
+ zip_f
140140
+ " ("
141-
+ str(len(sofunctions[jar]))
141+
+ str(len(sofunctions[zip_f]))
142142
+ ")")
143143
print("=" * 80)
144144
pdrcounter = 0
145145
pdwcounter = 0
146-
for f in sorted(sofunctions[jar], key=lambda f: f.name):
146+
for f in sorted(sofunctions[zip_f], key=lambda f: f.name):
147147
summary = models.so_function_summary(f.name)
148148
prec = summary.semantics.preconditions
149149
pdread = len([p for p in prec if p.is_deref_read])
@@ -155,7 +155,7 @@ def summaries_so_functions_cmd(args: argparse.Namespace) -> NoReturn:
155155
pdwcounter += 1
156156
print("=" * 80)
157157

158-
total = sum(len(sofunctions[jar]) for jar in sofunctions)
158+
total = sum(len(sofunctions[zip_f]) for zip_f in sofunctions)
159159
print(
160160
"\nTotal: "
161161
+ str(total)

chb/models/JniFunctionSummaryLibrary.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class JniFunctionSummaryLibrary(L.FunctionSummaryLibrary):
4040
4141
Native methods are indexed by numbers, roughly through 231. Many of these
4242
methods are similar, differring only in the type to which they are
43-
applicable. The summaries in bchsummaries.jar make use of templates that
43+
applicable. The summaries in the bchsummaries archive make use of templates that
4444
can be instantiated for these different types.
4545
4646
For example, for jni_190.xml:

chb/models/ModelsAccess.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -45,56 +45,56 @@ class ModelsAccess(object):
4545
"""Main entry point for library function summaries.
4646
4747
The main summary collection is obtained from the configured
48-
bchummaries.jar. Other summary collections may be added via
49-
additional jarfiles, specified with depjars.
48+
bchummaries.zip. Other summary collections may be added via
49+
additional zipfiles, specified with depzips.
5050
"""
5151

5252
def __init__(self,
53-
depjars: Sequence[str] = []) -> None:
54-
"""Initialize library models access with jarfile."""
55-
self._bchsummariesjarfilename = Config().summaries
56-
self._depjars = depjars
53+
depzips: Sequence[str] = []) -> None:
54+
"""Initialize library models access with zipfile."""
55+
self._bchsummarieszipfilename = Config().summaries
56+
self._depzips = depzips
5757
self._bchsummaries: Optional[SummaryCollection] = None
5858
self._dependencies: Sequence[SummaryCollection] = []
5959
self._dlls: Dict[str, Sequence[str]] = {}
6060
self._sofunctionsummaries: Dict[str, Sequence[FunctionSummary]] = {}
6161

6262
@property
63-
def depjars(self) -> Sequence[str]:
64-
return self._depjars
63+
def depzips(self) -> Sequence[str]:
64+
return self._depzips
6565

6666
@property
67-
def bchsummariesjarfilename(self) -> str:
68-
return self._bchsummariesjarfilename
67+
def bchsummarieszipfilename(self) -> str:
68+
return self._bchsummarieszipfilename
6969

7070
@property
7171
def bchsummaries(self) -> SummaryCollection:
7272
if self._bchsummaries is None:
7373
self._bchsummaries = SummaryCollection(
74-
self, self.bchsummariesjarfilename)
74+
self, self.bchsummarieszipfilename)
7575
return self._bchsummaries
7676

7777
@property
7878
def dependencies(self) -> Sequence[SummaryCollection]:
7979
if len(self._dependencies) == 0:
80-
self._dependencies = [SummaryCollection(self, j) for j in self.depjars]
80+
self._dependencies = [SummaryCollection(self, j) for j in self.depzips]
8181
return self._dependencies
8282

8383
@property
8484
def stats(self) -> str:
8585
lines: List[str] = []
8686
dlls = self.dlls()
87-
for jar in dlls:
88-
lines.append(jar.ljust(20) + str(len(dlls[jar])) + " dlls")
87+
for zip_f in dlls:
88+
lines.append(zip_f.ljust(20) + str(len(dlls[zip_f])) + " dlls")
8989
return "\n".join(lines)
9090

9191
def dlls(self) -> Mapping[str, Sequence[str]]:
92-
"""Return a mapping from jarfilename to list of function names."""
92+
"""Return a mapping from zipfilename to list of function names."""
9393

9494
if len(self._dlls) == 0:
9595
self._dlls["bchsummaries"] = self.bchsummaries.dlls
9696
for d in self.dependencies:
97-
self._dlls[d.jarfilename] = d.dlls
97+
self._dlls[d.zipfilename] = d.dlls
9898
return self._dlls
9999

100100
def has_dll_function_summary(self, dll: str, fname: str) -> bool:
@@ -140,15 +140,15 @@ def so_function_summary(self, fname: str) -> FunctionSummary:
140140
return self.bchsummaries.so_function_summary(fname)
141141

142142
def all_so_function_summaries(self) -> Mapping[str, Sequence[FunctionSummary]]:
143-
"""Return a mapping from jarfilename to list of function summaries."""
143+
"""Return a mapping from zipfilename to list of function summaries."""
144144

145145
if len(self._sofunctionsummaries) == 0:
146146
sosummaries = self.bchsummaries.all_so_function_summaries()
147147
self._sofunctionsummaries["bchsummaries"] = sosummaries
148148
for d in self.dependencies:
149149
if d.has_so_functions():
150150
self._sofunctionsummaries[
151-
d.jarfilename] = d.all_so_function_summaries()
151+
d.zipfilename] = d.all_so_function_summaries()
152152
return self._sofunctionsummaries
153153

154154
def enum_definitions(self) -> Mapping[str, DllEnumDefinitions]:

chb/models/SummaryCollection.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,15 @@
5151

5252

5353
class SummaryCollection:
54-
"""Represents all summary entities in a single jar file."""
54+
"""Represents all summary entities in a single zip file."""
5555

5656
def __init__(
5757
self,
5858
models: "ModelsAccess",
59-
jarfilename: str) -> None:
59+
zipfilename: str) -> None:
6060
self._models = models
61-
self._jarfilename = jarfilename
62-
self._jarfile = zipfile.ZipFile(self.jarfilename, "r")
61+
self._zipfilename = zipfilename
62+
self._zipfile = zipfile.ZipFile(self.zipfilename, "r")
6363
self._filenames: List[str] = []
6464
self._directorynames: List[str] = []
6565
self._dlls: List[str] = []
@@ -73,17 +73,17 @@ def models(self) -> "ModelsAccess":
7373
return self._models
7474

7575
@property
76-
def jarfile(self) -> zipfile.ZipFile:
77-
return self._jarfile
76+
def zipfile(self) -> zipfile.ZipFile:
77+
return self._zipfile
7878

7979
@property
80-
def jarfilename(self) -> str:
81-
return self._jarfilename
80+
def zipfilename(self) -> str:
81+
return self._zipfilename
8282

8383
@property
8484
def filenames(self) -> List[str]:
8585
if len(self._filenames) == 0:
86-
for info in self.jarfile.infolist():
86+
for info in self.zipfile.infolist():
8787
self._filenames.append(info.filename)
8888
return self._filenames
8989

@@ -336,7 +336,7 @@ def retrieve_ref_jni_function_summary_xnode(
336336
raise UF.CHBError("Retrieval of jni references not implemented yet")
337337

338338
def _get_summary_xnode(self, filename: str, tag: str) -> ET.Element:
339-
zfile = self.jarfile.read(filename).decode('utf-8')
339+
zfile = self.zipfile.read(filename).decode('utf-8')
340340
try:
341341
xnode = ET.fromstring(str(zfile)).find(tag)
342342
except ET.ParseError as e:

chb/util/fileutil.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
x_global_state.xml
5151
x_global_locations.xml
5252
x_system_info.xml
53-
x_functions.jar
53+
x_functions.zip
5454
x_asm.log
5555
x_orphan.log
5656
x_bdict.log
@@ -637,8 +637,9 @@ def get_interface_dictionary_xnode(path: str, xfile: str) -> ET.Element:
637637
return get_chb_xnode(filename, "interface-dictionary")
638638

639639

640-
def get_functionsjar_filename(path: str, xfile: str) -> str:
640+
def get_functionszip_filename(path: str, xfile: str) -> str:
641641
fdir = get_analysis_dir(path, xfile)
642+
# For now we keep the .jar extension until we update the ocaml analyzer
642643
return get_chb_filename(fdir, xfile, "functions.jar")
643644

644645

0 commit comments

Comments
 (0)