Skip to content
This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit e9854dc

Browse files
author
Alessandro Greco
committed
Version 1.0.0
1 parent 7f18309 commit e9854dc

File tree

6 files changed

+317
-0
lines changed

6 files changed

+317
-0
lines changed

PDF_Parser.py

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
import sys
2+
import requests
3+
import base64
4+
import json
5+
from prettytable import PrettyTable
6+
import sys
7+
from termcolor import colored
8+
import os
9+
10+
11+
# Text accumulated for the log file. The functions in this module append to it
# only when it is already non-empty, so the script entry point seeds it with the
# first entry when logging was requested.
log_output = ""
12+
13+
def argv_error():
    """Print the command-line usage text and a few example invocations."""
    usage_lines = (
        "./script [option] [value] ...",
        "-p\t\tUse this argoument for set your PDF path\n or --path\n",
        "-A\t\tUse this argoument for set your VirusTotal API Key\n or --API-Key\n",
        "-v\t\tUse this argoument for view a lot more information\n or --verbose\n",
        "-l\t\tUse this argoument for save in a log file all verbose information\n or --log\n",
        "Examples:",
        "$ python3 script.py -p <PDF_DOCUMENT_PATH> -A <VirusTotal_API_Key>",
        "\tGeneric example",
        "$ python3 script.py -p malicious.pdf -A abcdefg123456789876543234567899876543456789876543456789876543 --verbose",
        "\tIt will print everything in output",
        "$ python3 script.py -p malicious.pdf -A abcdefg123456789876543234567899876543456789876543456789876543 --log",
        "\tIt will print everything in a log file in the same directory where is the script PDF_Parser.py",
    )
    # A single write: joining with "\n" plus print's own trailing newline yields
    # exactly one line break after every entry.
    print("\n".join(usage_lines))
26+
27+
def generic_error(error: str) -> None:
    """Print a generic troubleshooting hint followed by the specific error text.

    Args:
        error: Description of what failed; printed on its own line after a
            blank line.
    """
    hint = "\nSomething went wrong... check your private key and your pdf path :-/"
    print(hint, f"\n{error}", sep="\n")
30+
31+
def upload_file(File_Path: str, VirusTotal_API_Key: str, verbose: bool, timeout: float = 60.0) -> str:
    """Upload a PDF to the VirusTotal ``/api/v3/files`` endpoint.

    A successful response body is expected to look like::

        {
            "data": {
                "type": "analysis",
                "id": "<base64_file_id>"
            }
        }

    Args:
        File_Path: Path of the PDF to upload.
        VirusTotal_API_Key: Key sent in the ``x-apikey`` header.
        verbose: When true, the raw response body is printed.
        timeout: Seconds handed to ``requests`` as the request timeout.

    Returns:
        The raw response body on HTTP 200, or the sentinel string ``"-1"``
        when the file cannot be read, the request fails, or the server
        answers with any other status code.
    """
    global log_output

    url = "https://www.virustotal.com/api/v3/files"
    headers = {
        "accept": "application/json",
        "x-apikey": VirusTotal_API_Key
    }

    try:
        # The context manager closes the handle even when the request raises.
        with open(File_Path, "rb") as pdf_file:
            # Send only the base name so the local directory layout is not
            # disclosed as part of the multipart filename.
            files = {"file": (os.path.basename(File_Path), pdf_file, "application/pdf")}
            response = requests.post(url, files=files, headers=headers, timeout=timeout)
    except (OSError, requests.RequestException) as exc:
        generic_error(f"[*] Error occurred in upload_file function.\n{exc}")
        return "-1"

    if response.status_code != 200:
        generic_error(f"[*] Error occurred in upload_file function.\nError code: {response.status_code}")
        return "-1"

    if verbose:
        print(response.text)
    # A non-empty log buffer is this module's signal that logging is enabled.
    if log_output != "":
        log_output += "\n" + response.text

    return response.text
62+
63+
def json_load(json_in_string: str):
    """Parse a JSON document held in a string and return the decoded object."""
    decoded = json.loads(json_in_string)
    return decoded
68+
69+
def get_base64_file_id_from_response(response: str) -> str:
    """Return the ``data`` -> ``id`` value from a VirusTotal JSON response.

    Args:
        response: Response body as a JSON string.

    Returns:
        The value stored under ``["data"]["id"]``.
    """
    payload = json.loads(response)
    data_section = payload["data"]
    return data_section["id"]
75+
76+
def decrypt_from_base64(encrypted_string: str) -> str:
    """Decode a base64 string and return the result as ASCII text.

    Despite the name, no cryptography is involved: base64 is an encoding.

    Args:
        encrypted_string: Base64-encoded input.

    Returns:
        The decoded bytes interpreted as ASCII.
    """
    raw_bytes = base64.b64decode(encrypted_string)
    return str(raw_bytes, "ascii")
81+
82+
83+
def check_file(FileMD5: str, VirusTotal_API_Key: str, timeout: float = 60.0) -> str:
    """Fetch the VirusTotal report for a previously uploaded file.

    Args:
        FileMD5: MD5 of the uploaded file; it is placed in the request URL.
        VirusTotal_API_Key: Key sent in the ``x-apikey`` header.
        timeout: Seconds handed to ``requests`` as the request timeout.

    Returns:
        The raw response body on HTTP 200, or the sentinel string ``"-1"``
        when the request fails or the server answers with any other status.
    """
    url = f"https://www.virustotal.com/api/v3/files/{FileMD5}"
    headers = {
        "accept": "application/json",
        "x-apikey": VirusTotal_API_Key
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Report transport failures the same way as HTTP errors so the caller
        # only has to check for the "-1" sentinel.
        generic_error(f"[*] Error occurred in check_file function.\n{exc}")
        return "-1"

    if response.status_code != 200:
        generic_error(f"[*] Error occurred in check_file function.\nError code: {response.status_code}")
        return "-1"

    return response.text
104+
105+
def response_parser(response: str, verbose: bool):
    """Summarise a VirusTotal file report and print a verdict.

    Reads ``data.attributes.last_analysis_results`` from the report, collects
    every engine whose ``category`` is ``"malicious"`` into a table (engine
    name next to that engine's ``result``), and prints a coloured verdict
    based on how many engines flagged the file.

    Args:
        response: Report body as a JSON string.
        verbose: When true, the detection table is printed.

    Side effects:
        Appends the table and the verdict to the module-level ``log_output``
        when that buffer is already non-empty.
    """
    global log_output

    analysis_results = json_load(response)["data"]["attributes"]["last_analysis_results"]

    # Engine name -> detection label, for the engines that flagged the file.
    # Each engine is counted exactly once, so the verdict does not depend on
    # whether logging or verbose output is enabled.
    detections = {
        engine: verdict["result"]
        for engine, verdict in analysis_results.items()
        if verdict["category"] == "malicious"
    }
    malicious = len(detections)

    tab = PrettyTable(["Result", "CVE"])
    for engine, label in detections.items():
        tab.add_row([f"{engine}", f"{label}"])

    if verbose:
        print(tab)

    # A non-empty log buffer is this module's signal that logging is enabled.
    if log_output != "":
        log_output += "\n" + str(tab)

    if malicious > 2:
        print_string = 'This document is most likely malicious!!!'
        color = "red"
    elif malicious > 0:
        # One or two detections: never report the file as safe.
        print_string = "A malicious control has been detected but it could be a false positive."
        color = "yellow"
    else:
        print_string = "This file is safe. :-)"
        color = "green"

    print(colored(f'\n{print_string}', color, attrs=['reverse', 'blink']))

    if log_output != "":
        log_output += "\n" + print_string
155+
156+
if __name__ == "__main__":
    # Script entry point: parse the options, upload the PDF, fetch its report,
    # print the verdict and, when requested, save everything to a log file.

    File_Path = ""
    VirusTotal_API_Key = ""
    verbose = False
    log = False

    args = sys.argv[1:]
    for index, option in enumerate(args):
        # Value following the current option, if there is one.
        value = args[index + 1] if index + 1 < len(args) else None

        if option in ("-p", "--path") and value is not None:
            File_Path = value
        elif option in ("-A", "--API-Key") and value is not None:
            VirusTotal_API_Key = value
        elif option in ("-v", "--verbose"):
            verbose = True
        elif option in ("-l", "--log"):
            log = True
        elif option in ("-h", "--help"):
            argv_error()
            sys.exit(0)

    if File_Path == "" or VirusTotal_API_Key == "":
        # Both the PDF path and the API key are mandatory.
        argv_error()
        sys.exit(1)

    # Only the first characters of the key are shown, to avoid leaking it.
    init_print = f"Your File: {File_Path}\nYour API: {VirusTotal_API_Key[:5]}***"
    if verbose:
        print(init_print)
    if log:
        # Seeding the buffer is what enables logging inside the helper functions.
        log_output += "\n" + init_print

    response = upload_file(File_Path, VirusTotal_API_Key, verbose)
    if response == "-1":
        sys.exit(1)

    # The analysis id is base64 text; the part before the first ":" is used as
    # the file's MD5 (presumably "<md5>:<timestamp>" -- confirm against the
    # VirusTotal API documentation).
    encoded_analysis_id = get_base64_file_id_from_response(response)
    decoded_analysis_id = decrypt_from_base64(encoded_analysis_id)
    FileMD5 = decoded_analysis_id.split(":")[0]

    MD5_print = f"MD5: {FileMD5}"
    if verbose:
        print(MD5_print)
    if log:
        log_output += "\n" + MD5_print

    VirusTotal_response = check_file(FileMD5, VirusTotal_API_Key)
    if VirusTotal_response == "-1":
        sys.exit(1)

    response_parser(VirusTotal_response, verbose)

    if log:
        # Write only when logging was requested, so a run without --log does
        # not truncate an earlier log. The path is relative to the current
        # working directory.
        with open("PDF_Parser.log", "w") as log_file:
            log_file.write(log_output)

README.md

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,55 @@
11
# Parser-PDF-VirusTotal-Based
22
Parser PDF Based on VirusTotal API
3+
4+
![](docs/log%20file.png)
5+
<center>Log file example</center>
6+
7+
8+
![](docs/terminal%20verbose.png)
9+
<center>Terminal output example</center>
10+
11+
12+
## How to use
13+
14+
Run the script with `-h` or `--help` to print the following usage text:
15+
16+
```
17+
./script [option] [value] ...
18+
-p Use this argoument for set your PDF path
19+
or --path
20+
21+
-A Use this argoument for set your VirusTotal API Key
22+
or --API-Key
23+
24+
-v Use this argoument for view a lot more information
25+
or --verbose
26+
27+
-l Use this argoument for save in a log file all verbose information
28+
or --log
29+
30+
Examples:
31+
$ python3 script.py -p <PDF_DOCUMENT_PATH> -A <VirusTotal_API_Key>
32+
Generic example
33+
$ python3 script.py -p malicious.pdf -A abcdefg123456789876543234567899876543456789876543456789876543 --verbose
34+
It will print everything in output
35+
$ python3 script.py -p malicious.pdf -A abcdefg123456789876543234567899876543456789876543456789876543 --log
36+
It will print everything in a log file in the same directory where is the script PDF_Parser.py
37+
```
38+
39+
## Arguments
40+
41+
|Argument|Required|Format|
42+
|--|--|--|
43+
|--path|yes|--path /home/aleff/Documents/malicious.pdf|
44+
|--API-Key|yes|--API-Key abcdefg123456789876543234567899876...|
45+
|--verbose|no|--verbose|
46+
|--log|no|--log|
47+
48+
## VirusTotal API
49+
50+
Signup to [VirusTotal Website](https://www.virustotal.com/gui/join-us) and go to Account -> API Key
51+
52+
## FAQs
53+
54+
### Why?
55+
- Developed for the Network Security course at UNICAL University

docs/PDF_Parser.log

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
2+
Your File: malicious.pdf
3+
Your API: 6788d***
4+
{
5+
"data": {
6+
"type": "analysis",
7+
"id": "YjI1Y2E0NGU2NjIwMzlmNjY0MDFkZDliMzMzYjA3MDE6MTY2OTU2NDAzMw=="
8+
}
9+
}
10+
MD5: b25ca44e662039f66401dd9b333b0701
11+
+-------------------+------------------------------------------+
12+
| Result | CVE |
13+
+-------------------+------------------------------------------+
14+
| Lionic | Trojan.PDF.Pdfka.4!c |
15+
| MicroWorld-eScan | Exploit.PDF-Name.2.Gen |
16+
| FireEye | Exploit.PDF-Name.2.Gen |
17+
| CAT-QuickHeal | PDF.JS.Gen.A |
18+
| McAfee | Exploit-PDF.bk.gen |
19+
| Cyren | ShellCode.AX.gen |
20+
| Symantec | Bloodhound.Exploit.213 |
21+
| ESET-NOD32 | JS/Exploit.Pdfka.NOO |
22+
| Baidu | JS.Exploit.Pdfka.adb |
23+
| Avast | JS:Pdfka-AK [Expl] |
24+
| ClamAV | Heuristics.PDF.ObfuscatedNameObject |
25+
| Kaspersky | Exploit.JS.Pdfka.cil |
26+
| BitDefender | Exploit.PDF-Name.2.Gen |
27+
| NANO-Antivirus | Exploit.Script.IframeBof.gqjs |
28+
| Cynet | Malicious (score: 99) |
29+
| Tencent | Heur:Trojan.Script.LS_Gencirc.7033944.72 |
30+
| Ad-Aware | Exploit.PDF-Name.2.Gen |
31+
| Emsisoft | Exploit.PDF-Name.2.Gen (B) |
32+
| VIPRE | Exploit.PDF-Name.2.Gen |
33+
| TrendMicro | HEUR_PDFF.SPACE |
34+
| McAfee-GW-Edition | BehavesLike.PDF.Trojan.zb |
35+
| Sophos | Mal/PDFEx-D |
36+
| SentinelOne | Static AI - Malicious PDF |
37+
| GData | Exploit.PDF-Name.2.Gen |
38+
| Avira | HTML/Malicious.PDF.Gen3 |
39+
| Arcabit | Exploit.PDF-Name.2.Gen |
40+
| ViRobot | PDF.Exploit.CVE-2008-2992.A |
41+
| ZoneAlarm | HEUR:Exploit.Script.Generic |
42+
| Microsoft | Exploit:JS/ShellCode.gen |
43+
| Google | Detected |
44+
| AhnLab-V3 | Exploit/PDF.Generic.S1213 |
45+
| ALYac | Exploit.PDF-Name.2.Gen |
46+
| MAX | malware (ai score=80) |
47+
| Rising | Hack.Exploit.MalPDF.a (CLASSIC) |
48+
| Ikarus | PDF.Exploit.PDF-JS |
49+
| MaxSecure | Virus.PDF.Pidief.zm |
50+
| Fortinet | PDF/Script.JSS!exploit |
51+
| AVG | JS:Pdfka-AK [Expl] |
52+
+-------------------+------------------------------------------+
53+
This document is most likely malicious!!!

docs/log file.png

233 KB
Loading

docs/malicious.pdf

7.02 KB
Binary file not shown.

docs/terminal verbose.png

231 KB
Loading

0 commit comments

Comments
 (0)