-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathdeep-web-scanner.py
More file actions
241 lines (204 loc) · 8.54 KB
/
deep-web-scanner.py
File metadata and controls
241 lines (204 loc) · 8.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import argparse
import bs4
import os
import colorama
from concurrent.futures import ThreadPoolExecutor
import threading
import ipaddress
import socket
import time
import random
from typing import Optional, Union
import requests
# Silence urllib3's InsecureRequestWarning; every request below uses verify=False.
requests.packages.urllib3.disable_warnings() # type: ignore
# autoreset=True restores the default terminal color after each print.
colorama.init(autoreset=True)
# Directory containing this script (not referenced elsewhere in this file).
folder = os.path.dirname(__file__)
# Dedup cache: banner strings already reported, to avoid duplicate output.
output_strings: list[str] = []
# Candidate webserver ports probed on every host.
ports = [80, 443, 8080, 8081, 4434, 3443, 8443, 3000, 4000, 5000]
# Thread-pool size for the TCP port checks (20 threads per port).
workers = len(ports) * 20
# Only banners containing one of these keywords are echoed to the terminal.
keywords = ["cam", "rasp", " hp ", "system", "index of", "dashboard"]
# Write buffer and timestamp of the last flush (managed by write_line).
output_tmp = ""
last_write = time.time()
# Guards the buffered file writes in write_line.
global_lock = threading.Lock()
# Open ip/port pairs queued by start_portcheck for a later full HTTP probe.
banner_targets: list[dict[str, Union[str, int]]] = []
def main():
    """Scan every IP range listed in the input file for webservers.

    Each input line is either a dashed range ("2.56.20.0-2.56.23.255")
    or CIDR notation ("2.56.20.0/22").  Open ports found by
    start_portcheck are queued in the global banner_targets list, then
    probed with full HTTP requests; matching banners are printed and
    all results are buffered to the output file via write_line.
    """
    print("----------------------------")
    print(" Deep Web Scanner! ")
    print("----------------------------\n")
    print("Every active webserver url will be logged in the output file.")
    print("This terminal will only show urls/metadata with the following keywords: " + ", ".join(keywords))
    if indexof.lower() == "true":
        print("'Index of /' filenames will be logged!")
    print("Scan will start...")
    with open(input_file, "r") as myfile:
        content = myfile.readlines()
    # Shuffle the scan order only when -randomize is "true".
    # BUG FIX: the original truth-tested the raw argparse string, which is
    # always non-empty and therefore always truthy, so "-randomize false"
    # still shuffled.  str(...) keeps this working even if a future caller
    # passes a real bool.
    if str(randomize).lower() == "true":
        random.shuffle(content)
    for line in content:
        if "-" in line:
            # split ip range 2.56.20.0-2.56.23.255
            ip_range_array = line.split("-")
            ip_range_start = ip_range_array[0].strip()
            ip_range_end = ip_range_array[1].strip()
            print(
                f"Start scan from range: {ip_range_start} - {ip_range_end}")
            current_ip = ipaddress.IPv4Address(ip_range_start)
            end_ip = ipaddress.IPv4Address(ip_range_end)
            with ThreadPoolExecutor(max_workers=workers) as executor_portcheck:
                # BUG FIX: <= so the final address of the range is scanned
                # too; the original's strict < silently skipped it.
                while current_ip <= end_ip:
                    executor_portcheck.submit(
                        start_portcheck, current_ip.exploded)
                    current_ip += 1
        elif "/" in line:
            # CIDR notation, e.g. 2.56.20.0/22
            ip_range = ipaddress.ip_network(line.strip())
            with ThreadPoolExecutor(max_workers=workers) as executor_portcheck:
                for ip in ip_range.hosts():
                    executor_portcheck.submit(start_portcheck, ip.exploded)
        else:
            print(
                "No valid input file! Should be something like 2.56.20.0-2.56.23.255 per line!")
    global banner_targets
    print(f"{len(banner_targets)} responses")
    # Full HTTP probes are slower, so use a smaller dedicated pool.
    with ThreadPoolExecutor(max_workers=10) as executor_getbanner:
        for target in banner_targets:
            executor_getbanner.submit(
                start_request, target["ip"], target["port"])  # type: ignore
    banner_targets.clear()
    # Force-flush whatever is still buffered to the output file.
    write_line("", True)
def start_portcheck(ip: str) -> None:
    """TCP-connect check of every candidate webserver port on *ip*.

    Each port that accepts a connection within 2 seconds is queued in
    the global banner_targets list for a later full HTTP request.
    """
    global banner_targets
    for candidate_port in ports:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.settimeout(2)
            # connect_ex returns 0 on a successful TCP handshake
            if sock.connect_ex((ip, candidate_port)) == 0:
                banner_targets.append({"ip": ip, "port": candidate_port})
        finally:
            sock.close()
def start_request(ip: str, port: int) -> None:
    """Probe one open ip:port with a real HTTP(S) request and grab its banner.

    Any exception (connection refused, TLS failure, timeout, ...) is
    caught and logged so one bad host never kills the worker thread.
    """
    try:
        # Ports whose decimal form contains "80" (80, 8080, 8081, ...)
        # are assumed to speak plain HTTP; everything else is tried
        # over HTTPS.  Same heuristic as the original, written with the
        # "in" operator instead of the __contains__ dunder call.
        scheme = "http" if "80" in str(port) else "https"
        url = f"{scheme}://{ip}:{port}"
        site_result = request_url(url)
        # request_url returns False for unusable content types, otherwise
        # a (response, soup) tuple — the isinstance double-check in the
        # original was redundant.
        if site_result is not False:
            get_banner(site_result[0], site_result[1])
    except Exception as e:
        print(f"IP: {ip}, Port: {str(port)}")
        print(e)
def request_url(url: str) -> Union[tuple[requests.Response, bs4.BeautifulSoup], bool]:
    """HEAD-check *url* for a text-like content type, then GET it.

    Returns (response, soup) on success, or False when the server sends
    no content-type header or a non-text one (images, downloads, ...).
    Network errors propagate to the caller (start_request handles them).
    """
    # BUG FIX: the original leaked the session on both early `return
    # False` paths; the context manager guarantees it is closed.
    with requests.session() as session:
        session.headers[
            "User-Agent"
        ] = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36"
        # Cheap HEAD request first so binary content is rejected before
        # downloading the full body.
        header = session.head(url=url, timeout=5, verify=False)
        content_type_header = header.headers.get("content-type")
        if content_type_header is None:
            return False
        if not any(allowed in content_type_header.lower()
                   for allowed in ("html", "plain", "text", "json")):
            return False
        response = session.get(url=url, timeout=10, verify=False)
    try:
        soup = bs4.BeautifulSoup(response.text, "html.parser")
        return (response, soup)
    except Exception:
        # Unparseable body: still return the response with an empty soup.
        soup = bs4.BeautifulSoup("", "html.parser")
        return (response, soup)
def get_banner(request: requests.Response, soup: bs4.BeautifulSoup):
    """Extract banner information from a response, print keyword matches,
    and buffer every new banner line to the output file.

    Collected fields: url, Server header, <title>, generator meta tags,
    a "login required" marker for password fields, "Index of" listing
    entries (when enabled), and the content size.
    """
    banner_array: list[str] = []
    banner_array.append(request.url)
    if soup:
        server_header = request.headers.get("Server")
        if isinstance(server_header, str):
            banner_array.append(server_header)
        title = soup.find("title")
        if isinstance(title, bs4.Tag):
            banner_array.append(title.get_text().strip().replace("\n", ""))
        meta_tags: bs4.element.ResultSet[bs4.Tag] = soup.find_all(
            "meta", attrs={"name": "generator"})
        for meta_tag in meta_tags:
            # BUG FIX: the original read `meta_tag.attr`, which bs4 treats
            # as a child-tag lookup and which therefore never yields a Tag
            # attribute dict — generator names were silently dropped.
            # Tag.attrs is the real attribute dictionary.
            generator = meta_tag.attrs.get("content")
            if isinstance(generator, str):
                banner_array.append(generator)
        # has this site a password field?
        password_fields = soup.find_all(attrs={"type": "password"})
        if len(password_fields) > 0:
            banner_array.append("login required")
        # check for "index of" websites and show root files/folders
        global indexof
        if indexof.lower() == "true" and "index of" in request.text.lower():
            a_array: list[bs4.Tag] = soup.find_all("a")
            for a in a_array:
                href = a.attrs.get("href")
                # skip sort links such as "?C=N;O=D"
                if isinstance(href, str) and href.find("?") != 0:
                    banner_array.append(href)
    banner_array.append(f"{str(len(request.content))} content size")
    fullstring = ", ".join(banner_array)
    if fullstring not in output_strings:
        output_strings.append(fullstring)
        for keyword in keywords:
            if keyword in fullstring.lower():
                if "login required" in fullstring:
                    print(colorama.Fore.RED + fullstring)
                elif "Index of /" in fullstring:
                    print(colorama.Fore.YELLOW + fullstring)
                else:
                    print(colorama.Fore.GREEN + fullstring)
                # BUG FIX: stop after the first match so a banner hitting
                # several keywords is not printed several times.
                break
        write_line(fullstring)
def write_line(line: str, force: Optional[bool] = False):
    """Buffer *line* and flush the buffer to the output file.

    Lines accumulate in the module-level ``output_tmp`` buffer and are
    written out at most every 30 seconds — or immediately when *force*
    is true — to limit disk I/O from many worker threads.
    """
    global output_tmp, last_write
    # BUG FIX: the original spun on `while global_lock.locked(): continue`
    # before acquiring — a pointless busy-wait (acquire() already blocks)
    # that also left the buffer append unprotected, so concurrent threads
    # could interleave read-modify-writes of output_tmp.  Holding the lock
    # for the whole operation makes the buffer update and flush atomic.
    with global_lock:
        output_tmp += line + "\n"
        if last_write + 30 < time.time() or force:
            last_write = time.time()
            lines_to_write = output_tmp.count("\n")
            with open(output_file, "a") as output_1:
                output_1.write(output_tmp)
            output_tmp = ""
            if lines_to_write > 1:
                print(f"{lines_to_write} webservers found and written to file")
            else:
                print(f"{lines_to_write} webserver found and written to file")
if __name__ == "__main__":
    # Command-line interface.  The boolean-ish options are plain strings
    # ("true"/"false") because the rest of the script compares them with
    # .lower() == "true".
    arg_parser = argparse.ArgumentParser(
        description="Check if domain has an active website and grab banner."
    )
    arg_parser.add_argument(
        "-i", type=str, default="./asn-country-ipv4.csv", help="Path to input file"
    )
    arg_parser.add_argument(
        "-o", type=str, default="./deep-web.txt", help="Path to output file"
    )
    arg_parser.add_argument(
        "-indexof", type=str, default="true", help="Show files from index of sites"
    )
    arg_parser.add_argument(
        "-randomize", type=str, default="true", help="Randomize input list entries"
    )
    cli_args = arg_parser.parse_args()
    # Module-level globals read throughout the script.
    input_file = cli_args.i
    output_file = cli_args.o
    indexof = cli_args.indexof
    randomize = cli_args.randomize
    main()