- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 928
Improve robustness: canonicalize & validate targets before scan #1155
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 25 commits
8d5f302
              aa994c5
              ed7c81e
              ccd870e
              b5da801
              c1a9201
              b66a598
              949c911
              50374b9
              2389890
              859b850
              a83c17f
              d852609
              4de4cca
              c472e71
              fb43add
              8102395
              cd4e5ab
              c92c7f3
              8752881
              bd762cd
              c23507e
              7de608e
              87b773c
              0ac3a96
              8565db6
              ec97266
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -31,3 +31,6 @@ results.* | |
| coverage.xml | ||
|  | ||
| venv | ||
| Public_sign | ||
| Public_sign.pub | ||
| cks_proxy | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -7,7 +7,7 @@ | |
| from threading import Thread | ||
|  | ||
| import multiprocess | ||
|  | ||
| from concurrent.futures import ThreadPoolExecutor, as_completed | ||
| from nettacker import logger | ||
| from nettacker.config import Config, version_info | ||
| from nettacker.core.arg_parser import ArgParser | ||
|  | @@ -23,6 +23,7 @@ | |
| is_ipv6_range, | ||
| is_ipv6_cidr, | ||
| ) | ||
| from nettacker.core.hostcheck import resolve_quick, is_ip_literal, valid_hostname | ||
| from nettacker.core.messages import messages as _ | ||
| from nettacker.core.module import Module | ||
| from nettacker.core.socks_proxy import set_socks_proxy | ||
|  | @@ -142,7 +143,7 @@ def expand_targets(self, scan_id): | |
| ): | ||
| targets += generate_ip_range(target) | ||
| # domains probably | ||
| else: | ||
| else: | ||
| targets.append(target) | ||
| self.arguments.targets = targets | ||
| self.arguments.url_base_path = base_path | ||
|  | @@ -220,7 +221,6 @@ def run(self): | |
| if self.arguments.scan_compare_id is not None: | ||
| create_compare_report(self.arguments, scan_id) | ||
| log.info("ScanID: {0} ".format(scan_id) + _("done")) | ||
|  | ||
| return exit_code | ||
|  | ||
| def start_scan(self, scan_id): | ||
|  | @@ -289,7 +289,66 @@ def scan_target( | |
|  | ||
| return os.EX_OK | ||
|  | ||
|  | ||
def filter_valid_targets(self, targets, timeout_per_target=2.0, max_workers=None, dedupe=True):
    """
    Validate targets in parallel and return their canonical forms.

    Every target is handed to ``resolve_quick(target, timeout_sec=...)``,
    which returns an ``(ok, canonical)`` pair. A target is dropped when the
    pair reports failure, when the canonical name is empty, or when the
    lookup raises an ``OSError`` (``socket.gaierror`` is a subclass of it).

    Args:
        targets: Iterable of targets to validate.
        timeout_per_target: Timeout, in seconds, passed to ``resolve_quick``
            for each target.
        max_workers: Thread pool size; defaults to ``min(len(targets), 10)``.
        dedupe: When True, a canonical target that occurs more than once is
            returned only once, at the position of its first occurrence.

    Returns:
        List of canonical targets in input order, invalid ones removed.

    Raises:
        TypeError: If ``targets`` is not iterable.
    """
    # Materialize the input so it can be sized and indexed (e.g. generators).
    try:
        targets = list(targets)
    except TypeError as exc:
        raise TypeError(
            f"`targets` must be iterable, got {type(targets).__name__}"
        ) from exc

    if not targets:
        return []

    if max_workers is None:
        max_workers = min(len(targets), 10)  # cap threads

    def _canonicalize(target):
        ok, canon = resolve_quick(target, timeout_sec=timeout_per_target)
        return canon if ok and canon else None

    # Results arrive in completion order; slots keep the input order.
    canon_by_index = [None] * len(targets)

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Map each future back to its target so a failure can be attributed
        # even when the task raised and produced no result.
        pending = {
            pool.submit(_canonicalize, target): (idx, target)
            for idx, target in enumerate(targets)
        }
        for future in as_completed(pending):
            idx, target = pending[future]
            try:
                canon = future.result()
            except OSError as exc:
                log.debug(f"Invalid target (resolver error): {target}: {exc!s}")
                continue

            if canon:
                canon_by_index[idx] = canon
            else:
                log.info(f"Invalid target -> dropping: {target}")

    # Keep order, drop the slots of rejected targets.
    filtered = [canon for canon in canon_by_index if canon is not None]

    if dedupe:
        # dict preserves insertion order, so the first occurrence wins.
        return list(dict.fromkeys(filtered))
    return filtered
|  | ||
| def scan_target_group(self, targets, scan_id, process_number): | ||
|  | ||
| if(not self.arguments.socks_proxy and self.arguments.validate_before_scan): | ||
| targets = self.filter_valid_targets( | ||
| targets, | ||
| timeout_per_target=2.0, | ||
| max_workers=self.arguments.parallel_module_scan or None, | ||
| dedupe=True, | ||
| ) | ||
| active_threads = [] | ||
| log.verbose_event_info(_("single_process_started").format(process_number)) | ||
| total_number_of_modules = len(targets) * len(self.arguments.selected_modules) | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,123 @@ | ||
| # nettacker/core/hostcheck.py | ||
| from __future__ import annotations | ||
| import re | ||
| import socket | ||
| import time | ||
| import concurrent.futures | ||
| import os | ||
| import sys | ||
|         
                  Aarush289 marked this conversation as resolved.
              Outdated
          
            Show resolved
            Hide resolved | ||
| from nettacker import logger | ||
| from nettacker.core.ip import ( | ||
| get_ip_range, | ||
| generate_ip_range, | ||
| is_single_ipv4, | ||
| is_ipv4_range, | ||
| is_ipv4_cidr, | ||
| is_single_ipv6, | ||
| is_ipv6_range, | ||
| is_ipv6_cidr, | ||
| ) | ||
| log = logger.get_logger() | ||
|  | ||
# One hostname label: 1-63 letters, digits or hyphens, with no hyphen at
# either end. The end is anchored with ``\Z`` rather than ``$`` because ``$``
# also matches just before a trailing newline, which would accept "com\n".
_LABEL = re.compile(r"^(?!-)[A-Za-z0-9-]{1,63}(?<!-)\Z")
|  | ||
def is_ip_literal(name: str) -> bool:
    """Return True if ``name`` is a valid IPv4 or IPv6 address literal.

    The check is purely syntactic: ``socket.inet_pton`` is tried for
    ``AF_INET`` and then ``AF_INET6``. Anything that is not a string, or that
    neither family can parse, yields False instead of raising.
    """
    if not isinstance(name, str):
        return False
    for family in (socket.AF_INET, socket.AF_INET6):
        try:
            socket.inet_pton(family, name)
        except (OSError, ValueError):
            # OSError: not a valid address for this family.
            # ValueError: the string contains an embedded NUL character.
            continue
        return True
    return False
|  | ||
def valid_hostname(
    host: str,
    allow_single_label: bool = True
) -> bool:
    """
    Validate hostname syntax per RFC 1123.

    Args:
        host: Hostname to validate. One trailing dot (fully-qualified form)
            is ignored.
        allow_single_label: If True, accept single-label names (e.g., "localhost").

    Returns:
        True if the hostname is syntactically valid, False otherwise
        (including for non-string or empty input).
    """
    if not isinstance(host, str):
        return False
    if host.endswith("."):
        host = host[:-1]
    # 253 characters (dots included) is the longest textual name that fits
    # the 255-octet limit on DNS names (RFC 1035 section 2.3.4; RFC 1123
    # section 2.1 requires hosts to handle names up to that size).
    if len(host) > 253:
        return False
    labels = host.split(".")
    if len(labels) < 2 and not allow_single_label:
        return False
    # fullmatch: the whole label must match, so a trailing newline (which a
    # "$"-anchored match() tolerates) is rejected. An empty label, and hence
    # an empty host or consecutive dots, never matches.
    return all(_LABEL.fullmatch(label) for label in labels)
|  | ||
|  | ||
def _gai_once(name: str, use_ai_addrconfig: bool, port):
    """Run a single ``socket.getaddrinfo`` lookup for ``name`` and return its result.

    The lookup asks for any address family (``AF_UNSPEC``) and stream
    sockets. ``AI_ADDRCONFIG`` is requested only when ``use_ai_addrconfig``
    is true; errors raised by ``getaddrinfo`` propagate to the caller.
    """
    if use_ai_addrconfig:
        # Falls back to "no flags" when the socket module lacks the constant.
        lookup_flags = getattr(socket, "AI_ADDRCONFIG", 0)
    else:
        lookup_flags = 0
    return socket.getaddrinfo(
        name, port, socket.AF_UNSPEC, socket.SOCK_STREAM, 0, lookup_flags
    )
|  | ||
def _clean_host(s: str) -> str:
    """Strip the decoration that CSV-like input leaves around a host.

    Removes surrounding whitespace, surrounding single or double quotes and
    trailing commas, whether the comma sits outside the quotes
    (``"host",``) or inside them (``"host,"``). Characters inside the host
    itself are left untouched.
    """
    cleaned = s.strip()
    # Trailing comma outside any quotes. It must go first: otherwise the
    # closing quote is not at the end of the string and survives stripping.
    cleaned = cleaned.rstrip(",").rstrip()
    cleaned = cleaned.strip("\"'").strip()
    # Trailing comma that was enclosed by the quotes.
    cleaned = cleaned.rstrip(",").rstrip()
    return cleaned
|  | ||
def resolve_quick(
    host: str,
    timeout_sec: float = 2.0,
    allow_single_label: bool = True
) -> tuple[bool, str | None]:
    """
    Perform fast DNS resolution with timeout.

    The input is cleaned with ``_clean_host`` first. Values recognised as a
    single IPv4/IPv6 address are accepted without a lookup when they are
    valid literals. Anything else must pass ``valid_hostname`` and is then
    resolved with ``getaddrinfo``: first with ``AI_ADDRCONFIG``, then without
    it if the first attempt fails and time remains.

    Args:
        host: Hostname or IP literal to resolve.
        timeout_sec: Maximum total time, in seconds, to wait for resolution
            across both attempts. ``None`` waits without a limit.
        allow_single_label: If True, allow single-label hostnames (e.g., "intranet").

    Returns:
        (True, host_name) on success, (False, None) on failure/timeout.
        For hostnames, host_name is lower-cased with the trailing dot removed;
        IP literals are returned as cleaned.
    """
    host = _clean_host(host)
    if is_single_ipv4(host) or is_single_ipv6(host):
        if is_ip_literal(host):
            return True, host
        return False, None

    if host.endswith("."):
        host = host[:-1]

    if not valid_hostname(host, allow_single_label=allow_single_label):
        return False, None

    # One deadline shared by both attempts keeps the total wait within
    # timeout_sec instead of up to twice that.
    deadline = None if timeout_sec is None else time.monotonic() + timeout_sec

    for use_ai in (True, False):
        if deadline is None:
            remaining = None
        else:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break

        # Run getaddrinfo in a thread so we can enforce the timeout. The
        # executor is deliberately not used as a context manager: leaving a
        # ``with`` block calls shutdown(wait=True), which would block until a
        # hung lookup finishes and defeat the timeout.
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(_gai_once, host, use_ai, None)
            future.result(timeout=remaining)  # raises on timeout or error
            return True, host.lower()
        except (concurrent.futures.TimeoutError, OSError):
            # Timed out or resolution failed (socket.gaierror is an OSError);
            # try the next flag combination if time remains.
            continue
        finally:
            # Do not wait for the worker; an abandoned lookup finishes (or
            # fails) on its own in the background.
            executor.shutdown(wait=False)
    return False, None
|  | ||
|  | ||
| There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You don't have to translate for all. Just  | 
Uh oh!
There was an error while loading. Please reload this page.