diff --git a/.envrc.local.template b/.envrc.local.template index a196e5d3..4007b13c 100644 --- a/.envrc.local.template +++ b/.envrc.local.template @@ -44,5 +44,5 @@ export TEST_KEY="" # export AV_STATUS_SNS_PUBLISH_INFECTED # export AV_TIMESTAMP_METADATA # export CLAMAVLIB_PATH -# export CLAMSCAN_PATH +# export CLAMDSCAN_PATH # export FRESHCLAM_PATH diff --git a/Dockerfile b/Dockerfile index fbe47c73..fb8ed274 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,86 +1,121 @@ +FROM public.ecr.aws/lambda/python:3.7 AS cli_deps + +COPY requirements-cli.txt requirements-cli.txt +RUN mkdir -p /opt/app/cli \ + && pip3 install --requirement requirements-cli.txt --target /opt/app/cli \ + && rm -rf /root/.cache/pip + FROM amazonlinux:2 +# Fail the build when any stage of a RUN pipeline (rpm2cpio | cpio) fails, not only the last one +SHELL ["/bin/bash", "-o", "pipefail", "-c"] # Set up working directories -RUN mkdir -p /opt/app -RUN mkdir -p /opt/app/build -RUN mkdir -p /opt/app/bin/ - -# Copy in the lambda source -WORKDIR /opt/app -COPY ./*.py /opt/app/ -COPY requirements.txt /opt/app/requirements.txt +RUN mkdir -p \ + /opt/app \ + /opt/app/build \ + /opt/app/bin \ + /opt/app/python_deps \ + /opt/app/cli # Install packages -RUN yum update -y -RUN amazon-linux-extras install epel -y -RUN yum install -y cpio yum-utils tar.x86_64 gzip zip python3-pip - -# This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel -RUN pip3 install -r requirements.txt -RUN rm -rf /root/.cache/pip +RUN yum update -y \ + && amazon-linux-extras install epel -y \ + && yum install -y \ + cpio \ + yum-utils \ + tar.x86_64 \ + gzip \ + zip \ + python3-pip \ + shadow-utils.x86_64 \ + && yum clean all \ + && rm -rf /var/cache/yum # Download libraries we need to run in lambda WORKDIR /tmp -RUN yumdownloader -x \*i686 --archlist=x86_64 clamav -RUN rpm2cpio clamav-0*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 clamav-lib -RUN rpm2cpio clamav-lib*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 clamav-update -RUN rpm2cpio clamav-update*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686
--archlist=x86_64 json-c -RUN rpm2cpio json-c*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 pcre2 -RUN rpm2cpio pcre*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 libtool-ltdl -RUN rpm2cpio libtool-ltdl*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 libxml2 -RUN rpm2cpio libxml2*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 bzip2-libs -RUN rpm2cpio bzip2-libs*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 xz-libs -RUN rpm2cpio xz-libs*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 libprelude -RUN rpm2cpio libprelude*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 gnutls -RUN rpm2cpio gnutls*.rpm | cpio -vimd - -RUN yumdownloader -x \*i686 --archlist=x86_64 nettle -RUN rpm2cpio nettle*.rpm | cpio -vimd - - -# Copy over the binaries and libraries -RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /usr/lib64/libpcre.so.1 /opt/app/bin/ +RUN yumdownloader -x \*i686 --archlist=x86_64 \ + clamav \ + clamav-lib \ + clamav-update \ + clamav-scanner-systemd \ + elfutils-libs \ + json-c \ + lz4 \ + pcre2 \ + systemd-libs \ + libtool-ltdl \ + libxml2 \ + bzip2-libs \ + xz-libs \ + libprelude \ + gnutls \ + nettle \ + && rpm2cpio clamav-0*.rpm | cpio -vimd \ + && rpm2cpio clamav-lib*.rpm | cpio -vimd \ + && rpm2cpio clamav-update*.rpm | cpio -vimd \ + && rpm2cpio json-c*.rpm | cpio -vimd \ + && rpm2cpio pcre*.rpm | cpio -vimd \ + && rpm2cpio libtool-ltdl*.rpm | cpio -vimd \ + && rpm2cpio libxml2*.rpm | cpio -vimd \ + && rpm2cpio bzip2-libs*.rpm | cpio -vimd \ + && rpm2cpio xz-libs*.rpm | cpio -vimd \ + && rpm2cpio libprelude*.rpm | cpio -vimd \ + && rpm2cpio gnutls*.rpm | cpio -vimd \ + && rpm2cpio nettle*.rpm | cpio -vimd \ + && rpm2cpio clamd-0*.rpm | cpio -idmv \ + && rpm2cpio elfutils-libs*.rpm | cpio -idmv \ + && rpm2cpio lz4*.rpm | cpio -idmv \ + && rpm2cpio systemd-libs*.rpm | cpio -idmv \ + && cp -r \ + 
/tmp/usr/bin/clamdscan \ + /tmp/usr/sbin/clamd \ + /tmp/usr/bin/freshclam \ + /tmp/usr/lib64/* \ + /usr/lib64/libpcre.so* \ + /opt/app/bin/ \ + && rm -rf /tmp/usr # Fix the freshclam.conf settings -RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf -RUN echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf -RUN echo "ScriptedUpdates no" >> /opt/app/bin/freshclam.conf -RUN echo "DatabaseDirectory /var/lib/clamav" >> /opt/app/bin/freshclam.conf - -RUN yum install shadow-utils.x86_64 -y - -RUN groupadd clamav -RUN useradd -g clamav -s /bin/false -c "Clam Antivirus" clamav -RUN useradd -g clamav -s /bin/false -c "Clam Antivirus" clamupdate +RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf \ + && echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf \ + && echo "ScriptedUpdates no" >> /opt/app/bin/freshclam.conf \ + && echo "DatabaseDirectory /var/lib/clamav" >> /opt/app/bin/freshclam.conf +# clamd conf with hardened configs to avoid false positives +RUN echo "DatabaseDirectory /tmp/clamav_defs" > /opt/app/bin/scan.conf \ + && echo "PidFile /tmp/clamd.pid" >> /opt/app/bin/scan.conf \ + && echo "LogFile /tmp/clamd.log" >> /opt/app/bin/scan.conf \ + && echo "LocalSocket /tmp/clamd.sock" >> /opt/app/bin/scan.conf \ + && echo "FixStaleSocket yes" >> /opt/app/bin/scan.conf \ + && echo "DetectPUA yes" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Win.Packer" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Win.Trojan.Packed" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Win.Trojan.Molebox" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Win.Packer.Upx" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Doc.Packed" >> /opt/app/bin/scan.conf + +RUN groupadd clamav \ + && useradd -g clamav -s /bin/false -c "Clam Antivirus" clamav \ + && useradd -g clamav -s /bin/false -c "Clam Antivirus" clamupdate ENV LD_LIBRARY_PATH=/opt/app/bin RUN ldconfig -# Create the zip file +# Copy in 
the lambda source WORKDIR /opt/app -RUN zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py bin +COPY requirements.txt /opt/app/requirements.txt + +# This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel +RUN pip3 install --requirement requirements.txt --target /opt/app/python_deps \ + && rm -rf /root/.cache/pip + +# Copy fangfrisch CLI from lambda image +COPY --from=cli_deps /opt/app/cli /opt/app/cli -WORKDIR /usr/local/lib/python3.7/site-packages -RUN zip -r9 /opt/app/build/lambda.zip * +# Create the zip file +COPY ./*.py /opt/app/ +COPY fangfrisch.conf /opt/app/fangfrisch.conf +RUN zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py *.conf bin cli \ + && cd /opt/app/python_deps \ + && zip -r9 /opt/app/build/lambda.zip * WORKDIR /opt/app diff --git a/README.md b/README.md index 6d911919..aba98f84 100644 --- a/README.md +++ b/README.md @@ -81,31 +81,32 @@ can cause a continuous loop of scanning if improperly configured. Runtime configuration is accomplished using environment variables. See the table below for reference. 
-| Variable | Description | Default | Required | -| --- | --- | --- | --- | -| AV_DEFINITION_S3_BUCKET | Bucket containing antivirus definition files | | Yes | -| AV_DEFINITION_S3_PREFIX | Prefix for antivirus definition files | clamav_defs | No | -| AV_DEFINITION_PATH | Path containing files at runtime | /tmp/clamav_defs | No | -| AV_SCAN_START_SNS_ARN | SNS topic ARN to publish notification about start of scan | | No | -| AV_SCAN_START_METADATA | The tag/metadata indicating the start of the scan | av-scan-start | No | -| AV_SIGNATURE_METADATA | The tag/metadata name representing file's AV type | av-signature | No | -| AV_STATUS_CLEAN | The value assigned to clean items inside of tags/metadata | CLEAN | No | -| AV_STATUS_INFECTED | The value assigned to clean items inside of tags/metadata | INFECTED | No | -| AV_STATUS_METADATA | The tag/metadata name representing file's AV status | av-status | No | -| AV_STATUS_SNS_ARN | SNS topic ARN to publish scan results (optional) | | No | -| AV_STATUS_SNS_PUBLISH_CLEAN | Publish AV_STATUS_CLEAN results to AV_STATUS_SNS_ARN | True | No | -| AV_STATUS_SNS_PUBLISH_INFECTED | Publish AV_STATUS_INFECTED results to AV_STATUS_SNS_ARN | True | No | -| AV_TIMESTAMP_METADATA | The tag/metadata name representing file's scan time | av-timestamp | No | -| CLAMAVLIB_PATH | Path to ClamAV library files | ./bin | No | -| CLAMSCAN_PATH | Path to ClamAV clamscan binary | ./bin/clamscan | No | -| FRESHCLAM_PATH | Path to ClamAV freshclam binary | ./bin/freshclam | No | -| DATADOG_API_KEY | API Key for pushing metrics to DataDog (optional) | | No | -| AV_PROCESS_ORIGINAL_VERSION_ONLY | Controls that only original version of an S3 key is processed (if bucket versioning is enabled) | False | No | -| AV_DELETE_INFECTED_FILES | Controls whether infected files should be automatically deleted | False | No | -| EVENT_SOURCE | The source of antivirus scan event "S3" or "SNS" (optional) | S3 | No | -| S3_ENDPOINT | The Endpoint to use when interacting 
wth S3 | None | No | -| SNS_ENDPOINT | The Endpoint to use when interacting wth SNS | None | No | -| LAMBDA_ENDPOINT | The Endpoint to use when interacting wth Lambda | None | No | +| Variable | Description | Default | Required | +|----------------------------------|-------------------------------------------------------------------------------------------------|------------------|----------| +| AV_DEFINITION_S3_BUCKET | Bucket containing antivirus definition files | | Yes | +| AV_DEFINITION_S3_PREFIX | Prefix for antivirus definition files | clamav_defs | No | +| AV_DEFINITION_PATH | Path containing virus definition files at runtime | /tmp/clamav_defs | No | +| AV_SCAN_START_SNS_ARN | SNS topic ARN to publish notification about start of scan | | No | +| AV_SCAN_START_METADATA | The tag/metadata indicating the start of the scan | av-scan-start | No | +| AV_SIGNATURE_METADATA | The tag/metadata name representing file's AV type | av-signature | No | +| AV_STATUS_CLEAN | The value assigned to clean items inside of tags/metadata | CLEAN | No | +| AV_STATUS_INFECTED | The value assigned to infected items inside of tags/metadata | INFECTED | No | +| AV_STATUS_METADATA | The tag/metadata name representing file's AV status | av-status | No | +| AV_STATUS_SNS_ARN | SNS topic ARN to publish scan results (optional) | | No | +| AV_STATUS_SNS_PUBLISH_CLEAN | Publish AV_STATUS_CLEAN results to AV_STATUS_SNS_ARN | True | No | +| AV_STATUS_SNS_PUBLISH_INFECTED | Publish AV_STATUS_INFECTED results to AV_STATUS_SNS_ARN | True | No | +| AV_TIMESTAMP_METADATA | The tag/metadata name representing file's scan time | av-timestamp | No | +| AV_EXTRA_VIRUS_DEFINITIONS | Uses fangfrisch for extra antivirus definitions | False | No | +| CLAMAVLIB_PATH | Path to ClamAV library files | ./bin | No | +| CLAMDSCAN_PATH | Path to ClamAV clamdscan binary | ./bin/clamdscan | No | +| FRESHCLAM_PATH | Path to ClamAV freshclam binary | ./bin/freshclam | No | +| DATADOG_API_KEY | API Key for pushing
metrics to DataDog (optional) | | No | +| AV_PROCESS_ORIGINAL_VERSION_ONLY | Controls that only original version of an S3 key is processed (if bucket versioning is enabled) | False | No | +| AV_DELETE_INFECTED_FILES | Controls whether infected files should be automatically deleted | False | No | +| EVENT_SOURCE | The source of antivirus scan event "S3" or "SNS" (optional) | S3 | No | +| S3_ENDPOINT | The Endpoint to use when interacting with S3 | None | No | +| SNS_ENDPOINT | The Endpoint to use when interacting with SNS | None | No | +| LAMBDA_ENDPOINT | The Endpoint to use when interacting with Lambda | None | No | ## S3 Bucket Policy Examples diff --git a/clamav.py b/clamav.py index a44ab3a2..298cb1fa 100644 --- a/clamav.py +++ b/clamav.py @@ -14,30 +14,37 @@ # limitations under the License. import datetime +import errno import hashlib +import json import os import pwd import re +import socket import subprocess import boto3 import botocore from pytz import utc -from common import AV_DEFINITION_S3_PREFIX, S3_ENDPOINT -from common import AV_DEFINITION_PATH from common import AV_DEFINITION_FILE_PREFIXES from common import AV_DEFINITION_FILE_SUFFIXES +from common import AV_DEFINITION_PATH +from common import AV_DEFINITION_S3_BUCKET +from common import AV_DEFINITION_S3_PREFIX +from common import AV_DEFINITION_EXTRA_FILES +from common import AV_EXTRA_VIRUS_DEFINITIONS from common import AV_SIGNATURE_OK from common import AV_SIGNATURE_UNKNOWN from common import AV_STATUS_CLEAN from common import AV_STATUS_INFECTED from common import CLAMAVLIB_PATH -from common import CLAMSCAN_PATH +from common import CLAMDSCAN_PATH +from common import CLAMDSCAN_TIMEOUT +from common import CLAMD_SOCKET from common import FRESHCLAM_PATH from common import create_dir - RE_SEARCH_DIR = r"SEARCH_DIR\(\"=([A-z0-9\/\-_]*)\"\)" @@ -50,6 +57,8 @@ def current_library_search_path(): def update_defs_from_s3(s3_client, bucket, prefix): create_dir(AV_DEFINITION_PATH) to_download = {} + older_files =
set() + md5_matches = set() for file_prefix in AV_DEFINITION_FILE_PREFIXES: s3_best_time = None for file_suffix in AV_DEFINITION_FILE_SUFFIXES: @@ -60,51 +69,100 @@ def update_defs_from_s3(s3_client, bucket, prefix): s3_time = time_from_s3(s3_client, bucket, s3_path) if s3_best_time is not None and s3_time < s3_best_time: - print("Not downloading older file in series: %s" % filename) + older_files.add(filename) continue else: s3_best_time = s3_time if os.path.exists(local_path) and md5_from_file(local_path) == s3_md5: - print("Not downloading %s because local md5 matches s3." % filename) + md5_matches.add(filename) continue if s3_md5: to_download[file_prefix] = { "s3_path": s3_path, "local_path": local_path, } + + if AV_EXTRA_VIRUS_DEFINITIONS is True: + for filename in AV_DEFINITION_EXTRA_FILES: + s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename) + local_path = os.path.join(AV_DEFINITION_PATH, filename) + s3_md5 = md5_from_s3_tags(s3_client, bucket, s3_path) + if os.path.exists(local_path) and md5_from_file(local_path) == s3_md5: + md5_matches.add(filename) + continue + if s3_md5: + to_download[filename] = { + "s3_path": s3_path, + "local_path": local_path, + } + + if older_files: + print("Not downloading the following older files in series:") + print(json.dumps(list(older_files))) + if md5_matches: + print("Not downloading the following files because local md5 matches s3:") + print(json.dumps(list(md5_matches))) return to_download +class Md5Matches(Exception): + pass + + +class NoSuchFile(Exception): + pass + + def upload_defs_to_s3(s3_client, bucket, prefix, local_path): - for file_prefix in AV_DEFINITION_FILE_PREFIXES: - for file_suffix in AV_DEFINITION_FILE_SUFFIXES: - filename = file_prefix + "." 
+ file_suffix - local_file_path = os.path.join(local_path, filename) - if os.path.exists(local_file_path): - local_file_md5 = md5_from_file(local_file_path) - if local_file_md5 != md5_from_s3_tags( - s3_client, bucket, os.path.join(prefix, filename) - ): - print( - "Uploading %s to s3://%s" - % (local_file_path, os.path.join(bucket, prefix, filename)) - ) - s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT) - s3_object = s3.Object(bucket, os.path.join(prefix, filename)) - s3_object.upload_file(os.path.join(local_path, filename)) - s3_client.put_object_tagging( - Bucket=s3_object.bucket_name, - Key=s3_object.key, - Tagging={"TagSet": [{"Key": "md5", "Value": local_file_md5}]}, - ) - else: - print( - "Not uploading %s because md5 on remote matches local." - % filename - ) - else: - print("File does not exist: %s" % filename) + md5_matches = set() + non_existent_files = set() + official_databases = [file_prefix + "." + file_suffix + for file_prefix in AV_DEFINITION_FILE_PREFIXES + for file_suffix in AV_DEFINITION_FILE_SUFFIXES] + all_databases = (official_databases + AV_DEFINITION_EXTRA_FILES + if AV_EXTRA_VIRUS_DEFINITIONS is True + else official_databases) + + for filename in all_databases: + try: + upload_new_file_to_s3(bucket, filename, local_path, prefix, s3_client) + except Md5Matches: + md5_matches.add(filename) + except NoSuchFile: + non_existent_files.add(filename) + + if non_existent_files: + print("The following files do not exist for upload:") + print(json.dumps(list(non_existent_files))) + if md5_matches: + print("The following MD5 hashes match those in S3:") + print(json.dumps(list(md5_matches))) + + +def upload_new_file_to_s3(bucket, filename, local_path, prefix, s3_client): + local_file_path = os.path.join(local_path, filename) + + if not os.path.exists(local_file_path): + raise NoSuchFile + + local_file_md5 = md5_from_file(local_file_path) + + if local_file_md5 == md5_from_s3_tags(s3_client, bucket, os.path.join(prefix, filename)): + raise Md5Matches 
+ + print( + "Uploading %s to s3://%s" + % (local_file_path, os.path.join(bucket, prefix, filename)) + ) + s3 = boto3.resource("s3") + s3_object = s3.Object(bucket, os.path.join(prefix, filename)) + s3_object.upload_file(os.path.join(local_path, filename)) + s3_client.put_object_tagging( + Bucket=s3_object.bucket_name, + Key=s3_object.key, + Tagging={"TagSet": [{"Key": "md5", "Value": local_file_md5}]}, + ) def update_defs_from_freshclam(path, library_path=""): @@ -119,7 +177,7 @@ def update_defs_from_freshclam(path, library_path=""): fc_proc = subprocess.Popen( [ FRESHCLAM_PATH, - "--config-file=./bin/freshclam.conf", + "--config-file=%s/freshclam.conf" % CLAMAVLIB_PATH, "-u %s" % pwd.getpwuid(os.getuid())[0], "--datadir=%s" % path, ], @@ -127,8 +185,9 @@ def update_defs_from_freshclam(path, library_path=""): stdout=subprocess.PIPE, env=fc_env, ) - output = fc_proc.communicate()[0] - print("freshclam output:\n%s" % output) + output = fc_proc.communicate()[0].decode() + print("freshclam output:") + print(json.dumps(output.split("\n"))) if fc_proc.returncode != 0: print("Unexpected exit code from freshclam: %s." % fc_proc.returncode) return fc_proc.returncode @@ -187,24 +246,104 @@ def scan_output_to_json(output): def scan_file(path): av_env = os.environ.copy() av_env["LD_LIBRARY_PATH"] = CLAMAVLIB_PATH - print("Starting clamscan of %s." % path) + print("Starting clamdscan of %s." 
% path) av_proc = subprocess.Popen( - [CLAMSCAN_PATH, "-v", "-a", "--stdout", "-d", AV_DEFINITION_PATH, path], + [ + CLAMDSCAN_PATH, + "-v", + "--stdout", + "--config-file", + "%s/scan.conf" % CLAMAVLIB_PATH, + path, + ], stderr=subprocess.STDOUT, stdout=subprocess.PIPE, env=av_env, ) - output = av_proc.communicate()[0].decode() - print("clamscan output:\n%s" % output) - # Turn the output into a data source we can read - summary = scan_output_to_json(output) + try: + output, errors = av_proc.communicate(timeout=CLAMDSCAN_TIMEOUT) + except subprocess.TimeoutExpired: + av_proc.kill() + output, errors = av_proc.communicate() + + decoded_output = output.decode() + print("clamdscan output:\n%s" % decoded_output) + if av_proc.returncode == 0: return AV_STATUS_CLEAN, AV_SIGNATURE_OK elif av_proc.returncode == 1: + # Turn the output into a data source we can read + summary = scan_output_to_json(decoded_output) signature = summary.get(path, AV_SIGNATURE_UNKNOWN) return AV_STATUS_INFECTED, signature else: - msg = "Unexpected exit code from clamscan: %s.\n" % av_proc.returncode + msg = "Unexpected exit code from clamdscan: %s.\n" % av_proc.returncode + + if errors: + msg += "Errors: %s\n" % errors.decode() + print(msg) raise Exception(msg) + + +def is_clamd_running(): + print("Checking if clamd is running on %s" % CLAMD_SOCKET) + + if os.path.exists(CLAMD_SOCKET): + with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s: + s.settimeout(10) + try: + s.connect(CLAMD_SOCKET) + s.send(b"PING") + data = s.recv(32) + except (socket.timeout, socket.error) as e: + print("Failed to read from socket: %s\n" % e) + return False + + print("Received %s in response to PING" % repr(data)) + return data == b"PONG\n" + + print("Clamd is not running on %s" % CLAMD_SOCKET) + return False + + +def start_clamd_daemon(): + s3 = boto3.resource("s3") + s3_client = boto3.client("s3") + + to_download = update_defs_from_s3( + s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX + ) + + for
download in to_download.values(): + s3_path = download["s3_path"] + local_path = download["local_path"] + print("Downloading definition file %s from s3://%s" % (local_path, s3_path)) + s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) + print("Downloading definition file %s complete!" % (local_path)) + + av_env = os.environ.copy() + av_env["LD_LIBRARY_PATH"] = CLAMAVLIB_PATH + + print("Starting clamd") + + if os.path.exists(CLAMD_SOCKET): + try: + os.unlink(CLAMD_SOCKET) + except OSError as e: + if e.errno != errno.ENOENT: + print("Could not unlink clamd socket %s" % CLAMD_SOCKET) + raise + + clamd_proc = subprocess.Popen( + ["%s/clamd" % CLAMAVLIB_PATH, "-c", "%s/scan.conf" % CLAMAVLIB_PATH], + env=av_env, + ) + + clamd_proc.wait() + + with open("/tmp/clamd.log") as clamd_log_file: + print(clamd_log_file.read()) + + return clamd_proc.pid diff --git a/common.py b/common.py index bb953fca..bfeefe2f 100644 --- a/common.py +++ b/common.py @@ -13,10 +13,30 @@ # See the License for the specific language governing permissions and # limitations under the License.
-import errno import datetime +import errno import os import os.path +from distutils.util import strtobool + + +def create_dir(path): + if not os.path.exists(path): + try: + print("Attempting to create directory %s.\n" % path) + os.makedirs(path) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise + + +def get_timestamp(): + return datetime.datetime.utcnow().strftime("%Y/%m/%d %H:%M:%S UTC") + + +def str_to_bool(s): + return bool(strtobool(str(s))) + AV_DEFINITION_S3_BUCKET = os.getenv("AV_DEFINITION_S3_BUCKET") AV_DEFINITION_S3_PREFIX = os.getenv("AV_DEFINITION_S3_PREFIX", "clamav_defs") @@ -30,33 +50,78 @@ AV_STATUS_INFECTED = os.getenv("AV_STATUS_INFECTED", "INFECTED") AV_STATUS_METADATA = os.getenv("AV_STATUS_METADATA", "av-status") AV_STATUS_SNS_ARN = os.getenv("AV_STATUS_SNS_ARN") -AV_STATUS_SNS_PUBLISH_CLEAN = os.getenv("AV_STATUS_SNS_PUBLISH_CLEAN", "True") -AV_STATUS_SNS_PUBLISH_INFECTED = os.getenv("AV_STATUS_SNS_PUBLISH_INFECTED", "True") +AV_STATUS_SNS_PUBLISH_CLEAN = str_to_bool(os.getenv("AV_STATUS_SNS_PUBLISH_CLEAN", "True")) +AV_STATUS_SNS_PUBLISH_INFECTED = str_to_bool(os.getenv("AV_STATUS_SNS_PUBLISH_INFECTED", "True")) AV_TIMESTAMP_METADATA = os.getenv("AV_TIMESTAMP_METADATA", "av-timestamp") +AV_EXTRA_VIRUS_DEFINITIONS = str_to_bool(os.getenv("AV_EXTRA_VIRUS_DEFINITIONS", "False")) CLAMAVLIB_PATH = os.getenv("CLAMAVLIB_PATH", "./bin") -CLAMSCAN_PATH = os.getenv("CLAMSCAN_PATH", "./bin/clamscan") +CLAMDSCAN_PATH = os.getenv("CLAMDSCAN_PATH", "./bin/clamdscan") FRESHCLAM_PATH = os.getenv("FRESHCLAM_PATH", "./bin/freshclam") -AV_PROCESS_ORIGINAL_VERSION_ONLY = os.getenv( +CLAMDSCAN_TIMEOUT = int(os.getenv("CLAMDSCAN_TIMEOUT", 240)) +CLAMD_SOCKET = os.getenv("CLAMD_SOCKET", "/tmp/clamd.sock") +AV_PROCESS_ORIGINAL_VERSION_ONLY = str_to_bool(os.getenv( "AV_PROCESS_ORIGINAL_VERSION_ONLY", "False" -) -AV_DELETE_INFECTED_FILES = os.getenv("AV_DELETE_INFECTED_FILES", "False") +)) +AV_DELETE_INFECTED_FILES =
str_to_bool(os.getenv("AV_DELETE_INFECTED_FILES", "False")) -AV_DEFINITION_FILE_PREFIXES = ["main", "daily", "bytecode"] +AV_DEFINITION_FILE_PREFIXES = [ + "main", + "daily", + "bytecode", +] AV_DEFINITION_FILE_SUFFIXES = ["cld", "cvd"] + +AV_DEFINITION_EXTRA_FILES = [ + "MiscreantPunch099-Low.ldb", + "badmacro.ndb", + "blurl.ndb", + "bofhland_cracked_URL.ndb", + "bofhland_malware_URL.ndb", + "bofhland_malware_attach.hdb", + "bofhland_phishing_URL.ndb", + "ditekshen.ldb", + "exexor99.ldb", + "foxhole_filename.cdb", + "foxhole_generic.cdb", + "foxhole_js.cdb", + "foxhole_js.ndb", + "hackingteam.hsb", + "interserver256.hdb", + "interservertopline.db", + "junk.ndb", + "jurlbl.ndb", + "jurlbla.ndb", + "lott.ndb", + "malwarehash.hsb", + "miscreantpunch.hdb", + "phish.ndb", + "phishtank.ndb", + "porcupine.ndb", + "rfxn.hdb", + "rfxn.ndb", + "rfxn.yara", + "rogue.hdb", + "scam.ndb", + "shell.ldb", + "shelter.ldb", + "spamattach.hdb", + "spamimg.hdb", + "spear.ndb", + "spearl.ndb", + "twinclams.ldb", + "twinwave.ign2", + "urlhaus.ndb", + "whitelist.fp", + "winnow.attachments.hdb", + "winnow_bad_cw.hdb", + "winnow_extended_malware.hdb", + "winnow_extended_malware_links.ndb", + "winnow_malware.hdb", + "winnow_malware_links.ndb", + "winnow_phish_complete_url.ndb", + "winnow_spam_complete.ndb" +] + SNS_ENDPOINT = os.getenv("SNS_ENDPOINT", None) S3_ENDPOINT = os.getenv("S3_ENDPOINT", None) LAMBDA_ENDPOINT = os.getenv("LAMBDA_ENDPOINT", None) - - -def create_dir(path): - if not os.path.exists(path): - try: - print("Attempting to create directory %s.\n" % path) - os.makedirs(path) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise - - -def get_timestamp(): - return datetime.datetime.utcnow().strftime("%Y/%m/%d %H:%M:%S UTC") diff --git a/fangfrisch.conf b/fangfrisch.conf new file mode 100644 index 00000000..8ee1c365 --- /dev/null +++ b/fangfrisch.conf @@ -0,0 +1,83 @@ +[DEFAULT] +db_url = sqlite:///AV_DEFINITION_PATH/db.sqlite +local_directory = AV_DEFINITION_PATH + 
+# The following settings are optional. Other sections inherit +# values from DEFAULT and may also overwrite values. + +max_size = 5MB +log_level = info + +[malwarepatrol] +enabled = no +# Replace with your personal Malwarepatrol receipt +# receipt = XXXXXXXXX +# change product id if necessary. +# 32 = free guard, 33 = Basic Defense yearly, 34 = Basic Defense monthly, 37 = Basic Defense EDU/Contributor +# product = 32 + +[malwareexpert] +enabled = no + +[sanesecurity] +prefix = https://ftp.swin.edu.au/sanesecurity/ +max_size = 10M +enabled = yes +interval = 1h +url_malwareexpert_fp = disabled +url_malwareexpert_hdb = disabled +url_malwareexpert_ldb = disabled +url_malwareexpert_ndb = disabled + +[securiteinfo] +enabled = no + +[urlhaus] +enabled = yes +max_size = 2MB + +[twinwave] +enabled = yes +max_size = 2M +integrity_check = disabled +interval = 1h +prefix = https://raw.githubusercontent.com/twinwave-security/twinclams/master/ +url_twinclams = ${prefix}twinclams.ldb +url_twinwave_ign2 = ${prefix}twinwave.ign2 + +[clampunch] +enabled = yes +max_size = 2M +integrity_check = disabled +interval = 24h +prefix = https://raw.githubusercontent.com/wmetcalf/clam-punch/master/ +url_miscreantpunch099low = ${prefix}MiscreantPunch099-Low.ldb +url_exexor99 = ${prefix}exexor99.ldb +url_miscreantpuchhdb = ${prefix}miscreantpunch.hdb + +[rfxn] +enabled = yes +interval= 4h +integrity_check = disabled +prefix = https://www.rfxn.com/downloads/ +url_rfxn_ndb = ${prefix}rfxn.ndb +url_rfxn_hdb = ${prefix}rfxn.hdb +url_rfxn_yara = ${prefix}rfxn.yara + +[interserver] +enabled = yes +interval = 1d +integrity_check = disabled +prefix = https://rbldata.interserver.net/ +url_interserver_sha256 = ${prefix}interserver256.hdb +url_interserver_topline = ${prefix}interservertopline.db +url_interserver_shell = ${prefix}shell.ldb +url_interserver_whitelist = ${prefix}whitelist.fp + +[ditekshen] +enabled = yes +interval = 1d +integrity_check = disabled +prefix = 
https://raw.githubusercontent.com/ditekshen/detection/master/clamav/ +url_ditekshen_ldb = ${prefix}clamav.ldb +filename_ditekshen_ldb = ditekshen.ldb diff --git a/requirements-cli.txt b/requirements-cli.txt new file mode 100644 index 00000000..44ddf5e7 --- /dev/null +++ b/requirements-cli.txt @@ -0,0 +1 @@ +fangfrisch==1.6.1 diff --git a/scan.py b/scan.py index 48545a06..43a1340f 100644 --- a/scan.py +++ b/scan.py @@ -16,6 +16,7 @@ import copy import json import os +import signal from urllib.parse import unquote_plus from distutils.util import strtobool @@ -37,12 +38,12 @@ from common import AV_STATUS_SNS_PUBLISH_CLEAN from common import AV_STATUS_SNS_PUBLISH_INFECTED from common import AV_TIMESTAMP_METADATA -from common import SNS_ENDPOINT -from common import S3_ENDPOINT from common import create_dir from common import get_timestamp +clamd_pid = None + def event_object(event, event_source="s3"): # SNS events are slightly different @@ -75,7 +76,7 @@ def event_object(event, event_source="s3"): raise Exception("Unable to retrieve object from event.\n{}".format(event)) # Create and return the object - s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT) + s3 = boto3.resource("s3") return s3.Object(bucket_name, key_name) @@ -113,7 +114,7 @@ def delete_s3_object(s3_object): % (s3_object.bucket_name, s3_object.key) ) else: - print("Infected file deleted: %s.%s" % (s3_object.bucket_name, s3_object.key)) + print("Infected file deleted: %s" % os.path.join("s3://", s3_object.bucket_name, s3_object.key)) def set_av_metadata(s3_object, scan_result, scan_signature, timestamp): @@ -200,15 +201,39 @@ def sns_scan_results( ) +def kill_process_by_pid(pid): + # Check if process is running on PID + try: + os.kill(pid, 0) + except OSError: + return + + print("Killing the process by PID %s" % pid) + + try: + os.kill(pid, signal.SIGTERM) + except OSError: + os.kill(pid, signal.SIGKILL) + + def lambda_handler(event, context): - s3 = boto3.resource("s3",
endpoint_url=S3_ENDPOINT) - s3_client = boto3.client("s3", endpoint_url=S3_ENDPOINT) - sns_client = boto3.client("sns", endpoint_url=SNS_ENDPOINT) + global clamd_pid + + s3 = boto3.resource("s3") + s3_client = boto3.client("s3") + sns_client = boto3.client("sns") # Get some environment variables ENV = os.getenv("ENV", "") EVENT_SOURCE = os.getenv("EVENT_SOURCE", "S3") + if not clamav.is_clamd_running(): + if clamd_pid is not None: + kill_process_by_pid(clamd_pid) + + clamd_pid = clamav.start_clamd_daemon() + print("Clamd PID: %s" % clamd_pid) + start_time = get_timestamp() print("Script starting at %s\n" % (start_time)) s3_object = event_object(event, event_source=EVENT_SOURCE) @@ -225,16 +250,6 @@ def lambda_handler(event, context): create_dir(os.path.dirname(file_path)) s3_object.download_file(file_path) - to_download = clamav.update_defs_from_s3( - s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX - ) - - for download in to_download.values(): - s3_path = download["s3_path"] - local_path = download["local_path"] - print("Downloading definition file %s from s3://%s" % (local_path, s3_path)) - s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) - print("Downloading definition file %s complete!" % (local_path)) scan_result, scan_signature = clamav.scan_file(file_path) print( "Scan of s3://%s resulted in %s\n" diff --git a/update.py b/update.py index 80aa46d1..79148605 100644 --- a/update.py +++ b/update.py @@ -14,6 +14,7 @@ # limitations under the License. 
import os +import subprocess import boto3 @@ -21,6 +22,7 @@ from common import AV_DEFINITION_PATH from common import AV_DEFINITION_S3_BUCKET from common import AV_DEFINITION_S3_PREFIX +from common import AV_EXTRA_VIRUS_DEFINITIONS from common import CLAMAVLIB_PATH from common import S3_ENDPOINT from common import get_timestamp @@ -42,7 +44,26 @@ def lambda_handler(event, context): s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) print("Downloading definition file %s complete!" % (local_path)) - clamav.update_defs_from_freshclam(AV_DEFINITION_PATH, CLAMAVLIB_PATH) + if clamav.update_defs_from_freshclam(AV_DEFINITION_PATH, CLAMAVLIB_PATH) != 0: + return 1 + + if AV_EXTRA_VIRUS_DEFINITIONS is True: + env_pythonpath = os.environ.copy() + env_pythonpath["PYTHONPATH"] = os.path.join(env_pythonpath["LAMBDA_TASK_ROOT"], "cli") + + fangfrisch_conf_filepath = os.path.join(os.environ['LAMBDA_TASK_ROOT'], 'fangfrisch.conf') + fangfrisch_base_command = f"cli/bin/fangfrisch --conf /tmp/fangfrisch.conf" + subprocess.run(f"cp {fangfrisch_conf_filepath} /tmp/fangfrisch.conf &&" + f"sed -i 's~AV_DEFINITION_PATH~{AV_DEFINITION_PATH}~g' /tmp/fangfrisch.conf", + shell=True, + check=True) + print("running fangfrisch refresh...") + subprocess.run(f"{fangfrisch_base_command} initdb", shell=True, env=env_pythonpath) + subprocess.run(f"{fangfrisch_base_command} refresh", shell=True, env=env_pythonpath, check=True) + + else: + print("Skip downloading extra virus definitions with Fangfrisch") + # If main.cvd gets updated (very rare), we will need to force freshclam # to download the compressed version to keep file sizes down. # The existence of main.cud is the trigger to know this has happened.