From ada79e7b101eb0f42c111059e6e880c9a567d114 Mon Sep 17 00:00:00 2001 From: scamalyticsdev Date: Wed, 19 Nov 2025 19:46:15 +0000 Subject: [PATCH] Initial reCommit --- scamalytics/CHANGELOG.md | 8 + scamalytics/README.md | 40 +++ scamalytics/assets/configuration/spec.yaml | 10 + .../datadog_checks/scamalytics/__about__.py | 1 + .../datadog_checks/scamalytics/__init__.py | 4 + .../datadog_checks/scamalytics/check.py | 304 ++++++++++++++++++ .../scamalytics/config_models/__init__.py | 21 ++ .../scamalytics/config_models/defaults.py | 12 + .../scamalytics/config_models/instance.py | 47 +++ .../scamalytics/config_models/shared.py | 44 +++ .../scamalytics/config_models/validators.py | 9 + .../datadog_checks/scamalytics/data/conf.yaml | 16 + .../scamalytics/data/conf.yaml.example | 55 ++++ scamalytics/hatch.toml | 4 + scamalytics/manifest.json | 70 ++++ scamalytics/metadata.csv | 1 + scamalytics/pyproject.toml | 60 ++++ scamalytics/tests/__init__.py | 1 + scamalytics/tests/conftest.py | 0 scamalytics/tests/test_scamalytics.py | 102 ++++++ scamalytics/tests/test_unit.py | 38 +++ 21 files changed, 847 insertions(+) create mode 100644 scamalytics/CHANGELOG.md create mode 100644 scamalytics/README.md create mode 100644 scamalytics/assets/configuration/spec.yaml create mode 100644 scamalytics/datadog_checks/scamalytics/__about__.py create mode 100644 scamalytics/datadog_checks/scamalytics/__init__.py create mode 100644 scamalytics/datadog_checks/scamalytics/check.py create mode 100644 scamalytics/datadog_checks/scamalytics/config_models/__init__.py create mode 100644 scamalytics/datadog_checks/scamalytics/config_models/defaults.py create mode 100644 scamalytics/datadog_checks/scamalytics/config_models/instance.py create mode 100644 scamalytics/datadog_checks/scamalytics/config_models/shared.py create mode 100644 scamalytics/datadog_checks/scamalytics/config_models/validators.py create mode 100644 scamalytics/datadog_checks/scamalytics/data/conf.yaml create mode 100644 
scamalytics/datadog_checks/scamalytics/data/conf.yaml.example create mode 100644 scamalytics/hatch.toml create mode 100644 scamalytics/manifest.json create mode 100644 scamalytics/metadata.csv create mode 100644 scamalytics/pyproject.toml create mode 100644 scamalytics/tests/__init__.py create mode 100644 scamalytics/tests/conftest.py create mode 100644 scamalytics/tests/test_scamalytics.py create mode 100644 scamalytics/tests/test_unit.py diff --git a/scamalytics/CHANGELOG.md b/scamalytics/CHANGELOG.md new file mode 100644 index 0000000000..154561438d --- /dev/null +++ b/scamalytics/CHANGELOG.md @@ -0,0 +1,8 @@ +# CHANGELOG - Scamalytics + +## 1.0.0 / YYYY-MM-DD + +***Added***: + +* Initial Release + diff --git a/scamalytics/README.md b/scamalytics/README.md new file mode 100644 index 0000000000..a85d9b6687 --- /dev/null +++ b/scamalytics/README.md @@ -0,0 +1,40 @@ +## Overview + +Scamalytics transforms raw IP traffic into actionable threat intelligence. The platform delivers real-time IP enrichment and provides a trustworthy, accurate risk score for every IP, offering instant context and evidence-based assessments for each connection. It detects anonymization methods such as VPNs, proxies, Tor nodes, and data center traffic, while attributing requests by geolocation, ASN, and ISP. Scamalytics also performs abuse and blacklist checks, giving organizations immediate insight into potentially malicious or high-risk IP activity in real time. + +The Scamalytics Datadog integration extends these capabilities by enriching logs and network telemetry within Datadog, enhancing threat visibility, strengthening risk correlation, and providing deeper operational context across your environment. + + +## Setup + +### 1. 
Update the configuration file + +Inside conf.d/scamalytics.d/conf.yaml, add the Scamalytics API endpoint URL, as well as your API key: + +init_config: + +instances: + - url: "https://api.scamalytics.com/?ip=" + - api_key: "" + - customer-id: + " Log Configuration > Pipelines. +Click your desired log source's pipeline. +Select Add Processor +Under Select the processor type, choose Remapper. +Name the processor. +Set IP Fields as the attribute to remap. +Set the target attribute to network.ip.attributes.ip + +## Troubleshooting + +Need help? Contact [Scamalytics](dev@scamalytics.com). \ No newline at end of file diff --git a/scamalytics/assets/configuration/spec.yaml b/scamalytics/assets/configuration/spec.yaml new file mode 100644 index 0000000000..16e0b89cea --- /dev/null +++ b/scamalytics/assets/configuration/spec.yaml @@ -0,0 +1,10 @@ +name: Scamalytics +files: +- name: scamalytics.yaml + options: + - template: init_config + options: + - template: init_config/default + - template: instances + options: + - template: instances/default diff --git a/scamalytics/datadog_checks/scamalytics/__about__.py b/scamalytics/datadog_checks/scamalytics/__about__.py new file mode 100644 index 0000000000..1f356cc57b --- /dev/null +++ b/scamalytics/datadog_checks/scamalytics/__about__.py @@ -0,0 +1 @@ +__version__ = '1.0.0' diff --git a/scamalytics/datadog_checks/scamalytics/__init__.py b/scamalytics/datadog_checks/scamalytics/__init__.py new file mode 100644 index 0000000000..ac0b0826ca --- /dev/null +++ b/scamalytics/datadog_checks/scamalytics/__init__.py @@ -0,0 +1,4 @@ +from .__about__ import __version__ +from .check import ScamalyticsCheck + +__all__ = ['__version__', 'ScamalyticsCheck'] diff --git a/scamalytics/datadog_checks/scamalytics/check.py b/scamalytics/datadog_checks/scamalytics/check.py new file mode 100644 index 0000000000..922cb42cc9 --- /dev/null +++ b/scamalytics/datadog_checks/scamalytics/check.py @@ -0,0 +1,304 @@ +import json +import re +from datetime import 
datetime, timedelta, timezone + +import requests + +from datadog_checks.base import ConfigurationError +from datadog_checks.base.checks.logs.crawler.base import LogCrawlerCheck +from datadog_checks.base.checks.logs.crawler.stream import LogRecord, LogStream + + +def parse_iso8601_timestamp(ts_str: str) -> datetime: + """ + Convert ISO8601 timestamp string to an offset-aware datetime (UTC if 'Z'). + Examples: + '2025-08-11T23:03:34.983Z' -> aware UTC + '2025-08-11T23:03:34.983+00:00' -> aware UTC + """ + if not ts_str: + return None # caller must handle + # Normalize trailing Z to +00:00 + if ts_str.endswith('Z'): + ts_str = ts_str[:-1] + '+00:00' + dt = datetime.fromisoformat(ts_str) + if dt.tzinfo is None: + # Make it explicit UTC if somehow naive + dt = dt.replace(tzinfo=timezone.utc) + return dt + + +class ScamalyticsCheck(LogCrawlerCheck): + __NAMESPACE__ = 'scamalytics' + + def __init__(self, name, init_config, instances): + super().__init__(name, init_config, instances) + self.instance = instances[0] if instances else {} + + required_keys = [ + "scamalytics_api_key", + "scamalytics_api_url", + "customer_id", + "dd_api_key", + "dd_app_key", + ] + missing = [k for k in required_keys if not self.instance.get(k)] + if missing: + raise ConfigurationError(f"Missing required configuration key(s): {', '.join(missing)}") + + def get_log_streams(self): + return [ScamalyticsLogStream(check=self, name="scamalytics_stream")] + + +class ScamalyticsLogStream(LogStream): + """ + Crawl Datadog logs, extract public IPs, deduplicate per 24h via persistent cache, + enrich new IPs with Scamalytics, and emit enriched logs through the crawler. 
class ScamalyticsLogStream(LogStream):
    """
    Crawl Datadog logs, extract public IPv4 addresses, and emit Scamalytics reports.

    On every call to ``records`` the stream:

    1. Searches Datadog logs matching ``LOG_QUERY`` (logs carrying
       ``@network.ip.attributes.ip``, excluding the logs this integration emits).
    2. Extracts IPv4 literals from each returned event and keeps the public ones.
    3. Skips addresses already enriched within the last ``skip_window_hours``
       hours, consulting the persistent cache first and Datadog logs second.
    4. Queries the Scamalytics API for the remaining addresses and yields one
       ``LogRecord`` per address.
    """

    CACHE_KEY = "scamalytics_recent_ips"
    DEFAULT_SKIP_WINDOW_HOURS = 24
    LOG_QUERY = "@network.ip.attributes.ip:* AND -source:scamalytics-ti AND -service:scamalytics"
    TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
    # Candidate IPv4 literals only; octet ranges are validated by ``_is_public_ip``.
    IP_PATTERN = re.compile(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b")

    def __init__(self, *args, **kwargs):
        """Initialize both caches and read the optional ``skip_window_hours`` setting."""
        super().__init__(*args, **kwargs)
        # In-memory per-run cache: {ip: True} for IPs known to be inside the window.
        self.session_recent_cache = {}
        # Persistent cache across runs: {ip: "ISO8601 UTC timestamp"}.
        self.recent_cache = {}
        self._load_recent_cache()

        # Optional tunable window (hours) from config; fall back on bad values.
        configured = self.check.instance.get("skip_window_hours", self.DEFAULT_SKIP_WINDOW_HOURS)
        try:
            self.skip_window_hours = int(configured)
        except (TypeError, ValueError, OverflowError):
            self.skip_window_hours = self.DEFAULT_SKIP_WINDOW_HOURS

    def records(self, cursor=None):
        """
        Yield one enriched ``LogRecord`` per newly seen public IP.

        Args:
            cursor: The cursor of the last record emitted by a previous run
                (a mapping with a ``"timestamp"`` key), or ``None``.

        Yields:
            ``LogRecord`` objects whose ``data`` holds the Scamalytics response
            and whose ``cursor`` is the timestamp of the source log event.
        """
        check = self.check
        instance = check.instance
        scam_key = instance.get("scamalytics_api_key")
        scam_api_url = instance.get("scamalytics_api_url")
        customer_id = instance.get("customer_id")

        # The session cache is documented as per-run. Resetting it here keeps
        # that true even if this stream object is reused across check runs;
        # otherwise an IP marked once would be skipped forever, regardless of
        # the configured window.
        self.session_recent_cache.clear()

        payload = {
            "filter": self._build_search_filter(cursor),
            # Oldest first: each yielded record carries its log timestamp as the
            # crawl cursor, so events must arrive in ascending order for the
            # cursor to only ever move forward.
            "sort": "timestamp",
            "page": {"limit": 1000},
        }

        try:
            logs = self._search_datadog_logs(payload, timeout=15)
            check.log.info("SCAMALYTICS: fetched %s logs from Datadog", len(logs))
        except Exception as e:
            # Best effort: a failed search ends this run, the next run retries.
            check.log.error("SCAMALYTICS: error fetching logs: %s", e)
            return

        scam_headers = {
            "Authorization": f"Bearer {scam_key}",
            "Customer-id": f"{customer_id}",
        }
        seen_this_run = set()

        for log_entry in logs:
            ts_str = log_entry.get("attributes", {}).get("timestamp")
            if not ts_str:
                continue

            # Extract any IPv4 literal found anywhere in the event.
            for ip in self.IP_PATTERN.findall(str(log_entry)):
                if ip in seen_this_run:
                    continue
                seen_this_run.add(ip)

                if not self._is_public_ip(ip) or self._recently_enriched(ip):
                    continue

                try:
                    scam_resp = requests.get(f"{scam_api_url}{ip}", headers=scam_headers, timeout=10)
                    scam_resp.raise_for_status()
                    scam_data = scam_resp.json()
                except Exception as e:
                    check.log.error("SCAMALYTICS: Scamalytics API error for %s: %s", ip, e)
                    continue

                # Emit enriched record via crawler
                yield LogRecord(
                    data={
                        "message": f"Scamalytics report for IP {ip}",
                        "ddsource": "scamalytics-ti",
                        "service": "scamalytics",
                        "attributes": scam_data,
                    },
                    cursor={"timestamp": ts_str},
                )

                # Mark as processed only after the record was handed over.
                self._update_local_cache(ip)
                self.session_recent_cache[ip] = True

        # Drop expired entries and persist what is left.
        self._prune_expired_cache()
        self._save_recent_cache()

    def _build_search_filter(self, cursor) -> dict:
        """Return the Datadog search filter, starting 2 seconds before the cursor if one exists."""
        search_filter = {"to": "now", "query": self.LOG_QUERY}
        if not (cursor and cursor.get("timestamp")):
            return search_filter

        try:
            # 2s overlap for safety
            from_time = parse_iso8601_timestamp(cursor["timestamp"]) - timedelta(seconds=2)
        except (AttributeError, TypeError, ValueError) as e:
            self.check.log.warning("SCAMALYTICS: ignoring unparsable cursor timestamp: %s", e)
            return search_filter

        # Always emit UTC Z format
        search_filter["from"] = from_time.astimezone(timezone.utc).strftime(self.TIMESTAMP_FORMAT)
        return search_filter

    def _search_datadog_logs(self, payload: dict, timeout: float) -> list:
        """
        POST ``payload`` to the Datadog Logs Search API and return the ``data`` list.

        Raises whatever ``requests`` raises on transport errors, HTTP error
        statuses, or an undecodable body; callers decide how to degrade.
        """
        # NOTE(review): Datadog integrations usually go through the check's
        # built-in HTTP wrapper instead of calling ``requests`` directly -
        # confirm whether that applies to crawler streams before switching.
        instance = self.check.instance
        dd_site = instance.get("dd_site", "datadoghq.com")
        resp = requests.post(
            f"https://api.{dd_site}/api/v2/logs/events/search",
            headers={
                "DD-API-KEY": instance.get("dd_api_key"),
                "DD-APPLICATION-KEY": instance.get("dd_app_key"),
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=timeout,
        )
        resp.raise_for_status()
        return resp.json().get("data", [])

    def _recently_enriched(self, ip: str) -> bool:
        """Return True if ``ip`` was enriched inside the skip window (session, persistent, then remote)."""
        log = self.check.log

        # 1) Fast local session cache check
        if self.session_recent_cache.get(ip) is True:
            log.debug("SCAMALYTICS: SKIP %s (session cache <%sh)", ip, self.skip_window_hours)
            return True

        # 2) Persistent cache check (authoritative)
        if self._processed_recently_local(ip):
            log.info("SCAMALYTICS: SKIP %s (persistent cache <%sh)", ip, self.skip_window_hours)
            self.session_recent_cache[ip] = True
            return True

        # 3) Remote fallback (Datadog logs); a hit seeds both caches itself.
        if self._was_recently_processed_remote(ip):
            log.info("SCAMALYTICS: SKIP %s (remote logs <%sh)", ip, self.skip_window_hours)
            return True

        return False

    @staticmethod
    def _is_public_ip(ip: str) -> bool:
        """
        Return True only for a valid, globally routable, unicast IPv4 address.

        Invalid literals (for example an octet above 255) and private, loopback,
        link-local, shared (CGNAT), documentation, reserved and multicast
        addresses all return False.
        """
        import ipaddress

        try:
            address = ipaddress.IPv4Address(ip)
        except ValueError:
            return False
        # ``is_global`` does not exclude multicast, so test it explicitly.
        return address.is_global and not address.is_multicast

    def _load_recent_cache(self) -> None:
        """Load the persistent IP cache from the Agent into ``self.recent_cache``."""
        try:
            cache_str = self.check.read_persistent_cache(self.CACHE_KEY)
            loaded = json.loads(cache_str) if cache_str else {}
        except Exception as e:
            self.check.log.warning("SCAMALYTICS: failed to load persistent cache: %s", e)
            loaded = {}

        if not isinstance(loaded, dict):
            # Anything but a mapping would break every later lookup.
            self.check.log.warning("SCAMALYTICS: failed to load persistent cache: %s", "unexpected format")
            loaded = {}
        self.recent_cache = loaded

    def _save_recent_cache(self) -> None:
        """Persist the current cache to the Agent store (best effort)."""
        try:
            self.check.write_persistent_cache(self.CACHE_KEY, json.dumps(self.recent_cache))
        except Exception as e:
            self.check.log.warning("SCAMALYTICS: failed to save persistent cache: %s", e)

    def _update_local_cache(self, ip: str) -> None:
        """Mark ``ip`` as processed at the current UTC time (ISO8601 Z) and persist it."""
        self.recent_cache[ip] = datetime.now(timezone.utc).strftime(self.TIMESTAMP_FORMAT)
        # Write-through so restarts keep the state
        self._save_recent_cache()

    def _processed_recently_local(self, ip: str) -> bool:
        """Return True if the persistent cache shows ``ip`` was processed within the window."""
        ts_str = self.recent_cache.get(ip)
        if not ts_str:
            return False
        try:
            age = datetime.now(timezone.utc) - parse_iso8601_timestamp(ts_str)
            return age < timedelta(hours=self.skip_window_hours)
        except (AttributeError, TypeError, ValueError, OverflowError):
            # On any parse issue, fail open (treat as not processed)
            return False

    def _prune_expired_cache(self) -> None:
        """Remove entries older than the configured window, and entries that cannot be parsed."""
        now = datetime.now(timezone.utc)
        expiry = timedelta(hours=self.skip_window_hours)

        def is_expired(ts_str) -> bool:
            try:
                return (now - parse_iso8601_timestamp(ts_str)) > expiry
            except (AttributeError, TypeError, ValueError):
                # If unparsable, drop it
                return True

        self.recent_cache = {ip: ts for ip, ts in self.recent_cache.items() if not is_expired(ts)}

    def _was_recently_processed_remote(self, ip: str) -> bool:
        """
        Fallback: search Datadog logs for a Scamalytics entry for ``ip`` in the last window.

        Returns True if one is found, and in that case seeds both caches so the
        address is not re-checked in this run or the next ones. Any failure is
        treated as "not processed" so that an address is never silently lost.
        """
        # Check per-run memory cache first
        if ip in self.session_recent_cache:
            return self.session_recent_cache[ip] is True

        payload = {
            "filter": {
                "from": f"now-{self.skip_window_hours}h",
                "to": "now",
                "query": f"source:scamalytics-ti service:scamalytics @attributes.scamalytics.ip:{ip}",
            },
            "page": {"limit": 1},
        }

        try:
            found = len(self._search_datadog_logs(payload, timeout=10)) > 0
        except Exception as e:
            self.check.log.warning("SCAMALYTICS: remote recent-check failed for %s: %s", ip, e)
            # Fail open to avoid data loss (treat as not processed)
            return False

        if found:
            self._update_local_cache(ip)
            self.session_recent_cache[ip] = True
        return found
+# To change this file you should edit assets/configuration/spec.yaml and then run the following commands: +# ddev -x validate config -s +# ddev -x validate models -s + + +from __future__ import annotations + +from typing import Optional + +from pydantic import BaseModel, ConfigDict, field_validator, model_validator + +from datadog_checks.base.utils.functions import identity +from datadog_checks.base.utils.models import validation + +from . import defaults, validators + + +class InstanceConfig(BaseModel): + model_config = ConfigDict( + validate_default=True, + arbitrary_types_allowed=True, + frozen=True, + ) + empty_default_hostname: Optional[bool] = None + min_collection_interval: Optional[float] = None + service: Optional[str] = None + tags: Optional[tuple[str, ...]] = None + + @model_validator(mode='before') + def _initial_validation(cls, values): + return validation.core.initialize_config(getattr(validators, 'initialize_instance', identity)(values)) + + @field_validator('*', mode='before') + def _validate(cls, value, info): + field = cls.model_fields[info.field_name] + field_name = field.alias or info.field_name + if field_name in info.context['configured_fields']: + value = getattr(validators, f'instance_{info.field_name}', identity)(value, field=field) + else: + value = getattr(defaults, f'instance_{info.field_name}', lambda: value)() + + return validation.utils.make_immutable(value) + + @model_validator(mode='after') + def _final_validation(cls, model): + return validation.core.check_model(getattr(validators, 'check_instance', identity)(model)) diff --git a/scamalytics/datadog_checks/scamalytics/config_models/shared.py b/scamalytics/datadog_checks/scamalytics/config_models/shared.py new file mode 100644 index 0000000000..3ae366a9ad --- /dev/null +++ b/scamalytics/datadog_checks/scamalytics/config_models/shared.py @@ -0,0 +1,44 @@ +# This file is autogenerated. 
+# To change this file you should edit assets/configuration/spec.yaml and then run the following commands: +# ddev -x validate config -s +# ddev -x validate models -s + + +from __future__ import annotations + +from typing import Optional + +from pydantic import BaseModel, ConfigDict, field_validator, model_validator + +from datadog_checks.base.utils.functions import identity +from datadog_checks.base.utils.models import validation + +from . import defaults, validators + + +class SharedConfig(BaseModel): + model_config = ConfigDict( + validate_default=True, + arbitrary_types_allowed=True, + frozen=True, + ) + service: Optional[str] = None + + @model_validator(mode='before') + def _initial_validation(cls, values): + return validation.core.initialize_config(getattr(validators, 'initialize_shared', identity)(values)) + + @field_validator('*', mode='before') + def _validate(cls, value, info): + field = cls.model_fields[info.field_name] + field_name = field.alias or info.field_name + if field_name in info.context['configured_fields']: + value = getattr(validators, f'shared_{info.field_name}', identity)(value, field=field) + else: + value = getattr(defaults, f'shared_{info.field_name}', lambda: value)() + + return validation.utils.make_immutable(value) + + @model_validator(mode='after') + def _final_validation(cls, model): + return validation.core.check_model(getattr(validators, 'check_shared', identity)(model)) diff --git a/scamalytics/datadog_checks/scamalytics/config_models/validators.py b/scamalytics/datadog_checks/scamalytics/config_models/validators.py new file mode 100644 index 0000000000..39523e4f92 --- /dev/null +++ b/scamalytics/datadog_checks/scamalytics/config_models/validators.py @@ -0,0 +1,9 @@ +# Here you can include additional config validators or transformers +# +# def initialize_instance(values, **kwargs): +# if 'my_option' not in values and 'my_legacy_option' in values: +# values['my_option'] = values['my_legacy_option'] +# if values.get('my_number') > 
10: +# raise ValueError('my_number max value is 10, got %s' % str(values.get('my_number'))) +# +# return values diff --git a/scamalytics/datadog_checks/scamalytics/data/conf.yaml b/scamalytics/datadog_checks/scamalytics/data/conf.yaml new file mode 100644 index 0000000000..a0ca6d9a21 --- /dev/null +++ b/scamalytics/datadog_checks/scamalytics/data/conf.yaml @@ -0,0 +1,16 @@ + +init_config: + +instances: + - dd_api_key: "" + dd_app_key: "" + scamalytics_api_key: "" + customer_id: "" + scamalytics_api_url: "" + dd_site: "datadoghq.com" + skip_window_hours: 24 +logs: + - type: integration + source: scamalytics-ti + service: scamalytics + diff --git a/scamalytics/datadog_checks/scamalytics/data/conf.yaml.example b/scamalytics/datadog_checks/scamalytics/data/conf.yaml.example new file mode 100644 index 0000000000..57b46cc14a --- /dev/null +++ b/scamalytics/datadog_checks/scamalytics/data/conf.yaml.example @@ -0,0 +1,55 @@ +## All options defined here are available to all instances. +# +init_config: + + ## @param service - string - optional + ## Attach the tag `service:` to every metric, event, and service check emitted by this integration. + ## + ## Additionally, this sets the default `service` for every log source. + # + # service: + +## Every instance is scheduled independently of the others. +# +instances: + + - + ## @param tags - list of strings - optional + ## A list of tags to attach to every metric and service check emitted by this instance. + ## + ## Learn more about tagging at https://docs.datadoghq.com/tagging + # + # tags: + # - : + # - : + + ## @param service - string - optional + ## Attach the tag `service:` to every metric, event, and service check emitted by this integration. + ## + ## Overrides any `service` defined in the `init_config` section. + # + # service: + + ## @param min_collection_interval - number - optional - default: 15 + ## This changes the collection interval of the check. 
For more information, see: + ## https://docs.datadoghq.com/developers/write_agent_check/#collection-interval + # + # min_collection_interval: 15 + + ## @param empty_default_hostname - boolean - optional - default: false + ## This forces the check to send metrics with no hostname. + ## + ## This is useful for cluster-level checks. + # + # empty_default_hostname: false + + ## @param metric_patterns - mapping - optional + ## A mapping of metrics to include or exclude, with each entry being a regular expression. + ## + ## Metrics defined in `exclude` will take precedence in case of overlap. + # + # metric_patterns: + # include: + # - + # exclude: + # - diff --git a/scamalytics/hatch.toml b/scamalytics/hatch.toml new file mode 100644 index 0000000000..c85c5f07a7 --- /dev/null +++ b/scamalytics/hatch.toml @@ -0,0 +1,4 @@ +[env.collectors.datadog-checks] + +[[envs.default.matrix]] +python = ["3.12"] diff --git a/scamalytics/manifest.json b/scamalytics/manifest.json new file mode 100644 index 0000000000..431130c617 --- /dev/null +++ b/scamalytics/manifest.json @@ -0,0 +1,70 @@ +{ + "manifest_version": "2.0.0", + "app_uuid": "382b2f17-d803-4ec2-912e-e3eb611397cf", + "app_id": "scamalytics", + "display_on_public_website": true, + "tile": { + "overview": "README.md#Overview", + "configuration": "README.md#Setup", + "support": "README.md#Support", + "changelog": "CHANGELOG.md", + "description": "", + "title": "Scamalytics", + "media": [ + { + "media_type": "image", + "caption": "FILL IN Image 1 caption", + "image_url": "" + }, + { + "media_type": "image", + "caption": "FILL IN Image 2 caption", + "image_url": "" + }, + { + "media_type": "image", + "caption": "FILL IN Image 3 caption", + "image_url": "" + } + ], + "classifier_tags": [ + "", + "Supported OS::Linux", + "Supported OS::Windows", + "Supported OS::macOS", + "Category::", + "Offering::", + "Queried Data Type::", + "Submitted Data Type::" + ] + }, + "assets": { + "integration": { + "auto_install": true, + 
"source_type_id": 54490979, + "source_type_name": "Scamalytics", + "configuration": { + "spec": "assets/configuration/spec.yaml" + }, + "events": { + "creates_events": false + }, + "metrics": { + "prefix": "scamalytics.", + "check": "", + "metadata_path": "metadata.csv" + } + }, + "dashboards": { + "": "assets/dashboards/.json" + }, + "monitors": {}, + "saved_views": {} + }, + "author": { + "support_email": "dev@scamalytics.com", + "name": "Scamalytics Dev", + "homepage": "", + "sales_email": "" + } +} diff --git a/scamalytics/metadata.csv b/scamalytics/metadata.csv new file mode 100644 index 0000000000..02cde5e983 --- /dev/null +++ b/scamalytics/metadata.csv @@ -0,0 +1 @@ +metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name,curated_metric,sample_tags diff --git a/scamalytics/pyproject.toml b/scamalytics/pyproject.toml new file mode 100644 index 0000000000..f68d59142d --- /dev/null +++ b/scamalytics/pyproject.toml @@ -0,0 +1,60 @@ +[build-system] +requires = [ + "hatchling>=0.13.0", +] +build-backend = "hatchling.build" + +[project] +name = "datadog-scamalytics" +description = "The Scamalytics check" +readme = "README.md" +license = "BSD-3-Clause" +requires-python = ">=3.12" +keywords = [ + "datadog", + "datadog agent", + "datadog check", + "scamalytics", +] +authors = [ + { name = "Scamalytics Dev", email = "dev@scamalytics.com" }, +] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "License :: OSI Approved :: BSD License", + "Private :: Do Not Upload", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +dependencies = [ + "datadog-checks-base>=37.20.0", +] +dynamic = [ + "version", +] + +[project.optional-dependencies] +deps = [] + +[project.urls] +Source = "https://github.com/DataDog/integrations-extras" + +[tool.hatch.version] +path = "datadog_checks/scamalytics/__about__.py" + 
@pytest.mark.unit
def test_config_validation():
    """
    Validate configuration handling for the ScamalyticsCheck.

    A ConfigurationError naming the missing keys must be raised when required
    fields are absent, and a complete instance must be accepted unchanged.
    """
    # Missing all keys
    with pytest.raises(ConfigurationError, match="scamalytics_api_key"):
        ScamalyticsCheck('scamalytics', {}, [{}])

    # Missing some required keys: the message lists the ones still absent
    with pytest.raises(ConfigurationError, match="scamalytics_api_url"):
        ScamalyticsCheck('scamalytics', {}, [{'scamalytics_api_key': 'dummy'}])

    # Valid instance
    valid_instance = {
        'scamalytics_api_key': 'test_key',
        'scamalytics_api_url': 'https://api-ti-us.scamalytics.com/tiprem/?ip=',
        'customer_id': 'test_customer',
        'dd_api_key': 'test_dd_key',
        'dd_app_key': 'test_dd_app',
    }

    check = ScamalyticsCheck('scamalytics', {}, [valid_instance])
    assert check.instance == valid_instance


@pytest.mark.unit
@pytest.mark.parametrize(
    "ip, expected",
    [
        # Public IPs
        ("8.8.8.8", True),
        ("1.1.1.1", True),
        # Just outside 172.16.0.0/12 on both sides
        ("172.15.255.255", True),
        ("172.32.0.1", True),
        # Private, loopback and link-local IPs
        ("192.168.1.5", False),
        ("10.0.0.1", False),
        ("172.16.0.5", False),
        ("172.31.255.255", False),
        ("127.0.0.1", False),
        ("169.254.5.10", False),
    ],
)
def test_is_public_ip(ip, expected):
    """Verify that IP classification separates public from non-routable addresses."""
    assert ScamalyticsLogStream._is_public_ip(ip) is expected


@pytest.mark.integration
def test_scamalytics_api_end_to_end():
    """
    Run the crawler stream against the live Datadog and Scamalytics APIs.

    Skipped unless all credentials are present in the environment. Any
    exception raised by the stream fails the test with its full traceback.
    """
    dd_api_key = os.getenv("DD_API_KEY")
    dd_app_key = os.getenv("DD_APP_KEY")
    scam_key = os.getenv("SCAM_API_KEY")
    customer_id = os.getenv("SCAM_CUSTOMER_ID")
    scam_api_url = "https://api-ti-us.scamalytics.com/tiprem/?ip="
    dd_site = os.getenv("DD_SITE", "datadoghq.com")

    if not all([dd_api_key, dd_app_key, scam_key, customer_id]):
        pytest.skip("Integration credentials not set (DD_API_KEY, SCAM_API_KEY, etc.)")

    instance = {
        "dd_api_key": dd_api_key,
        "dd_app_key": dd_app_key,
        "dd_site": dd_site,
        "scamalytics_api_key": scam_key,
        "scamalytics_api_url": scam_api_url,
        "customer_id": customer_id,
    }

    # Initialize the check and get its crawler stream
    check = ScamalyticsCheck("scamalytics", {}, [instance])
    streams = check.get_log_streams()
    assert streams, "No log streams returned by ScamalyticsCheck"

    # Let exceptions propagate: pytest reports them with the traceback intact.
    records = list(streams[0].records())

    assert all(hasattr(r, "data") for r in records)
def test_check(dd_run_check, aggregator, instance):
    """
    Run the ScamalyticsCheck once against a stubbed Datadog Logs Search API.

    The search is stubbed to return no events, so the test performs no network
    I/O. It verifies that a run attempts the search and submits no metrics
    beyond those declared in metadata.csv.
    """
    from unittest import mock

    empty_search = mock.MagicMock()
    empty_search.json.return_value = {"data": []}

    check = ScamalyticsCheck('scamalytics', {}, [instance])
    with mock.patch(
        "datadog_checks.scamalytics.check.requests.post",
        return_value=empty_search,
    ) as search:
        dd_run_check(check)

    # Exactly one search: with no events there is nothing to de-duplicate remotely.
    search.assert_called_once()

    aggregator.assert_all_metrics_covered()
    aggregator.assert_metrics_using_metadata(get_metadata_metrics())