diff --git a/conda/conda-reqs-pip.txt b/conda/conda-reqs-pip.txt
index ab3048b3c..a143ec4d3 100644
--- a/conda/conda-reqs-pip.txt
+++ b/conda/conda-reqs-pip.txt
@@ -1,11 +1,11 @@
 azure-mgmt-resourcegraph>=8.0.0
-azure-monitor-query>=1.0.0, <=2.0.0
+joblib>=1.3.0
 mo-sql-parsing>=11, <12.0.0
 nest_asyncio>=1.4.0
-passivetotal>=2.5.3
-sumologic-sdk>=0.1.11
-splunk-sdk>=1.6.0,!=2.0.0
 packaging>=24.0
+passivetotal>=2.5.3
+python_openobserve>=0.4.2
 requests>=2.31.0
 rrcf==0.4.4
-joblib>=1.3.0
+splunk-sdk>=1.6.0,!=2.0.0
+sumologic-sdk>=0.1.11
diff --git a/conda/conda-reqs.txt b/conda/conda-reqs.txt
index 8a70201dc..a16b75000 100644
--- a/conda/conda-reqs.txt
+++ b/conda/conda-reqs.txt
@@ -1,17 +1,18 @@
 attrs>=18.2.0
 azure-common>=1.1.18
 azure-core>=1.24.0
-azure-mgmt-core>=1.2.1
 azure-identity>=1.16.1
 azure-keyvault-secrets>=4.0.0
-azure-kusto-data>=4.4.0, <=5.0.0
+azure-kusto-data<7.0.0,>=4.4.0
 azure-mgmt-compute>=4.6.2
+azure-mgmt-core>=1.2.1
 azure-mgmt-keyvault>=2.0.0
 azure-mgmt-monitor
 azure-mgmt-network>=2.7.0
 azure-mgmt-resource>=16.1.0
-azure-storage-blob>=12.5.0
 azure-mgmt-subscription
+azure-monitor-query>=1.0.0, <=3.0.0
+azure-storage-blob>=12.5.0
 beautifulsoup4>=4.0.0
 bokeh>=3.0.0
 cryptography>=43.0.1
@@ -27,8 +28,8 @@ jinja2>=3.1.5 # (sec vuln) transitive dependency via multiple packages
 keyring>=13.2.1
 lxml>=4.6.5
 matplotlib>=3.0.0
-msal>=1.12.0
 msal_extensions>=0.3.0
+msal>=1.12.0
 msrest>=0.6.0
 msrestazure>=0.6.0
 networkx>=2.2
diff --git a/docs/notebooks/Openobserve-DataConnector.ipynb b/docs/notebooks/Openobserve-DataConnector.ipynb
new file mode 100644
index 000000000..91d16e833
--- /dev/null
+++ b/docs/notebooks/Openobserve-DataConnector.ipynb
@@ -0,0 +1,588 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# OpenObserve - Data Connector"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Description\n",
+    "The data provider module of msticpy provides functions to define data sources, connectors to them, and queries for them, as well as the ability to return query results from the defined data sources. \n",
+    "\n",
+    "For more information on Data Providers, check the documentation\n",
+    "- Data Provider: https://msticpy.readthedocs.io/en/latest/data_acquisition/DataProviders.html\n",
+    "\n",
+    "In this notebook we will demonstrate the OpenObserve data connector feature of msticpy. \n",
+    "This feature is built on top of the [OpenObserve REST API](https://openobserve.ai/docs/api/) and the [Unofficial python-openobserve module](https://github.com/JustinGuese/python-openobserve) with some customizations and enhancements (https://github.com/juju4/python-openobserve/tree/devel-all).",
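+    "\n",
+    "\n",
+    "The connector depends on the `python_openobserve` package, which this change exposes through the `openobserve` pip extra: `pip install msticpy[openobserve]`."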
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Installation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Only run first time to install/upgrade msticpy to latest version\n",
+    "# %pip install --upgrade msticpy"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Authentication\n",
+    "\n",
+    "Authentication for the OpenObserve data provider is handled by specifying credentials (user and password) directly in the connect call, or by specifying the credentials in the msticpy config file.\n",
+    "\n",
+    "For more information on how to create credentials, follow the OpenObserve docs on [Users](https://openobserve.ai/docs/user-guide/users/).\n",
+    "\n",
+    "Once you have created a user account, you will need the following details when connecting\n",
+    "- connection_str = \"https://localhost:5080\" (bare server install) or \"https://cloud.openobserve.ai?\" (cloud)\n",
+    "- user = \"xxx\" (as created)\n",
+    "- password = \"xxx\" (same)\n",
+    "\n",
+    "Once you have these details, you can specify them in `msticpyconfig.yaml` as shown in the example below"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-08-07T17:50:18.361039Z",
+     "start_time": "2020-08-07T17:50:18.349006Z"
+    }
+   },
+   "source": [
+    "```\n",
+    "DataProviders:\n",
+    "  Openobserve:\n",
+    "    Args:\n",
+    "      connection_str: \"{Openobserve url endpoint}\"\n",
+    "      user: \"{user with search permissions to connect}\"\n",
+    "      password: \"{password of the user specified}\"\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream"
+    }
+   ],
+   "source": [
+    "# Check we are running Python 3.6\n",
+    "import sys\n",
+    "\n",
+    "MIN_REQ_PYTHON = (3, 6)\n",
+    "if sys.version_info < MIN_REQ_PYTHON:\n",
+    "    print(\"Check the Kernel->Change Kernel menu and ensure that Python 3.6\")\n",
+    "    print(\"or later is selected as the active kernel.\")\n",
+    "    sys.exit(\"Python %s.%s or later is required.\\n\" % MIN_REQ_PYTHON)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Imports Complete\n"
+     ]
+    }
+   ],
+   "source": [
+    "# imports\n",
+    "import pandas as pd\n",
+    "from datetime import datetime, timedelta\n",
+    "\n",
+    "# data library imports\n",
+    "from msticpy.data.data_providers import QueryProvider\n",
+    "\n",
+    "print(\"Imports Complete\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "# Custom Certificate Authority?\n",
+    "os.environ['REQUESTS_CA_BUNDLE'] = os.environ['HOME'] + '/path/to/ca-bundle-internal.pem'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# use custom config file?\n",
+    "# os.environ['MSTICPYCONFIG'] = '/path/to/msticpyconfig.yaml'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# FIXME! does not get MSTICPYCONFIG...\n",
+    "from msticpy.config import MpConfigEdit\n",
+    "# mpconfig = MpConfigFile()\n",
+    "# mpconfig.load_default()\n",
+    "# mpconfig.view_settings()\n",
+    "mpedit = MpConfigEdit()\n",
+    "mpedit"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "## Instantiating a query provider\n",
+    "\n",
+    "You can instantiate a data provider for OpenObserve by specifying the credentials in connect or in the msticpy config file. \n",
+    "If the details are correct and authentication is successful, it will show connected."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "connected with user user@example.com\n"
+     ]
+    }
+   ],
+   "source": [
+    "openobserve_prov = QueryProvider(\"OpenObserve\")\n",
+    "openobserve_prov.connect(connection_str=\"\", user=\"\", password=\"\")\n",
+    "# openobserve_prov.connect()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Running an Ad-hoc OpenObserve query\n",
+    "You can define your own OpenObserve query and run it via the OpenObserve provider using `QUERY_PROVIDER.exec_query()`\n",
+    "\n",
+    "For more information, check the documentation [Running an Ad-hoc Query](https://msticpy.readthedocs.io/en/latest/data_acquisition/DataProviders.html#running-an-ad-hoc-query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO: from 2025-03-11 18:53:41.122503 to 2025-03-18 18:53:41.122503, TZ UTC\n",
+      "{'query': {'end_time': 1742324021122503,\n",
+      " 'sql': '\\n'\n",
+      "        'SELECT log_file_name,count(*) FROM \"default\" GROUP BY '\n",
+      "        'log_file_name\\n',\n",
+      " 'start_time': 1741719221122503}}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
count(*)log_file_name
0110history.log
12mail.err
\n", + "
" + ], + "text/plain": [ + " count(*) log_file_name\n", + "0 110 history.log\n", + "1 2 mail.err" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "openobserve_query = \"\"\"\n", + "SELECT log_file_name,count(*) FROM \"default\" GROUP BY log_file_name\n", + "\"\"\"\n", + "df = openobserve_prov.exec_query(openobserve_query, days=7, verbosity=1)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
body__systemd_unitcount(*)
000-kunai.service20812630
1init.scope217243
2osqueryd.service93914
3falcoctl-artifact-follow.service87232
4ssh.service36890
\n", + "
" + ], + "text/plain": [ + " body__systemd_unit count(*)\n", + "0 00-kunai.service 20812630\n", + "1 init.scope 217243\n", + "2 osqueryd.service 93914\n", + "3 falcoctl-artifact-follow.service 87232\n", + "4 ssh.service 36890" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "openobserve_query = \"\"\"SELECT body__systemd_unit, count(*) FROM \"journald\" group by body__systemd_unit order by count(*) desc\"\"\"\n", + "df = openobserve_prov.exec_query(\n", + " openobserve_query,\n", + " start_time=datetime.now() - timedelta(days=7),\n", + " end_time=datetime.now() - timedelta(days=1),\n", + ")\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "# first/last seen of streams\n", + "streams = ['default', 'journald', 'zeek', 'webproxy']\n", + "df_streams_seen = pd.DataFrame(columns=['first_seen', 'last_seen'])\n", + "days_period = 7" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_2729/4108130129.py:19: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n", + " df_streams_seen = pd.concat([df_streams_seen, pd.DataFrame({ \"first_seen\": [df_first['_timestamp'][0]], \"last_seen\": [df_last['_timestamp'][0]]}, index=[s])], ignore_index=False)\n" + ] + } + ], + "source": [ + "for s in streams:\n", + " # panic... pandas handling conversion anyway\n", + " # firstseen_sql = f\"SELECT date_format(_timestamp, '%Y-%m-%d %H:%M:%S', 'UTC') FROM \\\"{s}\\\" order by _timestamp asc limit 1\"\n", + " # lastseen_sql = f\"SELECT date_format(_timestamp, '%Y-%m-%d %H:%M:%S', 'UTC') FROM \\\"{s}\\\" order by _timestamp desc limit 1\"\n", + " firstseen_sql = f\"SELECT _timestamp FROM \\\"{s}\\\" order by _timestamp asc limit 1\"\n", + " lastseen_sql = f\"SELECT _timestamp FROM \\\"{s}\\\" order by _timestamp desc limit 1\"\n", + " df_first = openobserve_prov.exec_query(\n", + " firstseen_sql,\n", + " start_time=datetime.now() - timedelta(days=days_period),\n", + " end_time=datetime.now() - timedelta(days=0),\n", + " verbosity = 0,\n", + " )\n", + " df_last = openobserve_prov.exec_query(\n", + " lastseen_sql,\n", + " start_time=datetime.now() - timedelta(days=days_period),\n", + " end_time=datetime.now() - timedelta(days=0),\n", + " verbosity = 0,\n", + " )\n", + " df_streams_seen = pd.concat(\n", + " [df_streams_seen,\n", + " pd.DataFrame({ \"first_seen\": [df_first['_timestamp'][0]], \"last_seen\": [df_last['_timestamp'][0]]}, index=[s])\n", + " ],\n", + " ignore_index=False\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
first_seenlast_seen
default2025-03-12 06:27:19.3807082025-03-18 06:28:48.146779
journald2025-03-11 19:39:40.9595062025-03-18 19:39:41.134215
zeek2025-03-11 19:39:46.5796352025-03-18 19:39:40.746550
webproxy2025-03-11 19:40:16.7813572025-03-18 19:39:16.745473
\n", + "
" + ], + "text/plain": [ + " first_seen last_seen\n", + "default 2025-03-12 06:27:19.380708 2025-03-18 06:28:48.146779\n", + "journald 2025-03-11 19:39:40.959506 2025-03-18 19:39:41.134215\n", + "zeek 2025-03-11 19:39:46.579635 2025-03-18 19:39:40.746550\n", + "webproxy 2025-03-11 19:40:16.781357 2025-03-18 19:39:16.745473" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_streams_seen" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "- [OpenObserve REST API](https://openobserve.ai/docs/api/)\n", + "- [Unofficial python-openobserve module](https://github.com/JustinGuese/python-openobserve) with some customizations and enhancements (https://github.com/juju4/python-openobserve/tree/devel-all)\n", + "- Openobserve github discussions: https://github.com/openobserve/openobserve/discussions/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "hide_input": false, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": false, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "185.554px" + }, + "toc_section_display": true, + "toc_window_display": true + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/msticpy/data/core/query_defns.py b/msticpy/data/core/query_defns.py index 46a4314eb..1196014af 100644 --- a/msticpy/data/core/query_defns.py +++ b/msticpy/data/core/query_defns.py @@ -39,6 +39,7 @@ class DataFamily(Enum): Cybereason = 11 Elastic = 14 Prismacloud = 21 + OpenObserve = 22 @classmethod def parse(cls, value: str | int) -> "DataFamily": @@ -119,6 +120,7 @@ class DataEnvironment(Enum): Velociraptor = 18 M365DGraph = 20 Prismacloud = 21 + OpenObserve = 22 MSSentinelSearch = 25 diff --git a/msticpy/data/drivers/__init__.py b/msticpy/data/drivers/__init__.py index 4182ffa8e..71d6f5362 100644 --- a/msticpy/data/drivers/__init__.py +++ b/msticpy/data/drivers/__init__.py @@ -42,6 +42,7 @@ DataEnvironment.M365DGraph: ("mdatp_driver", "MDATPDriver"), DataEnvironment.Prismacloud: ("prismacloud_driver", "PrismaCloudDriver"), DataEnvironment.MSSentinelSearch: ("azure_search_driver", "AzureSearchDriver"), + DataEnvironment.OpenObserve: ("openobserve_driver", "OpenObserveDriver"), } 
 CUSTOM_PROVIDERS: dict[str, type] = {}
diff --git a/msticpy/data/drivers/openobserve_driver.py b/msticpy/data/drivers/openobserve_driver.py
new file mode 100644
index 000000000..b4a555ebb
--- /dev/null
+++ b/msticpy/data/drivers/openobserve_driver.py
@@ -0,0 +1,426 @@
+# -------------------------------------------------------------------------
+# Copyright (c) juju4. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+"""OpenObserve Driver class."""
+
+from __future__ import annotations
+
+from datetime import datetime, timedelta
+from typing import Any
+
+import httpx
+import pandas as pd
+from python_openobserve.openobserve import OpenObserve
+
+from ..._version import VERSION
+from ...common.exceptions import (
+    MsticpyConnectionError,
+    MsticpyUserConfigError,
+    MsticpyUserError,
+)
+from ...common.provider_settings import ProviderSettings, get_provider_settings
+from ...common.utility import check_kwargs, export
+from .driver_base import DriverBase, DriverProps, QuerySource
+
+__version__ = VERSION
+__author__ = "juju4"
+
+
+OPENOBSERVE_CONNECT_ARGS = {
+    "connection_str": (
+        "(string) The url endpoint (the default is"
+        " 'https://localhost:5080')."
+    ),
+    "user": "(string) OpenObserve user, which is used to authenticate.",
+    "password": "(string) The matching OpenObserve password.",
+    "verify": "(bool) Validate TLS certificate (default True).",
+    "timeout": "(int) requests timeout (default 300).",
+}
+
+_HELP_URI = "https://msticpy.readthedocs.io/en/latest/DataProviders.html"
+_SL_NB_URI = (
+    "https://github.com/microsoft/msticpy/blob/pr/TODO/docs/"
+    "notebooks/OpenObserve-DataConnector.ipynb"
+)
+
+
+@export
+class OpenObserveDriver(DriverBase):
+    """Driver to connect and query from OpenObserve."""
+
+    _DEF_TIMEOUT = 300
+    _OPENOBSERVE_REQD_ARGS = ["connection_str", "user", "password", "verify"]
+    _CONNECT_DEFAULTS: dict[str, Any] = {
+        "connection_str": "https://localhost:5080",
+        "verify": True,
+        "timeout": _DEF_TIMEOUT,
+    }
+    _TIME_FORMAT = '"%Y-%m-%d %H:%M:%S.%6N"'
+
+    def __init__(self, **kwargs):
+        """Instantiate OpenObserve Driver."""
+        super().__init__(**kwargs)
+        self.service = None
+        self._loaded = True
+        self._connected = False
+        self._debug = kwargs.get("debug", False)
+        self.set_driver_property(DriverProps.PUBLIC_ATTRS, {"client": self.service})
+        self.set_driver_property(DriverProps.FORMATTERS, {"datetime": self._format_datetime})
+        self.timeout = self._DEF_TIMEOUT
+
+    def connect(self, connection_str: str | None = None, **kwargs):
+        """
+        Connect to OpenObserve via python-openobserve.
+
+        Parameters
+        ----------
+        connection_str : str | None
+            OpenObserve API url endpoint. default: https://localhost:5080
+
+        Other Parameters
+        ----------------
+        kwargs :
+            Connection parameters can be supplied as keyword parameters.
+
+        Notes
+        -----
+        Default configuration is read from the DataProviders/OpenObserve
+        section of msticpyconfig.yaml, if available.
+        Connecting does not perform an HTTP(S) request; it only populates
+        the properties of the underlying client.
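+
+        Examples
+        --------
+        Illustrative usage, mirroring the sample notebook (the credentials
+        shown are placeholders):
+
+        >>> qry_prov = QueryProvider("OpenObserve")
+        >>> qry_prov.connect(
+        ...     connection_str="https://localhost:5080",
+        ...     user="user@example.com",
+        ...     password="***",
+        ... )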
+
+        """
+        cs_dict = self._get_connect_args(connection_str, **kwargs)
+
+        arg_dict = {
+            key: val for key, val in cs_dict.items() if key in OPENOBSERVE_CONNECT_ARGS
+        }
+        try:
+            # https://github.com/JustinGuese/python-openobserve/blob/main/python_openobserve/openobserve.py#L22
+            self.service = OpenObserve(
+                host=arg_dict["connection_str"],
+                user=arg_dict["user"],
+                password=arg_dict["password"],
+                verify=arg_dict["verify"],
+                timeout=arg_dict["timeout"],
+            )
+        except httpx.ConnectError as err:
+            raise MsticpyConnectionError(
+                f"Authentication error connecting to OpenObserve: {err}",
+                title="OpenObserve connection",
+                help_uri=_HELP_URI,
+                nb_uri=_SL_NB_URI,
+            ) from err
+        except httpx.HTTPError as err:
+            raise MsticpyConnectionError(
+                f"Communication error connecting to OpenObserve: {err}",
+                title="OpenObserve connection",
+                help_uri=_HELP_URI,
+                nb_uri=_SL_NB_URI,
+            ) from err
+        except Exception as err:
+            raise MsticpyConnectionError(
+                f"Error connecting to OpenObserve: {err}",
+                title="OpenObserve connection",
+                help_uri=_HELP_URI,
+                nb_uri=_SL_NB_URI,
+            ) from err
+        self._connected = True
+        print(f"connected with user {arg_dict['user']}")
+
+    def _get_connect_args(self, connection_str: str | None = None, **kwargs) -> dict[str, Any]:
+        """Check and consolidate connection parameters."""
+        cs_dict: dict[str, Any] = self._CONNECT_DEFAULTS
+        # Fetch any config settings
+        settings, cs_is_instance_name = self._get_openobserve_settings(connection_str)
+        cs_dict.update(settings)
+        # If a connection string - parse this and add to config
+        if connection_str and not cs_is_instance_name:
+            cs_dict["connection_str"] = connection_str
+        if kwargs:
+            # if connection args supplied as kwargs
+            cs_dict.update(kwargs)
+        check_kwargs(cs_dict, list(OPENOBSERVE_CONNECT_ARGS.keys()))
+
+        missing_args = set(self._OPENOBSERVE_REQD_ARGS) - cs_dict.keys()
+        if missing_args:
+            raise MsticpyUserConfigError(
+                "One or more connection parameters missing for OpenObserve connector",
+                ", ".join(missing_args),
+                f"Required parameters are {', '.join(self._OPENOBSERVE_REQD_ARGS)}",
+                "All parameters:",
+                *[f"{arg}: {desc}" for arg, desc in OPENOBSERVE_CONNECT_ARGS.items()],
+                title="no OpenObserve connection parameters",
+                help_uri=_HELP_URI,
+                notebook_uri=_SL_NB_URI,
+            )
+        return cs_dict
+
+    # pylint: disable=broad-except
+    def _query(
+        self, query: str, query_source: QuerySource | None = None, **kwargs
+    ) -> pd.DataFrame | Any:
+        """
+        Execute OpenObserve query and retrieve results.
+
+        Parameters
+        ----------
+        query : str
+            OpenObserve query to execute
+        query_source : QuerySource | None
+            Not used.
+
+        Other Parameters
+        ----------------
+        days : int
+            Search the past X days.
+        start : datetime
+            A datetime() object representing the start of the search
+            window. If used without end_time, the end of the search
+            window is the current time.
+        start_time : datetime
+            alias for `start`
+        end : datetime
+            A datetime() object representing the end of the search window.
+            If used without start_time, the search start will be the earliest
+            time in the index.
+        end_time : datetime
+            alias for `end`
+        timezone : str
+            timezone used for the time range search
+        limit : int
+            An integer describing the max number of search results to return.
+        verbosity : int
+            Verbosity level, from 0 (least verbose) to 4 (most verbose).
+        timeout : int
+            timeout in seconds when gathering results
+
+        Returns
+        -------
+        pd.DataFrame | Any
+            Query results in a dataframe,
+            or the query response if an error occurs.
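+
+        Examples
+        --------
+        Illustrative only; normally invoked through the query provider, as
+        in the sample notebook:
+
+        >>> df = openobserve_prov.exec_query(
+        ...     'SELECT log_file_name,count(*) FROM "default" GROUP BY log_file_name',
+        ...     days=7,
+        ... )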
+
+        """
+        del query_source
+        if not self._connected:
+            raise self._create_not_connected_err("OpenObserve")
+
+        verbosity = kwargs.pop("verbosity", 0)
+        timezone = kwargs.pop("timezone", "UTC")
+        self.timeout = kwargs.pop("timeout", self._DEF_TIMEOUT)
+
+        start_time, end_time = self._get_time_params(**kwargs)
+
+        if "limit" in kwargs and kwargs["limit"] <= 10000 and " limit " not in query.lower():
+            limit = kwargs["limit"]
+            query = f"{query} limit {limit}"
+        else:
+            limit = None
+
+        if verbosity >= 1:
+            print(f"INFO: from {start_time} to {end_time}, TZ {timezone}")
+
+        if verbosity >= 2:
+            print(f"DEBUG: query {query}")
+        # submit the search job
+        try:
+            searchresults = self.service.search2df(
+                query,
+                start_time=start_time,
+                end_time=end_time,
+                verbosity=verbosity,
+                timeout=self.timeout,
+            )
+        except Exception as err:
+            self._raise_qry_except(err, "search_job", "to search job")
+
+        # return the results
+        return searchresults
+
+    @staticmethod
+    def _raise_qry_except(err: Exception, mssg: str, action: str | None = None):
+        if isinstance(err, httpx.HTTPError):
+            raise MsticpyConnectionError(
+                f"Communication error connecting to OpenObserve: {err}",
+                title=f"OpenObserve {mssg}",
+                help_uri=_HELP_URI,
+                notebook_uri=_SL_NB_URI,
+            ) from err
+        action = action or mssg
+        raise MsticpyConnectionError(
+            f"Failed {action}: {err}",
+            title=f"OpenObserve - {mssg}",
+            help_uri=_HELP_URI,
+            notebook_uri=_SL_NB_URI,
+        ) from err
+
+    def _get_time_params(self, **kwargs):
+        if "days" in kwargs:
+            end = datetime.now()
+            start = end - timedelta(days=kwargs["days"])
+            return self._format_datetime(start), self._format_datetime(end)
+
+        start = kwargs.pop("start", kwargs.pop("start_time", None))
+        end = kwargs.pop("end", kwargs.pop("end_time", None))
+        if start and not end:
+            end = datetime.now()
+        elif not start:
+            raise MsticpyUserError(
+                "Error! requires either 'days' or 'start' parameters",
+                title="Missing parameter.",
+                help_uri=_HELP_URI,
+                notebook_uri=_SL_NB_URI,
+            )
+        return self._format_datetime(start), self._format_datetime(end)
+
+    # pylint: disable=too-many-branches
+    def query(
+        self, query: str, query_source: QuerySource | None = None, **kwargs
+    ) -> pd.DataFrame | Any:
+        """
+        Execute OpenObserve query and retrieve results.
+
+        Parameters
+        ----------
+        query : str
+            OpenObserve query to execute
+        query_source : QuerySource | None
+            Not used.
+
+        Other Parameters
+        ----------------
+        days : int
+            Search the past X days.
+        start : datetime
+            A datetime() object representing the start of the search
+            window. If used without end_time, the end of the search
+            window is the current time.
+        start_time : datetime
+            alias for `start`
+        end : datetime
+            A datetime() object representing the end of the search window.
+            If used without start_time, the search start will be the earliest
+            time in the index.
+        end_time : datetime
+            alias for `end`
+        timezone : str
+            timezone used for the time range search
+        limit : int
+            An integer describing the max number of search results to return.
+        verbosity : int
+            Verbosity level, from 0 (least verbose) to 4 (most verbose).
+        timeout : int
+            timeout in seconds when gathering results
+        exporting : bool
+            Export results to file.
+        export_path : str
+            file path for exported results.
+        time_columns : list[str]
+            returned columns that should be converted to dataframe timestamp dtype
+        numeric_columns : list[str]
+            returned columns that should be converted to dataframe numeric dtype
+
+        Returns
+        -------
+        pd.DataFrame | Any
+            Query results in a dataframe,
+            or the query response if an error occurs.
+
+        """
+        del query_source
+
+        limit = kwargs.get("limit", None)
+        verbosity = kwargs.get("verbosity", 0)
+        exporting = kwargs.pop("exporting", False)
+        export_path = kwargs.pop("export_path", "")
+        time_columns = kwargs.pop("time_columns", [])
+        numeric_columns = kwargs.pop("numeric_columns", [])
+
+        dataframe_res = self._query(query, **kwargs)
+        if verbosity >= 1:
+            print(f"DEBUG: results shape {dataframe_res.shape}")
+        if verbosity >= 3:
+            print(f"DEBUG: {dataframe_res}")
+
+        if limit is not None and dataframe_res.shape[0] > limit:
+            dataframe_res = dataframe_res.head(limit)
+
+        for col in dataframe_res.columns:
+            try:
+                if col in numeric_columns:
+                    dataframe_res[col] = pd.to_numeric(dataframe_res[col])
+                # ensure timestamp format
+                if col in ["_timestamp"] + time_columns:
+                    dataframe_res[col] = pd.to_datetime(dataframe_res[col])
+
+            except Exception as err:
+                self._raise_qry_except(
+                    err,
+                    "query",
+                    f"query column type conversion: {col} -> {dataframe_res[col]}",
+                )
+
+        if exporting:
+            if export_path.endswith(".xlsx"):
+                if verbosity >= 2:
+                    print(f"DEBUG: Exporting results to excel file {export_path}")
+                dataframe_res.to_excel(export_path, index=False)
+            elif export_path.endswith(".csv"):
+                if verbosity >= 2:
+                    print(f"DEBUG: Exporting results to csv file {export_path}")
+                dataframe_res.to_csv(export_path, index=False)
+
+        return dataframe_res.copy()
+
+    def query_with_results(self, query: str, **kwargs) -> tuple[pd.DataFrame, Any]:
+        """
+        Execute query string and return DataFrame of results.
+
+        Parameters
+        ----------
+        query : str
+            Query to execute against OpenObserve instance.
+
+        Returns
+        -------
+        tuple[pd.DataFrame, Any]
+            A DataFrame (if successful) or
+            the underlying provider result if an error occurs.
+
+        """
+        raise NotImplementedError(f"Not supported for {self.__class__.__name__}")
+
+    # Parameter Formatting methods
+    @staticmethod
+    def _format_datetime(date_time: datetime) -> datetime:
+        """
+        Return the datetime unchanged.
+ + python-openobserve takes datetime or microseconds since epoch as input + """ + return date_time + + # Read values from configuration + @staticmethod + def _get_openobserve_settings( + instance_name: str | None = None, + ) -> tuple[dict[str, Any], bool]: + """Get config from msticpyconfig.""" + data_provs = get_provider_settings(config_section="DataProviders") + sl_settings = { + name: settings + for name, settings in data_provs.items() + if name.startswith("OpenObserve") + } + openobserve_settings: ProviderSettings | None + # Check if the connection string is an instance name + openobserve_settings = sl_settings.get(f"OpenObserve-{instance_name}") + if openobserve_settings: + is_instance_name = True + else: + # otherwise get the default OpenObserve entry + openobserve_settings = sl_settings.get("OpenObserve") + is_instance_name = False + return getattr(openobserve_settings, "args", {}), is_instance_name diff --git a/msticpy/data/queries/openobserve/openobserve_queries.yaml b/msticpy/data/queries/openobserve/openobserve_queries.yaml new file mode 100644 index 000000000..c378b92f3 --- /dev/null +++ b/msticpy/data/queries/openobserve/openobserve_queries.yaml @@ -0,0 +1,45 @@ +metadata: + version: 1 + description: OpenObserve Queries + data_environments: [OpenObserve] + data_families: [OpenObserveGeneral] + tags: ['alert', 'securityalert', 'process', 'account', 'network'] +defaults: + metadata: + data_source: 'bots' + parameters: + stream: + description: 'OpenObserve stream/index name' + type: str + default: 'default' + start: + description: Query start time + type: datetime + end: + description: Query end time + type: datetime + project_fields: + description: Project Field names + type: str + default: '_timestamp' + add_query_items: + description: Additional query clauses + type: str + default: 'limit 100' + timeformat: + description: 'Datetime format to use in OpenObserve query' + type: str + default: '"%Y-%m-%dT%H:%M:%S.%6N"' +sources: + list_default_log_file_name: + description: Summary of default stream events by log_file_name + metadata: + data_families: [OpenObserveGeneral] + args: + query: 'SELECT os_type,log_file_name,count(*) FROM "default" GROUP BY os_type,log_file_name ORDER BY count(*) desc' + list_journald_units: + description: Summary of journald stream events by hostname and comm/unit process + metadata: + data_families: [OpenObserveGeneral] + args: + query: 'SELECT body__hostname,body__comm,count(*) FROM "journald" GROUP BY body__hostname,body__comm ORDER BY count(*) desc' diff --git a/msticpy/resources/mpconfig_defaults.yaml b/msticpy/resources/mpconfig_defaults.yaml index 74814dc67..135b66c4c 100644 --- a/msticpy/resources/mpconfig_defaults.yaml +++ b/msticpy/resources/mpconfig_defaults.yaml @@ -214,6 +214,12 @@ DataProviders: ClientId: str(required=False, format=uuid) # [SuppressMessage("Microsoft.Security", "CS002:SecretInNextLine", Justification="Test code")] ClientSecret: *cred_key_opt + Openobserve: + Args: + connection_str: str() + user: str() + # [SuppressMessage("Microsoft.Security", "CS002:SecretInNextLine", Justification="Test code")] + password: *cred_key msticpy: FriendlyExceptions: bool(default=True, required=False) QueryDefinitions: list(required=False) diff --git a/mypy.ini b/mypy.ini index 681a70bbc..fa8f61096 100644 --- a/mypy.ini +++ b/mypy.ini @@ -158,3 +158,6 @@ ignore_missing_imports = True [mypy-rrcf.*] ignore_missing_imports = True + +[mypy-python_openobserve.*] +ignore_missing_imports = True diff --git a/requirements-all.txt b/requirements-all.txt 
index d8c3323ed..2be60514f 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -3,7 +3,7 @@ azure-common>=1.1.18 azure-core>=1.24.0 azure-identity>=1.16.1 azure-keyvault-secrets>=4.0.0 -azure-kusto-data>=4.4.0, <=5.0.0 +azure-kusto-data<7.0.0,>=4.4.0 azure-mgmt-compute>=4.6.2 azure-mgmt-core>=1.2.1 azure-mgmt-keyvault>=2.0.0 @@ -12,55 +12,56 @@ azure-mgmt-network>=2.7.0 azure-mgmt-resource>=16.1.0 azure-mgmt-resourcegraph>=8.0.0 azure-mgmt-subscription>=3.0.0 -azure-monitor-query>=1.0.0, <=2.0.0 +azure-monitor-query<=3.0.0,>=1.0.0 azure-storage-blob>=12.5.0 beautifulsoup4>=4.0.0 bokeh>=3.0.0 cryptography>=43.0.1 deprecated>=1.2.4 -dnspython>=2.0.0, <3.0.0 +dnspython<3.0.0,>=2.0.0 folium>=0.9.0 geoip2>=2.9.0 -httpx>=0.23.0, <1.0.0 html5lib +httpx<1.0.0,>=0.23.0 ipython>=7.23.1 -ipywidgets>=7.4.2, <9.0.0 -jinja2>=3.1.5 # (sec vuln) transitive dependency via multiple packages +ipywidgets<9.0.0,>=7.4.2 +jinja2>=3.1.5 +joblib>=1.3.0 keyring>=13.2.1 lxml>=4.6.5 matplotlib>=3.0.0 -mo-sql-parsing>=11, <12.0.0 +mo-sql-parsing<12.0.0,>=11 msal>=1.12.0 msal_extensions>=0.3.0 msrest>=0.6.0 msrestazure>=0.6.0 nest_asyncio>=1.4.0 networkx>=2.2 -numpy>=1.15.4 # pandas +numpy>=1.15.4 openpyxl>=3.0 packaging>=24.0 -pandas>=1.4.0, <3.0.0 +pandas<3.0.0,>=1.4.0 panel>=1.2.1 passivetotal>=2.5.3 -pydantic>=1.8.0, <3.0.0 +pydantic<3.0.0,>=1.8.0 pygments>=2.0.0 pyjwt>=2.3.0 -python-dateutil>=2.8.1 # pandas -pytz>=2019.2 # pandas +python-dateutil>=2.8.1 +python_openobserve>=0.4.2 +pytz>=2019.2 pyyaml>=3.13 +requests>=2.31.0 +rrcf==0.4.4 scikit-learn>=1.0.0 scipy>=1.1.0 setuptools>=40.6.3 -splunk-sdk>=1.6.0,!=2.0.0 +splunk-sdk!=2.0.0,>=1.6.0 statsmodels>=0.11.1 sumologic-sdk>=0.1.11 tldextract>=2.2.2 -tornado>=6.4.2 # (sec vuln) transitive dependency via bokeh +tornado>=6.4.2 tqdm>=4.36.1 typing-extensions>=4.2.0 urllib3>=1.23 -vt-py>=0.18.0 vt-graph-api>=2.0 -requests>=2.31.0 -rrcf==0.4.4 -joblib>=1.3.0 \ No newline at end of file +vt-py>=0.18.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 6a2b3f1d5..88b835cf7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,10 +3,10 @@ azure-common>=1.1.18 azure-core>=1.24.0 azure-identity>=1.16.1 azure-keyvault-secrets>=4.0.0 -azure-kusto-data>=4.4.0, <=5.0.0 +azure-kusto-data>=4.4.0, <7.0.0 azure-mgmt-keyvault>=2.0.0 azure-mgmt-subscription>=3.0.0 -azure-monitor-query>=1.0.0, <=2.0.0 +azure-monitor-query>=1.0.0, <=3.0.0 beautifulsoup4>=4.0.0 bokeh>=3.0.0 cryptography>=43.0.1 diff --git a/setup.py b/setup.py index eebfca367..82f610712 100644 --- a/setup.py +++ b/setup.py @@ -52,10 +52,9 @@ def _combine_extras(extras: list) -> list: "sql2kql": ["mo-sql-parsing>=11, <12.0.0"], "riskiq": ["passivetotal>=2.5.3", "requests>=2.31.0"], "panel": [], # now in core install + "openobserve": ["python_openobserve>=0.4.2"], } -extras_all = [ - extra for name, extras in EXTRAS.items() for extra in extras if name != "dev" -] +extras_all = [extra for name, extras in EXTRAS.items() for extra in extras if name != "dev"] EXTRAS["all"] = extras_all # Create combination extras diff --git a/tests/data/drivers/test_openobserve_driver.py b/tests/data/drivers/test_openobserve_driver.py new file mode 100644 index 000000000..1d5f72c61 --- /dev/null +++ b/tests/data/drivers/test_openobserve_driver.py @@ -0,0 +1,190 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+"""OpenObserve query test class."""
+# pylint: disable=missing-function-docstring,redefined-outer-name,unused-argument
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from unittest.mock import patch
+
+import pandas as pd
+import pytest
+import pytest_check as check
+
+from msticpy.common.exceptions import (
+    MsticpyConnectionError,
+    MsticpyNotConnectedError,
+    MsticpyUserConfigError,
+    MsticpyUserError,
+)
+
+_OPEN_OBSERVE_NOT_LOADED = True
+try:
+    from msticpy.data.drivers.openobserve_driver import OpenObserveDriver
+
+    _OPEN_OBSERVE_NOT_LOADED = False
+except ImportError:
+    pass
+
+UTC = timezone.utc
+OO_HOST = OO_USER = OO_PASS = "MOCK_INPUT"
+
+
+def mock_post(*args, **kwargs):
+    """MockResponse function for openobserve calls of requests.post."""
+    url = args[0]
+
+    class MockResponse:
+        """MockResponse class for openobserve calls of requests.post."""
+
+        def __init__(self, json_data, status_code):
+            self.json_data = json_data
+            self.status_code = status_code
+
+        def json(self):
+            return self.json_data
+
+    if "/api/default/_search" in url:
+        return MockResponse(
+            {
+                "took": 155,
+                "hits": [
+                    {
+                        "_p": "F",
+                        "_timestamp": 1674213225158000,
+                        "log": (
+                            "[2023-01-20T11:13:45Z INFO actix_web::middleware::logger] "
+                            '10.2.80.192 "POST /api/demo/_bulk HTTP/1.1" 200 68 "-" '
+                            '"go-resty/2.7.0 (https://github.com/go-resty/resty)" 0.001074'
+                        ),
+                        "stream": "stderr",
+                    }
+                ],
+                "total": 27179431,
+                "from": 0,
+                "size": 1,
+                "scan_size": 28943,
+            },
+            200,
+        )
+
+
+@pytest.mark.skipif(_OPEN_OBSERVE_NOT_LOADED, reason="OpenObserve driver not installed")
+def test_openobserve_connect_no_params():
+    """Check failure with no args."""
+    openobserve_driver = OpenObserveDriver()
+    check.is_true(openobserve_driver.loaded)
+
+    with pytest.raises(MsticpyUserConfigError) as mp_ex:
+        openobserve_driver.connect()
+        check.is_false(openobserve_driver.connected)
+    check.is_in("no OpenObserve connection parameters", mp_ex.value.args)
+
+
+@pytest.mark.skipif(_OPEN_OBSERVE_NOT_LOADED, reason="OpenObserve driver not installed")
+def test_openobserve_connect_req_params():
+    """Check load/connect success with required params."""
+    openobserve_driver = OpenObserveDriver()
+    check.is_true(openobserve_driver.loaded)
+
+    openobserve_driver.connect(
+        connection_str="https://localhost:5080",
+        user="***",
+        password="***",
+    )  # nosec
+    check.is_true(openobserve_driver.connected)
+
+
+@pytest.mark.skipif(_OPEN_OBSERVE_NOT_LOADED, reason="OpenObserve driver not installed")
+def test_openobserve_connect_errors():
+    """Check connect failure errors."""
+    openobserve_driver = OpenObserveDriver()
+    check.is_true(openobserve_driver.loaded)
+    openobserve_driver.connect(
+        connection_str="invalid", user="***", password="***"
+    )  # nosec
+
+    with pytest.raises(MsticpyConnectionError) as mp_ex:
+        openobserve_driver.query('select * from "default"', days=1)
+    check.is_in(
+        (
+            "Failed to search job: Invalid URL 'invalid/api/default/_search': "
+            "No scheme supplied. Perhaps you meant https://invalid/api/default/_search?"
+ ), + mp_ex.value.args, + ) + + openobserve_driver = OpenObserveDriver() + openobserve_driver.connect( + connection_str="https://nonexistent.example.com", user="***", password="***" + ) # nosec + with pytest.raises( + MsticpyConnectionError, + match="Max retries exceeded with url:", + ): + openobserve_driver.query('select * from "default"', days=1) + + +@pytest.mark.skipif(_OPEN_OBSERVE_NOT_LOADED, reason="OpenObserve driver not installed") +def test_openobserve_query_no_connect(): + """Check query fails when not connected true.""" + openobserve_driver = OpenObserveDriver() + # trying to get these before connecting should throw + with pytest.raises(MsticpyNotConnectedError) as mp_ex: + openobserve_driver.query("some query") + check.is_false(openobserve_driver.connected) + check.is_in("not connected to OpenObserve.", mp_ex.value.args) + + +@pytest.mark.skipif(_OPEN_OBSERVE_NOT_LOADED, reason="OpenObserve driver not installed") +@patch("requests.post", side_effect=mock_post) +def test_openobserve_query(mock_post): + """Check queries with different outcomes.""" + openobserve_drv = OpenObserveDriver() + openobserve_drv.connect(connection_str=OO_HOST, user=OO_USER, password=OO_PASS) + end = datetime.now(UTC) + start = end - timedelta(1) + + df_result = openobserve_drv.query( + "RandomQuery", start=start, end=end, timeout=1, verbosity=4 + ) + check.is_instance(df_result, pd.DataFrame) + check.equal(len(df_result), 1) + + +@pytest.mark.skipif(_OPEN_OBSERVE_NOT_LOADED, reason="OpenObserve driver not installed") +@patch("requests.post", side_effect=mock_post) +def test_openobserve_query_params(mock_post): + """Check queries with different parameters.""" + openobserve_drv = OpenObserveDriver() + openobserve_drv.connect(connection_str=OO_HOST, user=OO_USER, password=OO_PASS) + + df_result = openobserve_drv.query("RecordSuccess", days=1) + check.is_instance(df_result, pd.DataFrame) + check.equal(len(df_result), 1) + + with pytest.raises(MsticpyUserError) as mp_ex: + df_result = openobserve_drv.query("RecordSuccess") + check.is_in("Missing parameter.", mp_ex.value.args) + + +@pytest.mark.skipif(_OPEN_OBSERVE_NOT_LOADED, reason="OpenObserve driver not installed") +@patch("requests.post", side_effect=mock_post) +def test_openobserve_query_export(mock_post, tmpdir): + """Check queries with different parameters.""" + openobserve_drv = OpenObserveDriver() + openobserve_drv.connect(connection_str=OO_HOST, user=OO_USER, password=OO_PASS) + ext = "csv" + exp_file = f"openobserve_test.{ext}" + f_path = tmpdir.join(exp_file) + params = { + "exporting": True, + "export_path": str(f_path), + "verbosity": 5, + } + df_result = openobserve_drv.query("RecordSuccess", days=1, **params) + check.is_instance(df_result, pd.DataFrame) + check.equal(len(df_result), 1) + check.is_true(Path(f_path).is_file()) diff --git a/tests/test_pkg_imports.py b/tests/test_pkg_imports.py index fbc8885f3..a1601b27c 100644 --- a/tests/test_pkg_imports.py +++ b/tests/test_pkg_imports.py @@ -53,6 +53,7 @@ "kqlmagiccustom", "sumologic-sdk", "openpyxl", + "python-openobserve", } diff --git a/tools/create_reqs_all.py b/tools/create_reqs_all.py index da937b9f7..df53c3470 100644 --- a/tools/create_reqs_all.py +++ b/tools/create_reqs_all.py @@ -4,6 +4,7 @@ # license information. 
# -------------------------------------------------------------------------- """Requirements file writer from setup.py extras.""" + from __future__ import annotations import argparse @@ -12,16 +13,18 @@ from importlib import import_module from pathlib import Path +# Import Requirement with fallbacks for isolated environments (e.g., pre-commit) try: from packaging.requirements import Requirement except ImportError: - # Fallback for older environments try: - from importlib_metadata import Requirement + from importlib_metadata import Requirement # type: ignore[assignment] except ImportError: - # Last resort fallback - # pylint: disable=deprecated-module - from pkg_resources import Requirement + # Suppress deprecation warning in isolated environments where we have no choice + import warnings + + warnings.filterwarnings("ignore", ".*pkg_resources.*", DeprecationWarning) + from pkg_resources import Requirement # type: ignore[assignment] from setuptools.config import read_configuration @@ -65,9 +68,7 @@ def parse_requirements(requirements: list[str]) -> list[Requirement]: def _add_script_args(): """Define script arguments.""" - parser = argparse.ArgumentParser( - description=f"Requirements sync script. v.{VERSION}" - ) + parser = argparse.ArgumentParser(description=f"Requirements sync script. v.{VERSION}") parser.add_argument( "--req-all-path", "-r", @@ -86,7 +87,7 @@ def _add_script_args(): parser.add_argument( "--pyver", "-y", - default="3.8", + default="3.10", required=False, help="Python version to use in the generated Pipfile", ) @@ -101,7 +102,7 @@ def _add_script_args(): "--diff", "-d", required=False, - default=True, + default=False, action="store_true", help="Print diffs, don't write file.", ) @@ -147,7 +148,7 @@ def _compare_reqs(new: list[Requirement], current: list[Requirement]) -> list[st def _write_requirements(file_name: str, requirements: list[Requirement]) -> None: """Write requirements file.""" Path(file_name).write_text( - "\n".join(str(req) for req in requirements), encoding="utf-8" + "\n".join(str(req) for req in sorted(requirements, key=str)), encoding="utf-8" ) @@ -157,9 +158,7 @@ def _get_pyver_from_setup(setup_cfg: str = "setup.cfg") -> str: return str(settings["options"]["python_requires"]) -def _create_pipfile( - reqs: list[Requirement], reqs_dev: list[Requirement], py_ver: str -) -> str: +def _create_pipfile(reqs: list[Requirement], reqs_dev: list[Requirement], py_ver: str) -> str: """Return the text of a Pipfile.""" packages = [f'{req.name} = "{req.specifier}"' for req in reqs] dev_packages = [f'{req.name} = "{req.specifier}"' for req in reqs_dev] @@ -195,13 +194,11 @@ def _get_extras_from_setup( """ setup_mod = import_module("setup") - extras = getattr(setup_mod, "EXTRAS").get(extra) + extras = setup_mod.EXTRAS.get(extra) if include_base: - base_install = getattr(setup_mod, "INSTALL_REQUIRES") - extras.extend( - [req.strip() for req in base_install if not req.strip().startswith("#")] - ) - return list(parse_requirements(sorted(list(set(extras)), key=str.casefold))) + base_install = setup_mod.INSTALL_REQUIRES + extras.extend([req.strip() for req in base_install if not req.strip().startswith("#")]) + return list(parse_requirements(sorted(set(extras), key=str.casefold))) # pylint: disable=invalid-name @@ -246,9 +243,7 @@ def _get_extras_from_setup( _write_requirements(file_name=args.req_all_path, requirements=all_reqs) # We may need to create and write a Pipfile - if args.pipfile and diff_reqs or not Path(args.pipfile).is_file(): - pipfile_text = _create_pipfile( - 
reqs=all_reqs, reqs_dev=dev_reqs, py_ver=args.pyver - ) + if args.pipfile and (diff_reqs or not Path("Pipfile").is_file()): + pipfile_text = _create_pipfile(reqs=all_reqs, reqs_dev=dev_reqs, py_ver=args.pyver) Path("Pipfile").write_text(pipfile_text, encoding="utf-8") sys.exit(0) diff --git a/tools/toollib/import_analyzer.py b/tools/toollib/import_analyzer.py index f763dbd05..ee62b7947 100644 --- a/tools/toollib/import_analyzer.py +++ b/tools/toollib/import_analyzer.py @@ -63,6 +63,7 @@ def __init__(self): "dateutil": "python-dateutil", "splunklib": "splunk-sdk", "sumologic": "sumologic-sdk", + "openobserve": "python-openobserve", "vt": "vt-py", "kqlmagic": "KqlmagicCustom", "jwt": "pyjwt",