Commit 8a3a2fe

[feat] Genesis of cloud2sql

1 parent f3e07e8 commit 8a3a2fe

17 files changed: +949 -0 lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
@@ -27,6 +27,11 @@ share/python-wheels/
 *.egg
 MANIFEST
 
+# IDEs
+.idea/
+.vscode/
+*.iml
+
 # PyInstaller
 # Usually these files are written by a python script from a template
 # before PyInstaller builds the exe, so as to inject date/other infos into it.

Makefile

Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
+.PHONY: clean clean-test clean-pyc clean-build clean-env docs help setup
+.DEFAULT_GOAL := help
+.SILENT: clean clean-build clean-pyc clean-test setup
+
+define BROWSER_PYSCRIPT
+import os, webbrowser, sys
+
+from urllib.request import pathname2url
+
+webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
+endef
+export BROWSER_PYSCRIPT
+
+define PRINT_HELP_PYSCRIPT
+import re, sys
+
+for line in sys.stdin:
+	match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
+	if match:
+		target, help = match.groups()
+		print("%-20s %s" % (target, help))
+endef
+export PRINT_HELP_PYSCRIPT
+
+BROWSER := python -c "$$BROWSER_PYSCRIPT"
+
+help:
+	@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
+
+clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
+
+clean-build: ## remove build artifacts
+	rm -fr build/
+	rm -fr out/
+	rm -fr gen/
+	rm -fr dist/
+	rm -fr .eggs/
+	rm -fr .hypothesis/
+	rm -fr .mypy_cache/
+	find . -name '*.egg-info' -exec rm -fr {} +
+	find . -name '*.egg' -exec rm -fr {} +
+
+clean-pyc: ## remove Python file artifacts
+	find . -name '*.pyc' -exec rm -f {} +
+	find . -name '*.pyo' -exec rm -f {} +
+	find . -name '*~' -exec rm -f {} +
+	find . -name '__pycache__' -exec rm -fr {} +
+
+clean-test: ## remove test and coverage artifacts
+	rm -fr .tox/
+	rm -f .coverage
+	rm -fr htmlcov/
+	rm -fr .pytest_cache
+
+clean-env: ## remove environment
+	rm -fr venv
+
+lint: ## static code analysis
+	black --line-length 120 --check cloud2sql tests
+	flake8 cloud2sql
+	mypy --python-version 3.9 --strict cloud2sql tests
+
+test: ## run tests quickly with the default Python
+	pytest
+
+test-all: ## run tests on every Python version with tox
+	tox
+
+coverage: ## check code coverage quickly with the default Python
+	coverage run --source cloud2sql -m pytest
+	coverage combine
+	coverage report -m
+	coverage html
+	$(BROWSER) htmlcov/index.html
+
+venv:
+	python3 -m venv venv --prompt "cloud2sql"
+	. ./venv/bin/activate && python3 -m pip install --upgrade pip
+	# region as long as version 3 is not available on pypi
+	. ./venv/bin/activate && pip install -e ../resoto/resotolib
+	. ./venv/bin/activate && pip install -e ../resoto/plugins/aws
+	. ./venv/bin/activate && pip install -e ../resoto/plugins/digitalocean
+	. ./venv/bin/activate && pip install -e ../resoto/plugins/gcp
+	. ./venv/bin/activate && pip install -e ../resoto/plugins/k8s
+	. ./venv/bin/activate && pip install -e ../resoto/plugins/example_collector
+	# end region
+	. ./venv/bin/activate && pip install -r requirements-test.txt
+	. ./venv/bin/activate && pip install -r requirements.txt
+	. ./venv/bin/activate && pip install -e .
+	. ./venv/bin/activate && mypy --install-types --non-interactive cloud2sql tests
+
+setup: clean clean-env venv
+
+list-outdated:
+	pip list --outdated
+
+install-latest:
+	pip list --outdated --format=freeze | grep -v '^\-e' | cut -d = -f 1 | xargs -n1 pip install -U
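
A note on the help target above: it pipes $(MAKEFILE_LIST) through PRINT_HELP_PYSCRIPT, so every target annotated with a trailing `## description` is listed by `make help`. A minimal standalone sketch of that parsing rule, with a sample line copied from the lint target (everything else here is illustrative):

# Sketch of the rule PRINT_HELP_PYSCRIPT applies to each Makefile line.
# In the Makefile the regex is written with `$$`, which is make-escaping for a literal `$`.
import re

line = "lint: ## static code analysis"  # sample line taken from the lint target above
match = re.match(r"^([a-zA-Z_-]+):.*?## (.*)$", line)
if match:
    target, description = match.groups()
    print("%-20s %s" % (target, description))  # prints the aligned row that `make help` shows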

cloud2sql/__init__.py

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+"""
+Cloud2Sql
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+Read infrastructure data from your cloud and export it to an SQL database.
+:copyright: © 2022 Some Engineering Inc.
+:license: Apache 2.0, see LICENSE for more details.
+"""
+
+__title__ = "cloud2sql"
+__description__ = "Read infrastructure data from your cloud and export it to an SQL database."
+__author__ = "Some Engineering Inc."
+__license__ = "Apache 2.0"
+__copyright__ = "Copyright © 2022 Some Engineering Inc."

cloud2sql/__main__.py

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
+from logging import getLogger
+
+from resotolib.args import Namespace, ArgumentParser
+from resotolib.logger import setup_logger
+from sqlalchemy import create_engine, Engine
+
+from cloud2sql.collect_plugins import collect_from_plugins
+
+log = getLogger("cloud2sql")
+
+
+def parse_args() -> Namespace:
+    parser = ArgumentParser(epilog="Collect data from cloud providers and store it in a database")
+    parser.add_argument("--debug", action="store_true", help="Enable debug logging")
+    parser.add_argument("--config", help="Path to config file", required=True)
+    parser.add_argument(
+        "--show",
+        choices=["progress", "log", "none"],
+        default="progress",
+        help="Output to show during the process. Default: progress",
+    )
+    parser.add_argument(
+        "--db",
+        help="The database url. See https://docs.sqlalchemy.org/en/20/core/engines.html.",
+        required=True,
+    )
+    args = parser.parse_args()
+    args.log_level = "CRITICAL" if args.show != "log" else "DEBUG" if args.debug else "INFO"
+    return args  # type: ignore
+
+
+def collect(engine: Engine, args: Namespace) -> None:
+    try:
+        collect_from_plugins(engine, args)
+    except Exception as e:
+        log.error("Error during collection", e)
+        print(f"Error syncing data to database: {e}")
+
+
+def main() -> None:
+    args = parse_args()
+    setup_logger("cloud2sql", level=args.log_level, force=True)
+    engine = create_engine(args.db)
+    collect(engine, args)
+
+
+if __name__ == "__main__":
+    main()
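
For orientation, the entry point above can also be driven without the CLI. A minimal sketch, assuming a local SQLite target; the config path, attribute set, and database URL are placeholders and not part of this commit:

# Sketch: call collect() directly instead of going through main()/argparse.
# The attribute names mirror the CLI flags defined in parse_args() above.
from resotolib.args import Namespace
from sqlalchemy import create_engine

from cloud2sql.__main__ import collect

args = Namespace(config="config.yaml", db="sqlite:///cloud2sql.db", show="none", debug=False, log_level="INFO")
engine = create_engine(args.db)  # any SQLAlchemy database URL works here
collect(engine, args)  # runs every collector plugin enabled in config.yaml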

cloud2sql/collect_plugins.py

Lines changed: 142 additions & 0 deletions
@@ -0,0 +1,142 @@
+import concurrent
+import multiprocessing
+from concurrent.futures import ThreadPoolExecutor, Future
+from contextlib import suppress
+from logging import getLogger
+from queue import Queue
+from threading import Event
+from time import sleep
+from typing import Dict, Optional, List, Any
+
+import pkg_resources
+import yaml
+from resotoclient import Kind, Model
+from resotolib.args import Namespace
+from resotolib.baseplugin import BaseCollectorPlugin
+from resotolib.baseresources import BaseResource
+from resotolib.config import Config
+from resotolib.core.actions import CoreFeedback
+from resotolib.core.model_export import node_to_dict
+from resotolib.json import from_json
+from resotolib.proc import emergency_shutdown
+from resotolib.types import Json
+from rich import print as rich_print
+from rich.live import Live
+from sqlalchemy import Engine
+
+from cloud2sql.show_progress import CollectInfo
+from cloud2sql.sql import SqlModel, SqlUpdater
+
+log = getLogger("cloud2sql")
+
+
+def collectors(raw_config: Json, feedback: CoreFeedback) -> Dict[str, BaseCollectorPlugin]:
+    result = {}
+    config: Config = Config  # type: ignore
+    for entry_point in pkg_resources.iter_entry_points("resoto.plugins"):
+        plugin_class = entry_point.load()
+        if issubclass(plugin_class, BaseCollectorPlugin) and plugin_class.cloud in raw_config:
+            log.info(f"Found collector {plugin_class.cloud} ({plugin_class.__name__})")
+            plugin_class.add_config(config)
+            plugin = plugin_class()
+            if hasattr(plugin, "core_feedback"):
+                setattr(plugin, "core_feedback", feedback.with_context(plugin.cloud))
+            result[plugin_class.cloud] = plugin
+
+    Config.init_default_config()
+    Config.running_config.data = {**Config.running_config.data, **Config.read_config(raw_config)}
+    return result
+
+
+def configure(path_to_config: Optional[str]) -> Json:
+    # Config.init_default_config()
+    if path_to_config:
+        with open(path_to_config) as f:
+            return yaml.safe_load(f)  # type: ignore
+    return {}
+
+
+def collect(collector: BaseCollectorPlugin, engine: Engine, feedback: CoreFeedback, args: Namespace) -> None:
+    # collect cloud data
+    feedback.progress_done(collector.cloud, 0, 1)
+    collector.collect()
+    # read the kinds created from this collector
+    kinds = [from_json(m, Kind) for m in collector.graph.export_model(walk_subclasses=False)]
+    model = SqlModel(Model({k.fqn: k for k in kinds}))
+    node_edge_count = len(collector.graph.nodes) + len(collector.graph.edges)
+    ne_current = 0
+    progress_update = 5000
+    feedback.progress_done("sync_db", 0, node_edge_count, context=[collector.cloud])
+    with engine.connect() as conn:
+        # create the ddl metadata from the kinds
+        model.create_schema(conn, args)
+        # ingest the data
+        updater = SqlUpdater(model)
+        node: BaseResource
+        for node in collector.graph.nodes:
+            node._graph = collector.graph
+            exported = node_to_dict(node)
+            exported["type"] = "node"
+            exported["ancestors"] = {
+                "cloud": {"reported": {"id": node.cloud().name}},
+                "account": {"reported": {"id": node.account().name}},
+                "region": {"reported": {"id": node.region().name}},
+                "zone": {"reported": {"id": node.zone().name}},
+            }
+            stmt = updater.insert_node(exported)
+            if stmt is not None:
+                conn.execute(stmt)
+            ne_current += 1
+            if ne_current % progress_update == 0:
+                feedback.progress_done("sync_db", ne_current, node_edge_count, context=[collector.cloud])
+        for from_node, to_node, _ in collector.graph.edges:
+            stmt = updater.insert_node({"from": from_node.chksum, "to": to_node.chksum, "type": "edge"})
+            if stmt is not None:
+                conn.execute(stmt)
+            ne_current += 1
+            if ne_current % progress_update == 0:
+                feedback.progress_done("sync_db", ne_current, node_edge_count, context=[collector.cloud])
+        # commit all the changes to the tmp tables
+        conn.commit()
+    feedback.progress_done(collector.cloud, 1, 1)
+
+
+def show_messages(core_messages: Queue[Json], end: Event) -> None:
+    info = CollectInfo()
+    while not end.is_set():
+        with Live(info.render(), auto_refresh=False, transient=True) as live:
+            with suppress(Exception):
+                info.handle_message(core_messages.get(timeout=1))
+            live.update(info.render())
+    for message in info.rendered_messages():
+        rich_print(message)
+
+
+def collect_from_plugins(engine: Engine, args: Namespace) -> None:
+    # the multiprocessing manager is used to share data between processes
+    mp_manager = multiprocessing.Manager()
+    core_messages: Queue[Json] = mp_manager.Queue()
+    feedback = CoreFeedback("cloud2sql", "collect", "collect", core_messages)
+    raw_config = configure(args.config)
+    all_collectors = collectors(raw_config, feedback)
+    end = Event()
+    with ThreadPoolExecutor(max_workers=4) as executor:
+        try:
+            if args.show == "progress":
+                executor.submit(show_messages, core_messages, end)
+            futures: List[Future[Any]] = []
+            for collector in all_collectors.values():
+                futures.append(executor.submit(collect, collector, engine, feedback, args))
+            for future in concurrent.futures.as_completed(futures):
+                future.result()
+            # when all collectors are done, we can swap all temp tables
+            SqlModel.swap_temp_tables(engine)
+        except Exception as e:
+            # set end and wait for live to finish, otherwise the cursor is not reset
+            end.set()
+            sleep(1)
+            log.error("An error occurred", exc_info=e)
+            print(f"Encountered Error. Giving up. {e}")
+            emergency_shutdown()
+        finally:
+            end.set()
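
Two things worth noting about the module above. First, collectors() only activates an installed resoto plugin when its cloud name appears as a top-level key in the YAML file passed via --config. Second, the ingest loop in collect() exports every graph node with node_to_dict, tags it as type "node" and adds an ancestors block, while every edge becomes a small from/to document; both are handed to SqlUpdater.insert_node. The payloads look roughly like this (field values are made up for illustration; the structure follows the code above):

# Illustrative payloads mirroring what collect() passes to SqlUpdater.insert_node.
node_payload = {
    # ... plus whatever node_to_dict(node) produced (id, reported attributes, ...),
    "type": "node",
    "ancestors": {
        "cloud": {"reported": {"id": "aws"}},           # node.cloud().name
        "account": {"reported": {"id": "my-account"}},  # node.account().name
        "region": {"reported": {"id": "us-east-1"}},    # node.region().name
        "zone": {"reported": {"id": "us-east-1a"}},     # node.zone().name
    },
}

edge_payload = {"from": "<source node chksum>", "to": "<target node chksum>", "type": "edge"}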

cloud2sql/collect_resoto.py

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+from resotoclient import ResotoClient
+from resotolib.args import Namespace
+from sqlalchemy import Engine
+
+from cloud2sql.sql import SqlModel, SqlUpdater
+
+
+def collect_from_resoto(engine: Engine, args: Namespace) -> None:
+    with ResotoClient("https://localhost:8900", None) as client:
+        model = SqlModel(client.model())
+
+        updater = SqlUpdater(model)
+
+        with engine.connect() as conn:
+            model.create_schema(conn, args)
+            for nd in client.search_graph("id(root) -[0:]->"):
+                stmt = updater.insert_node(nd)
+                if stmt is not None:
+                    conn.execute(stmt)
+            conn.commit()
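
collect_from_resoto takes the other path: instead of running collector plugins, it pulls an existing graph from a Resoto core at https://localhost:8900 (the query id(root) -[0:]-> selects everything reachable from the root) and streams it through the same SqlUpdater. A minimal wiring sketch; the database URL is a placeholder, and which attributes create_schema actually reads from args is defined in cloud2sql/sql.py, which is not part of this excerpt:

# Sketch: export the graph of a locally running Resoto core into a database.
from resotolib.args import Namespace
from sqlalchemy import create_engine

from cloud2sql.collect_resoto import collect_from_resoto

engine = create_engine("sqlite:///resoto-export.db")  # placeholder URL
collect_from_resoto(engine, Namespace())  # args is forwarded to SqlModel.create_schema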
