Commit 82588d5

feat: Use date and datetime for related kinds (#42)
* feat: Use date and datetime for related kinds
* downgrade boto
* make it 3.9 compatible
1 parent 198b0e7 commit 82588d5

File tree

10 files changed: +81 -32 lines changed


cloud2sql/collect_plugins.py

Lines changed: 4 additions & 4 deletions
@@ -76,21 +76,21 @@ def collectors(raw_config: Json, feedback: CoreFeedback) -> Dict[str, BaseCollec
 
 def configure(path_to_config: Optional[str]) -> Json:
     # at least one key should be present
-    def require(keys: List[str], obj: Json, msg: str):
+    def require(keys: List[str], obj: Json, msg: str) -> None:
         if not (set(keys) & obj.keys()):
             raise ValueError(msg)
 
     config = {}
     if path_to_config:
         with open(path_to_config) as f:
-            config = yaml.safe_load(f)  # type: ignore
+            config = yaml.safe_load(f)
 
     if "sources" not in config:
         raise ValueError("No sources configured")
     if "destinations" not in config:
         raise ValueError("No destinations configured")
 
-    def validate_arrow_config(config: Json):
+    def validate_arrow_config(config: Json) -> None:
         require(["format"], config, "No format configured for arrow destination")
         if not config["format"] in ["parquet", "csv"]:
             raise ValueError("Format must be either parquet or csv")
@@ -202,7 +202,7 @@ def collect_to_file(
     # ingest the data
     writer = ArrowWriter(model, output_config)
     node: BaseResource
-    for node in sorted(collector.graph.nodes, key=lambda n: n.kind):
+    for node in sorted(collector.graph.nodes, key=lambda n: n.kind):  # type: ignore
         exported = prepare_node(node, collector)
         writer.insert_node(exported)
         ne_current += 1
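
Aside from the new return annotations, the nested require helper is the core of the config validation here: it raises unless at least one of the listed keys is present in the object. A minimal standalone sketch of the pattern (the config literal is invented for illustration):

from typing import Any, Dict, List

Json = Dict[str, Any]

def require(keys: List[str], obj: Json, msg: str) -> None:
    # at least one of the given keys must be present in obj
    if not (set(keys) & obj.keys()):
        raise ValueError(msg)

# hypothetical config for this sketch
config: Json = {"sources": {"aws": {}}, "destinations": {"arrow": {"format": "csv"}}}
require(["sources"], config, "No sources configured")  # passes
require(["format"], config["destinations"]["arrow"], "No format configured for arrow destination")  # passes
try:
    require(["uri", "path"], config["destinations"]["arrow"], "No output location configured")
except ValueError as e:
    print(e)  # neither key is present -> "No output location configured"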

cloud2sql/show_progress.py

Lines changed: 1 addition & 1 deletion
@@ -66,4 +66,4 @@ def walk_node(nid: str, node: Dict[str, Any], rt: Optional[RichTree] = None) ->
             walk_node(nid, child, sub)
         return sub
 
-    return walk_node(progress.sub_tree.root, progress.sub_tree.to_dict(with_data=True))
+    return walk_node(progress.sub_tree.root, progress.sub_tree.to_dict(with_data=True))  # type: ignore

cloud2sql/sql.py

Lines changed: 56 additions & 7 deletions
@@ -1,16 +1,30 @@
 import logging
 from abc import ABC, abstractmethod
-from typing import List, Any, Type, Tuple, Dict, Iterator
+from datetime import datetime, date
+from typing import List, Any, Type, Tuple, Dict, Iterator, Optional
 
 from resotoclient.models import Kind, Model
 from resotolib.args import Namespace
 from resotolib.types import Json
-from sqlalchemy import Boolean, Column, Float, Integer, JSON, MetaData, String, Table, DDL
-from sqlalchemy.engine import Engine, Connection
+from resotolib.utils import UTC_Date_Format
+from sqlalchemy import (
+    Boolean,
+    Column,
+    Float,
+    Integer,
+    JSON,
+    MetaData,
+    String,
+    Table,
+    DDL,
+    DateTime,
+    Date,
+    TypeDecorator,
+)
+from sqlalchemy.engine import Engine, Connection, Dialect
 from sqlalchemy.sql.ddl import DropTable, DropConstraint
 from sqlalchemy.sql.dml import ValuesBase
 
-from cloud2sql.util import value_in_path
 from cloud2sql.schema_utils import (
     base_kinds,
     temp_prefix,
@@ -19,10 +33,41 @@
     get_link_table_name,
     kind_properties,
 )
+from cloud2sql.util import value_in_path
 
 log = logging.getLogger("resoto.cloud2sql")
 
 
+class DateTimeString(TypeDecorator):  # type: ignore
+    """
+    This type decorator translates between string (python) and datetime (sqlalchemy) types.
+    """
+
+    impl = DateTime
+    cache_ok = True
+
+    def process_bind_param(self, value: Optional[str], dialect: Dialect) -> Optional[datetime]:
+        return datetime.strptime(value, UTC_Date_Format) if value else None
+
+    def process_result_value(self, value: Optional[datetime], dialect: Dialect) -> Optional[str]:
+        return value.strftime(UTC_Date_Format) if value else None
+
+
+class DateString(TypeDecorator):  # type: ignore
+    """
+    This type decorator translates between string (python) and date (sqlalchemy) types.
+    """
+
+    impl = Date
+    cache_ok = True
+
+    def process_bind_param(self, value: Optional[str], dialect: Dialect) -> Optional[date]:
+        return date.fromisoformat(value) if value else None
+
+    def process_result_value(self, value: Optional[datetime], dialect: Dialect) -> Optional[str]:
+        return value.strftime("%Y-%m-%d") if value else None
+
+
 def sql_kind_to_column_type(kind_name: str, model: Model) -> Any:  # Type[TypeEngine[Any]]
     kind = model.kinds.get(kind_name)
     if "[]" in kind_name:
@@ -33,11 +78,15 @@ def sql_kind_to_column_type(kind_name: str, model: Model) -> Any:  # Type[TypeEn
         return JSON
     elif kind_name in ("int32", "int64"):
         return Integer
-    elif kind_name in "float":
+    elif kind_name == "float":
        return Float
-    elif kind_name in "double":
+    elif kind_name == "double":
         return Float  # use Double with sqlalchemy 2
-    elif kind_name in ("string", "date", "datetime", "duration"):
+    elif kind_name == "datetime":
+        return DateTimeString
+    elif kind_name == "date":
+        return DateString
+    elif kind_name in ("string", "duration"):
         return String
     elif kind_name == "boolean":
         return Boolean
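
This file is the heart of the change: date and datetime kinds now map to real DATE/DATETIME columns, while the rest of the pipeline keeps passing values around as strings; the two TypeDecorator subclasses do the conversion at the bind/result boundary. A minimal round-trip sketch against an in-memory SQLite engine, assuming cloud2sql.sql is importable and that resotolib's UTC_Date_Format is the ISO-8601 pattern "%Y-%m-%dT%H:%M:%SZ" (the table and values are invented for illustration):

from sqlalchemy import Column, Integer, MetaData, Table, create_engine, insert, select

from cloud2sql.sql import DateTimeString  # the decorator defined above

metadata = MetaData()
instances = Table(
    "instances",  # hypothetical table for this sketch
    metadata,
    Column("id", Integer, primary_key=True),
    Column("ctime", DateTimeString),  # stored as DATETIME, bound and read as str
)

engine = create_engine("sqlite://")
metadata.create_all(engine)

with engine.begin() as conn:
    # process_bind_param parses the UTC string into a datetime before writing
    conn.execute(insert(instances).values(id=1, ctime="2023-04-12T10:30:00Z"))
    # process_result_value formats the stored datetime back into the same string
    ctime = conn.execute(select(instances.c.ctime)).scalar_one()
    assert ctime == "2023-04-12T10:30:00Z"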

requirements-parquet.txt

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 pyarrow==11.0.0
-google-cloud-storage==2.7.0
+google-cloud-storage==2.8.0
 boto3>=1.26.61

requirements-snowflake.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-snowflake-sqlalchemy==1.4.6
+snowflake-sqlalchemy==1.4.7

requirements-test.txt

Lines changed: 6 additions & 6 deletions
@@ -1,9 +1,9 @@
 # test dependencies
-pytest==7.2.2
-black==23.1.0
+pytest==7.3.0
+black==23.3.0
 flake8>=6.0.0
-mypy==1.1.1
-tox==4.4.6
+mypy==1.2.0
+tox==4.4.12
 wheel>=0.38.4
-coverage==7.2.1
-resoto-plugin-example-collector==3.2.5
+coverage==7.2.3
+resoto-plugin-example-collector>=3.3, <3.4

requirements.txt

Lines changed: 5 additions & 5 deletions
@@ -8,9 +8,9 @@ resotoclient>=1.2.1
 posthog>=2.2.0
 requests>=2.28.2
 
-resotolib>=3.2.5, <3.3
+resotolib>=3.3, <3.4
 # all collector plugins
-resoto-plugin-aws>=3.2.5, <3.3
-resoto-plugin-digitalocean>=3.2.5, <3.3
-resoto-plugin-gcp>=3.2.5, <3.3
-resoto-plugin-k8s>=3.2.5, <3.3
+resoto-plugin-aws>=3.3, <3.4
+resoto-plugin-digitalocean>=3.3, <3.4
+resoto-plugin-gcp>=3.3, <3.4
+resoto-plugin-k8s>=3.3, <3.4

tests/arrow/model_test.py

Lines changed: 4 additions & 4 deletions
@@ -34,7 +34,7 @@ def test_create_schema(model: Model) -> None:
 
 
 def test_update(parquet_writer: ArrowWriter) -> None:
-    parquet_writer.insert_node(  # type: ignore
+    parquet_writer.insert_node(
         {
             "type": "node",
             "id": "i-123",
@@ -53,7 +53,7 @@ def test_update(parquet_writer: ArrowWriter) -> None:
             },
         }
     )
-    parquet_writer.insert_node(  # type: ignore
+    parquet_writer.insert_node(
         {
             "type": "node",
             "id": "v-123",
@@ -71,7 +71,7 @@ def test_update(parquet_writer: ArrowWriter) -> None:
             },
         }
     )
-    parquet_writer.insert_node({"type": "edge", "from": "i-123", "to": "v-123"})  # type: ignore
+    parquet_writer.insert_node({"type": "edge", "from": "i-123", "to": "v-123"})
 
     # one instance is persisted
     assert set(parquet_writer.batches["some_instance"].rows[0].values()) == {
@@ -102,7 +102,7 @@ def test_update(parquet_writer: ArrowWriter) -> None:
     assert set(parquet_writer.batches["link_some_instance_some_volume"].rows[0].values()) == {"i-123", "v-123"}
 
     # write the batch when the batch size is reached
-    parquet_writer.insert_node({"type": "edge", "from": "i-123", "to": "v-123"})  # type: ignore
+    parquet_writer.insert_node({"type": "edge", "from": "i-123", "to": "v-123"})
     assert len(parquet_writer.batches["link_some_instance_some_volume"].rows) == 0
 
     # flush the batches and close the writer

tests/arrow/writer_test.py

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ def test_normalize() -> None:
 
     normalize(NormalizationPath(path=["bar", None], convert_to=ParquetMap(convert_values_to_str=True)), object)
 
-    assert object["bar"] == [[("a", "b"), ("c", "d")], [("a", "b"), ("c", "d")]]
+    assert object["bar"] == [[("a", "b"), ("c", "d")], [("a", "b"), ("c", "d")]]  # type: ignore
 
     normalize(NormalizationPath(path=["foobar"], convert_to=ParquetString()), object)

tests/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from queue import Queue
2-
from typing import List
2+
from typing import List, Iterator
33

44
from resotoclient.models import Model, Kind, Property
55
from pytest import fixture
@@ -71,7 +71,7 @@ def updater(model: Model) -> SqlDefaultUpdater:
7171

7272

7373
@fixture()
74-
def parquet_writer(model: Model):
74+
def parquet_writer(model: Model) -> Iterator[ArrowWriter]:
7575
parquet_model = ArrowModel(model, "parquet")
7676
parquet_model.create_schema([])
7777
