
Commit 2d4ff40

John Andersen (pdxjohnny) authored and committed
test: integration: dataflow: Diagram and Merge
Signed-off-by: John Andersen <[email protected]>
1 parent f27dc41 commit 2d4ff40

File tree: 5 files changed, +166 −138 lines


.ci/run.sh

Lines changed: 9 additions & 1 deletion

@@ -58,8 +58,16 @@ function run_plugin() {
         "${PYTHON}" -m dffml service dev install
         ./scripts/docs.sh
 
+        # Log skipped tests to file
+        check_skips="$(mktemp)"
+        TEMP_DIRS+=("${check_skips}")
+
         # Run with coverage
-        "${PYTHON}" -m coverage run setup.py test
+        "${PYTHON}" -m coverage run setup.py test 2>&1 | tee "${check_skips}"
+        "${PYTHON}" -m coverage report -m
+
+        # Fail if any tests were skipped
+        grep -v -q -E '(skipped=.*)' "${check_skips}"
     fi
 }
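For context, Python's unittest runner reports skips in its final summary line, e.g. "OK (skipped=2)", which is the marker the grep gate above inspects in the captured output. A minimal sketch of the same gate written in Python, assuming the runner's output was captured to a hypothetical test.log file:

import re
import sys

def fail_on_skips(log_path: str) -> None:
    # unittest prints e.g. "OK (skipped=2)" when tests were skipped
    match = re.search(r"skipped=(\d+)", open(log_path).read())
    if match:
        sys.exit(f"{match.group(1)} test(s) were skipped")

fail_on_skips("test.log")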

CHANGELOG.md

Lines changed: 1 addition & 0 deletions

@@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - MySQL packaging issue.
 ### Removed
 - CLI command `operations` removed in favor of `dataflow run`
+- Duplicate dataflow diagram code from development service
 
 ## [0.3.0] - 2019-10-26
 ### Added

dffml/service/dev.py

Lines changed: 0 additions & 136 deletions

@@ -247,141 +247,6 @@ class Entrypoints(CMD):
     _list = ListEntrypoints
 
 
-class Diagram(CMD):
-
-    arg_stages = Arg(
-        "-stages",
-        help="Which stages to display: (processing, cleanup, output)",
-        nargs="+",
-        default=[],
-        required=False,
-    )
-    arg_simple = Arg(
-        "-simple",
-        help="Don't display input and output names",
-        default=False,
-        action="store_true",
-        required=False,
-    )
-    arg_display = Arg(
-        "-display",
-        help="How to display (TD: top down, LR, RL, BT)",
-        default="TD",
-        required=False,
-    )
-    arg_dataflow = Arg("dataflow", help="File containing exported DataFlow")
-    arg_config = Arg(
-        "-config",
-        help="ConfigLoader to use for importing",
-        type=BaseConfigLoader.load,
-        default=None,
-    )
-
-    async def run(self):
-        dataflow_path = Path(self.dataflow)
-        config_cls = self.config
-        if config_cls is None:
-            config_type = dataflow_path.suffix.replace(".", "")
-            config_cls = BaseConfigLoader.load(config_type)
-        async with config_cls.withconfig(self.extra_config) as configloader:
-            async with configloader() as loader:
-                exported = await loader.loadb(dataflow_path.read_bytes())
-                dataflow = DataFlow._fromdict(**exported)
-        print(f"graph {self.display}")
-        for stage in Stage:
-            # Skip stage if not wanted
-            if self.stages and stage.value not in self.stages:
-                continue
-            stage_node = hashlib.md5(
-                ("stage." + stage.value).encode()
-            ).hexdigest()
-            if len(self.stages) != 1:
-                print(f"subgraph {stage_node}[{stage.value.title()} Stage]")
-                print(f"style {stage_node} fill:#afd388b5,stroke:#a4ca7a")
-            for instance_name, operation in dataflow.operations.items():
-                if operation.stage != stage:
-                    continue
-                subgraph_node = hashlib.md5(
-                    ("subgraph." + instance_name).encode()
-                ).hexdigest()
-                node = hashlib.md5(instance_name.encode()).hexdigest()
-                if not self.simple:
-                    print(f"subgraph {subgraph_node}[{instance_name}]")
-                    print(f"style {subgraph_node} fill:#fff4de,stroke:#cece71")
-                print(f"{node}[{operation.instance_name}]")
-                for input_name in operation.inputs.keys():
-                    input_node = hashlib.md5(
-                        ("input." + instance_name + "." + input_name).encode()
-                    ).hexdigest()
-                    if not self.simple:
-                        print(f"{input_node}({input_name})")
-                        print(f"{input_node} --> {node}")
-                for output_name in operation.outputs.keys():
-                    output_node = hashlib.md5(
-                        (
-                            "output." + instance_name + "." + output_name
-                        ).encode()
-                    ).hexdigest()
-                    if not self.simple:
-                        print(f"{output_node}({output_name})")
-                        print(f"{node} --> {output_node}")
-                if not self.simple:
-                    print(f"end")
-            if len(self.stages) != 1:
-                print(f"end")
-        if len(self.stages) != 1:
-            print(f"subgraph inputs[Inputs]")
-            print(f"style inputs fill:#f6dbf9,stroke:#a178ca")
-        for instance_name, input_flow in dataflow.flow.items():
-            operation = dataflow.operations[instance_name]
-            if self.stages and not operation.stage.value in self.stages:
-                continue
-            node = hashlib.md5(instance_name.encode()).hexdigest()
-            for input_name, sources in input_flow.inputs.items():
-                for source in sources:
-                    # TODO Put various sources in their own "Inputs" subgraphs
-                    if isinstance(source, str):
-                        input_definition = operation.inputs[input_name]
-                        seed_input_node = hashlib.md5(
-                            (source + "." + input_definition.name).encode()
-                        ).hexdigest()
-                        print(f"{seed_input_node}({input_definition.name})")
-                        if len(self.stages) == 1:
-                            print(
-                                f"style {seed_input_node} fill:#f6dbf9,stroke:#a178ca"
-                            )
-                        if not self.simple:
-                            input_node = hashlib.md5(
-                                (
-                                    "input." + instance_name + "." + input_name
-                                ).encode()
-                            ).hexdigest()
-                            print(f"{seed_input_node} --> {input_node}")
-                        else:
-                            print(f"{seed_input_node} --> {node}")
-                    else:
-                        if not self.simple:
-                            source_output_node = hashlib.md5(
-                                (
-                                    "output."
-                                    + ".".join(list(source.items())[0])
-                                ).encode()
-                            ).hexdigest()
-                            input_node = hashlib.md5(
-                                (
-                                    "input." + instance_name + "." + input_name
-                                ).encode()
-                            ).hexdigest()
-                            print(f"{source_output_node} --> {input_node}")
-                        else:
-                            source_operation_node = hashlib.md5(
-                                list(source.keys())[0].encode()
-                            ).hexdigest()
-                            print(f"{source_operation_node} --> {node}")
-        if len(self.stages) != 1:
-            print(f"end")
-
-
 class Export(CMD):
 
     arg_config = Arg(
@@ -470,7 +335,6 @@ class Develop(CMD):
     create = Create
     skel = Skeleton
     run = Run
-    diagram = Diagram
     export = Export
     entrypoints = Entrypoints
     install = Install
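The removed command, now exposed as dffml dataflow diagram (see the docs change below), emits mermaid graph syntax on stdout, naming every node with the md5 hexdigest of a dotted path string ("stage.processing", "input.<instance>.<name>", and so on) so identifiers stay stable across runs and contain only characters mermaid accepts. A minimal sketch of that naming scheme; the node_id helper is illustrative, not part of DFFML:

import hashlib

def node_id(*parts: str) -> str:
    # md5 hexdigest of a dotted path, e.g. "stage.processing"
    return hashlib.md5(".".join(parts).encode()).hexdigest()

# Mirrors the subgraph line the command prints for the processing stage
print(f"subgraph {node_id('stage', 'processing')}[Processing Stage]")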

docs/tutorials/operations.rst

Lines changed: 1 addition & 1 deletion

@@ -490,7 +490,7 @@ are connected.
 
 .. code-block:: console
 
-    $ dffml service dev diagram -simple shouldi/deploy/df/shouldi.json
+    $ dffml dataflow diagram -simple shouldi/deploy/df/shouldi.json
     graph TD
     subgraph a759a07029077edc5c37fea0326fa281[Processing Stage]
     style a759a07029077edc5c37fea0326fa281 fill:#afd388b5,stroke:#a4ca7a

tests/test_integration_cli.py

Lines changed: 155 additions & 0 deletions

@@ -2,20 +2,59 @@
 This file contains integration tests. We use the CLI to exercise functionality of
 various DFFML classes and constructs.
 """
+import re
+import os
 import io
+import json
 import inspect
 import pathlib
+import asyncio
 import contextlib
+import unittest.mock
 
+from dffml.df.types import Operation, DataFlow
 from dffml.cli.cli import CLI
+from dffml.service.dev import Develop
+from dffml.util.packaging import is_develop
+from dffml.util.entrypoint import load
 from dffml.util.asynctestcase import AsyncTestCase
 
 from .test_cli import non_existant_tempfile
 
 
+def relative_path(*args):
+    """
+    Returns a pathlib.Path object with the path relative to this file.
+    """
+    target = pathlib.Path(__file__).parents[0] / args[0]
+    for path in list(args)[1:]:
+        target /= path
+    return target
+
+
+@contextlib.contextmanager
+def relative_chdir(*args):
+    """
+    Change directory to a location relative to the location of this file.
+    """
+    target = relative_path(*args)
+    orig_dir = os.getcwd()
+    try:
+        os.chdir(target)
+        yield target
+    finally:
+        os.chdir(orig_dir)
+
+
 class IntegrationCLITestCase(AsyncTestCase):
+    REQUIRED_PLUGINS = []
+
     async def setUp(self):
         super().setUp()
+        if not all(map(is_develop, self.REQUIRED_PLUGINS)):
+            self.skipTest(
+                f"Required plugins: {', '.join(self.REQUIRED_PLUGINS)} must be installed in development mode"
+            )
         self.stdout = io.StringIO()
         self._stack = contextlib.ExitStack().__enter__()
@@ -94,3 +133,119 @@ async def test_memory_to_csv(self):
             )
             + "\n",
         )
+
+
+class TestDevelop(IntegrationCLITestCase):
+
+    REQUIRED_PLUGINS = ["shouldi"]
+
+    async def test_export(self):
+        stdout = io.StringIO()
+        # Use shouldi's dataflow for tests
+        with relative_chdir("..", "examples", "shouldi"):
+            with unittest.mock.patch("sys.stdout.buffer.write") as write:
+                await Develop.cli("export", "shouldi.cli:DATAFLOW")
+            DataFlow._fromdict(**json.loads(write.call_args[0][0]))
+
+
+class TestDataFlow(IntegrationCLITestCase):
+
+    REQUIRED_PLUGINS = ["shouldi", "dffml-config-yaml", "dffml-feature-git"]
+
+    async def setUp(self):
+        await super().setUp()
+        # Use shouldi's dataflow for tests
+        self.DATAFLOW = list(
+            load(
+                "shouldi.cli:DATAFLOW",
+                relative=relative_path("..", "examples", "shouldi"),
+            )
+        )[0]
+
+    async def test_diagram_default(self):
+        filename = self.mktempfile() + ".json"
+        pathlib.Path(filename).write_text(json.dumps(self.DATAFLOW.export()))
+        with contextlib.redirect_stdout(self.stdout):
+            await CLI.cli(
+                "dataflow", "diagram", filename,
+            )
+        stdout = self.stdout.getvalue()
+        # Check that a subgraph is being made for each operation
+        self.assertTrue(re.findall(r"subgraph.*run_bandit", stdout))
+        # Check that all stages are included
+        for check in ["Processing", "Output", "Cleanup"]:
+            self.assertIn(f"{check} Stage", stdout)
+
+    async def test_diagram_simple(self):
+        filename = self.mktempfile() + ".json"
+        pathlib.Path(filename).write_text(json.dumps(self.DATAFLOW.export()))
+        with contextlib.redirect_stdout(self.stdout):
+            await CLI.cli(
+                "dataflow", "diagram", "-simple", filename,
+            )
+        # Check that a subgraph is not being made for each operation
+        self.assertFalse(
+            re.findall(r"subgraph.*run_bandit", self.stdout.getvalue())
+        )
+
+    async def test_diagram_single_stage(self):
+        filename = self.mktempfile() + ".json"
+        pathlib.Path(filename).write_text(json.dumps(self.DATAFLOW.export()))
+        with contextlib.redirect_stdout(self.stdout):
+            await CLI.cli(
+                "dataflow", "diagram", "-stages", "processing", "--", filename,
+            )
+        stdout = self.stdout.getvalue()
+        # Check that the single stage is not its own subgraph
+        for check in ["Processing", "Output", "Cleanup"]:
+            self.assertNotIn(f"{check} Stage", stdout)
+
+    async def test_diagram_multi_stage(self):
+        filename = self.mktempfile() + ".json"
+        pathlib.Path(filename).write_text(json.dumps(self.DATAFLOW.export()))
+        with contextlib.redirect_stdout(self.stdout):
+            await CLI.cli(
+                "dataflow",
+                "diagram",
+                "-stages",
+                "processing",
+                "output",
+                "--",
+                filename,
+            )
+        stdout = self.stdout.getvalue()
+        # Check that only the requested stages get their own subgraphs
+        for check in ["Processing", "Output"]:
+            self.assertIn(f"{check} Stage", stdout)
+        for check in ["Cleanup"]:
+            self.assertNotIn(f"{check} Stage", stdout)
+
+    async def test_merge(self):
+        # Write out shouldi dataflow
+        orig = self.mktempfile() + ".json"
+        pathlib.Path(orig).write_text(json.dumps(self.DATAFLOW.export()))
+        # Import from feature/git
+        transform_to_repo = Operation.load("dffml.mapping.create")
+        lines_of_code_by_language, lines_of_code_to_comments = list(
+            load(
+                "dffml_feature_git.feature.operations:lines_of_code_by_language",
+                "dffml_feature_git.feature.operations:lines_of_code_to_comments",
+                relative=relative_path("..", "feature", "git"),
+            )
+        )
+        # Create new dataflow
+        override = DataFlow.auto(
+            transform_to_repo,
+            lines_of_code_by_language,
+            lines_of_code_to_comments,
+        )
+        # TODO Modify and compare against yaml in docs example
+        # Write out override dataflow
+        created = self.mktempfile() + ".json"
+        pathlib.Path(created).write_text(json.dumps(override.export()))
+        # Merge the two
+        with contextlib.redirect_stdout(self.stdout):
+            await CLI.cli(
+                "dataflow", "merge", orig, created,
+            )
+        DataFlow._fromdict(**json.loads(self.stdout.getvalue()))
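The merge test above drives the CLI in-process and parses the merged DataFlow back out of stdout; the same flow can be scripted outside the test harness. A minimal sketch, assuming two exported DataFlow JSON files already exist at the hypothetical paths orig.json and override.json:

import asyncio
import contextlib
import io
import json

from dffml.cli.cli import CLI
from dffml.df.types import DataFlow

async def merge_dataflows(orig: str, override: str) -> DataFlow:
    # `dffml dataflow merge` prints the combined flow to stdout
    stdout = io.StringIO()
    with contextlib.redirect_stdout(stdout):
        await CLI.cli("dataflow", "merge", orig, override)
    return DataFlow._fromdict(**json.loads(stdout.getvalue()))

merged = asyncio.run(merge_dataflows("orig.json", "override.json"))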
