diff --git a/USING_NEO4J.md b/USING_NEO4J.md index 0bbedcb5..42c7564d 100644 --- a/USING_NEO4J.md +++ b/USING_NEO4J.md @@ -1,11 +1,11 @@ # Experimenting with the Neo4j graph database Python STIX DataStore -The Neo4j graph database Python STIX DataStore is a proof-of-concept implementation to show how to store STIX content in a graph database. +The Neo4j graph database Python STIX DataStore is a proof-of-concept implementation to show how to store STIX content in a graph database. ## Limitations: -As a proof-of-concept it has minimal functionality. - +As a proof-of-concept it has minimal functionality. + ## Installing Neo4j See https://neo4j.com/docs/desktop-manual/current/installation @@ -18,18 +18,18 @@ The python neo4j library used is py2neo, available in pypi at https://pypi.org/p ## Implementation Details -We would like to that the folks at JHU/APL for their implementation of [STIX2NEO4J.py](https://github.com/opencybersecurityalliance/oca-iob/tree/main/STIX2NEO4J%20Converter), which this code is based on. +We would like to thank the folks at JHU/APL for their implementation of [STIX2NEO4J.py](https://github.com/opencybersecurityalliance/oca-iob/tree/main/STIX2NEO4J%20Converter), which this code is based on. Only the DataSink (for storing STIX data) part of the DataStore object has been implemented. The DataSource part is implemented as a stub. However, the graph database can be queried using the neo4j cypher langauge within the neo4j browser. -The main concept behind any graphs is nodes and edges. STIX data is similar as it contains relationship objects (SROs) and node objects (SDOs, SCOs and SMOs). 
Additional edges are provided by STIX embedded relationships, which are expressed as properties in STIX node objects. This organization of data in STIX is a natural fit for graph models, such as neo4j. -The order in which STIX objects are added to the graph database is arbitrary. Therefore, when an SRO or embedded relationship is added via the DataStore, the nodes that it connects may not be present in the database, so the relationship is not added to the database, but remembered by the DataStore code as an unconnected relationship. Whenever a new node is -added to the database, the unconnected relationships must be reviewed to determine if both nodes of a relationship can now be represented using an edge in the graph database. +The order in which STIX objects are added to the graph database is arbitrary. Therefore, when an SRO or embedded relationship is added via the DataStore, the nodes that it connects may not be present in the database, so the relationship is not added to the database, but remembered by the DataStore code as an unconnected relationship. Whenever a new node is +added to the database, the unconnected relationships must be reviewed to determine if both nodes of a relationship can now be represented using an edge in the graph database. -Note that unless both the source and target nodes are eventually added, -the relationship will not be added either. +Note that unless both the source and target nodes are eventually added, +the relationship will not be added either. How to address this issue in the implementation has not been determined. 
## Demonstrating a neo4j database for STIX diff --git a/stix2/datastore/neo4j/STIX2NEO4J.py b/stix2/datastore/neo4j/STIX2NEO4J.py.doc similarity index 94% rename from stix2/datastore/neo4j/STIX2NEO4J.py rename to stix2/datastore/neo4j/STIX2NEO4J.py.doc index b6111e3d..2073eff3 100644 --- a/stix2/datastore/neo4j/STIX2NEO4J.py +++ b/stix2/datastore/neo4j/STIX2NEO4J.py.doc @@ -3,11 +3,11 @@ # Code developed by JHU/APL - First Draft December 2021 # DISCLAIMER -# The script developed by JHU/APL for the demonstration are not “turn key” and are +# The script developed by JHU/APL for the demonstration are not “turn key” and are # not safe for deployment without being tailored to production infrastructure. These # files are not being delivered as software and are not appropriate for direct use on any # production networks. JHU/APL assumes no liability for the direct use of these files and -# they are provided strictly as a reference implementation. +# they are provided strictly as a reference implementation. # # NO WARRANTY, NO LIABILITY. THIS MATERIAL IS PROVIDED “AS IS.” JHU/APL MAKES NO # REPRESENTATION OR WARRANTY WITH RESPECT TO THE PERFORMANCE OF THE MATERIALS, INCLUDING @@ -20,11 +20,12 @@ # CONSEQUENTIAL, SPECIAL OR OTHER DAMAGES ARISING FROM THE USE OF, OR INABILITY TO USE, # THE MATERIAL, INCLUDING, BUT NOT LIMITED TO, ANY DAMAGES FOR LOST PROFITS. 
+from getpass import getpass ## Import python modules for this script import json from typing import List + from py2neo import Graph, Node -from getpass import getpass from tqdm import tqdm #Import variables @@ -44,10 +45,12 @@ def __init__(self): self.nodes_with_object_ref = list() self.nodes = list() self.bundlename = BundleName - self.infer_relation = {"parent_ref": "parent_of", + self.infer_relation = { + "parent_ref": "parent_of", "created_by_ref": "created_by", "src_ref": "source_of", - "dst_ref": "destination_of"} + "dst_ref": "destination_of", + } self.__load_json(JSONFILE) def __load_json(self, fd): @@ -85,16 +88,18 @@ def make_nodes(self): node_contents[key] = apobj[key] # Make the Bundle ID a property # use dictionary expansion as keywork for optional node properties - node = Node(apobj["type"], - name=node_name, - bundlesource=self.bundlename, - **node_contents) + node = Node( + apobj["type"], + name=node_name, + bundlesource=self.bundlename, + **node_contents, + ) # if node needs new created_by relation, create the node and then the relationship self.sgraph.create(node) # save off these nodes for additional relationship creating if 'object_refs' in keys: self.nodes_with_object_ref.append(apobj) - + # create relationships that exist outside of relationship objects # such as Created_by and Parent_Of def __make_inferred_relations(self): @@ -112,7 +117,7 @@ def __make_inferred_relations(self): else: ref_list = apobj[k] for ref in ref_list: - # The "b to a" relationship is reversed in this cypher query to ensure the correct relationship direction in the graph + # The "b to a" relationship is reversed in this cypher query to ensure the correct relationship direction in the graph cypher_string = f'MATCH (a),(b) WHERE a.bundlesource="{self.bundlename}" AND b.bundlesource="{self.bundlename}" AND a.ap_id="{str(ref)}" AND b.ap_id="{str(apobj["id"])}" CREATE (b)-[r:{rel_type}]->(a) RETURN a,b' try: self.sgraph.run(cypher_string) diff --git 
a/stix2/datastore/neo4j/demo.py b/stix2/datastore/neo4j/demo.py index 047bfcf9..2beb2b10 100644 --- a/stix2/datastore/neo4j/demo.py +++ b/stix2/datastore/neo4j/demo.py @@ -1,16 +1,17 @@ -import sys import json +import sys + +from identity_contact_information import \ + identity_contact_information # noqa F401 +# needed so the relational db code knows to create tables for this +from incident import event, impact, incident, task # noqa F401 +from observed_string import observed_string # noqa F401 import stix2 from stix2.datastore.neo4j.neo4j import Neo4jStore import stix2.properties -# needed so the relational db code knows to create tables for this -from incident import incident, event, task, impact -from identity_contact_information import identity_contact_information -from observed_string import observed_string - def main(): with open(sys.argv[1], "r") as f: diff --git a/stix2/datastore/neo4j/neo4j.py b/stix2/datastore/neo4j/neo4j.py index 95fecd4e..ae3fe6ac 100644 --- a/stix2/datastore/neo4j/neo4j.py +++ b/stix2/datastore/neo4j/neo4j.py @@ -1,23 +1,24 @@ -import json +import re from py2neo import Graph, Node, Relationship -import re import stix2 from stix2.base import _STIXBase -from stix2.datastore import ( - DataSink, DataSource, DataStoreMixin, -) +from stix2.datastore import DataSink, DataSource, DataStoreMixin from stix2.parsing import parse def convert_camel_case_to_snake_case(name): return re.sub(r'(?