|
| 1 | +DataStax Classic Graph Queries |
| 2 | +============================== |
| 3 | + |
| 4 | +Getting Started |
| 5 | +~~~~~~~~~~~~~~~ |
| 6 | + |
| 7 | +First, we need to create a graph in the system. To access the system API, we |
| 8 | +use the system execution profile :: |
| 9 | + |
| 10 | + from cassandra.cluster import Cluster, EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT |
| 11 | + |
| 12 | + cluster = Cluster() |
| 13 | + session = cluster.connect() |
| 14 | + |
| 15 | + graph_name = 'movies' |
| 16 | + session.execute_graph("system.graph(name).ifNotExists().engine(Classic).create()", {'name': graph_name}, |
| 17 | + execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT) |
| 18 | + |
| 19 | + |
| 20 | +To execute requests on our newly created graph, we need to set up an execution |
| 21 | +profile. We also need to set the schema_mode to `development` |
| 22 | +for the schema creation:: |
| 23 | + |
| 24 | + |
| 25 | + from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT |
| 26 | + from cassandra.graph import GraphOptions |
| 27 | + |
| 28 | + graph_name = 'movies' |
| 29 | + ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name=graph_name)) |
| 30 | + |
| 31 | + cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep}) |
| 32 | + session = cluster.connect() |
| 33 | + |
| 34 | + session.execute_graph("schema.config().option('graph.schema_mode').set('development')") |
| 35 | + |
| 36 | + |
| 37 | +We are ready to configure our graph schema. We will create a simple one for movies:: |
| 38 | + |
| 39 | + # properties are used to define a vertex |
| 40 | + properties = """ |
| 41 | + schema.propertyKey("genreId").Text().create(); |
| 42 | + schema.propertyKey("personId").Text().create(); |
| 43 | + schema.propertyKey("movieId").Text().create(); |
| 44 | + schema.propertyKey("name").Text().create(); |
| 45 | + schema.propertyKey("title").Text().create(); |
| 46 | + schema.propertyKey("year").Int().create(); |
| 47 | + schema.propertyKey("country").Text().create(); |
| 48 | + """ |
| 49 | + |
| 50 | + session.execute_graph(properties) # we can execute multiple statements in a single request |
| 51 | + |
| 52 | + # A Vertex represents a "thing" in the world. |
| 53 | + vertices = """ |
| 54 | + schema.vertexLabel("genre").properties("genreId","name").create(); |
| 55 | + schema.vertexLabel("person").properties("personId","name").create(); |
| 56 | + schema.vertexLabel("movie").properties("movieId","title","year","country").create(); |
| 57 | + """ |
| 58 | + |
| 59 | + session.execute_graph(vertices) |
| 60 | + |
| 61 | + # An edge represents a relationship between two vertices |
| 62 | + edges = """ |
| 63 | + schema.edgeLabel("belongsTo").single().connection("movie","genre").create(); |
| 64 | + schema.edgeLabel("actor").connection("movie","person").create(); |
| 65 | + """ |
| 66 | + |
| 67 | + session.execute_graph(edges) |
| 68 | + |
| 69 | + # Indexes to execute graph requests efficiently |
| 70 | + indexes = """ |
| 71 | + schema.vertexLabel("genre").index("genresById").materialized().by("genreId").add(); |
| 72 | + schema.vertexLabel("genre").index("genresByName").materialized().by("name").add(); |
| 73 | + schema.vertexLabel("person").index("personsById").materialized().by("personId").add(); |
| 74 | + schema.vertexLabel("person").index("personsByName").materialized().by("name").add(); |
| 75 | + schema.vertexLabel("movie").index("moviesById").materialized().by("movieId").add(); |
| 76 | + schema.vertexLabel("movie").index("moviesByTitle").materialized().by("title").add(); |
| 77 | + schema.vertexLabel("movie").index("moviesByYear").secondary().by("year").add(); |
| 78 | + """ |
| 79 | + |
| 80 | +Next, we'll add some data:: |
| 81 | + |
| 82 | + session.execute_graph(""" |
| 83 | + g.addV('genre').property('genreId', 1).property('name', 'Action').next(); |
| 84 | + g.addV('genre').property('genreId', 2).property('name', 'Drama').next(); |
| 85 | + g.addV('genre').property('genreId', 3).property('name', 'Comedy').next(); |
| 86 | + g.addV('genre').property('genreId', 4).property('name', 'Horror').next(); |
| 87 | + """) |
| 88 | + |
| 89 | + session.execute_graph(""" |
| 90 | + g.addV('person').property('personId', 1).property('name', 'Mark Wahlberg').next(); |
| 91 | + g.addV('person').property('personId', 2).property('name', 'Leonardo DiCaprio').next(); |
| 92 | + g.addV('person').property('personId', 3).property('name', 'Iggy Pop').next(); |
| 93 | + """) |
| 94 | + |
| 95 | + session.execute_graph(""" |
| 96 | + g.addV('movie').property('movieId', 1).property('title', 'The Happening'). |
| 97 | + property('year', 2008).property('country', 'United States').next(); |
| 98 | + g.addV('movie').property('movieId', 2).property('title', 'The Italian Job'). |
| 99 | + property('year', 2003).property('country', 'United States').next(); |
| 100 | + |
| 101 | + g.addV('movie').property('movieId', 3).property('title', 'Revolutionary Road'). |
| 102 | + property('year', 2008).property('country', 'United States').next(); |
| 103 | + g.addV('movie').property('movieId', 4).property('title', 'The Man in the Iron Mask'). |
| 104 | + property('year', 1998).property('country', 'United States').next(); |
| 105 | + |
| 106 | + g.addV('movie').property('movieId', 5).property('title', 'Dead Man'). |
| 107 | + property('year', 1995).property('country', 'United States').next(); |
| 108 | + """) |
| 109 | + |
| 110 | +Now that our genre, actor and movie vertices are added, we'll create the relationships (edges) between them:: |
| 111 | + |
| 112 | + session.execute_graph(""" |
| 113 | + genre_horror = g.V().hasLabel('genre').has('name', 'Horror').next(); |
| 114 | + genre_drama = g.V().hasLabel('genre').has('name', 'Drama').next(); |
| 115 | + genre_action = g.V().hasLabel('genre').has('name', 'Action').next(); |
| 116 | + |
| 117 | + leo = g.V().hasLabel('person').has('name', 'Leonardo DiCaprio').next(); |
| 118 | + mark = g.V().hasLabel('person').has('name', 'Mark Wahlberg').next(); |
| 119 | + iggy = g.V().hasLabel('person').has('name', 'Iggy Pop').next(); |
| 120 | + |
| 121 | + the_happening = g.V().hasLabel('movie').has('title', 'The Happening').next(); |
| 122 | + the_italian_job = g.V().hasLabel('movie').has('title', 'The Italian Job').next(); |
| 123 | + rev_road = g.V().hasLabel('movie').has('title', 'Revolutionary Road').next(); |
| 124 | + man_mask = g.V().hasLabel('movie').has('title', 'The Man in the Iron Mask').next(); |
| 125 | + dead_man = g.V().hasLabel('movie').has('title', 'Dead Man').next(); |
| 126 | + |
| 127 | + the_happening.addEdge('belongsTo', genre_horror); |
| 128 | + the_italian_job.addEdge('belongsTo', genre_action); |
| 129 | + rev_road.addEdge('belongsTo', genre_drama); |
| 130 | + man_mask.addEdge('belongsTo', genre_drama); |
| 131 | + man_mask.addEdge('belongsTo', genre_action); |
| 132 | + dead_man.addEdge('belongsTo', genre_drama); |
| 133 | + |
| 134 | + the_happening.addEdge('actor', mark); |
| 135 | + the_italian_job.addEdge('actor', mark); |
| 136 | + rev_road.addEdge('actor', leo); |
| 137 | + man_mask.addEdge('actor', leo); |
| 138 | + dead_man.addEdge('actor', iggy); |
| 139 | + """) |
| 140 | + |
| 141 | +We are all set. You can now query your graph. Here are some examples:: |
| 142 | + |
| 143 | + # Find all movies of the genre Drama |
| 144 | + for r in session.execute_graph(""" |
| 145 | + g.V().has('genre', 'name', 'Drama').in('belongsTo').valueMap();"""): |
| 146 | + print(r) |
| 147 | + |
| 148 | + # Find all movies of the same genre as the movie 'Dead Man' |
| 149 | + for r in session.execute_graph(""" |
| 150 | + g.V().has('movie', 'title', 'Dead Man').out('belongsTo').in('belongsTo').valueMap();"""): |
| 151 | + print(r) |
| 152 | + |
| 153 | + # Find all movies of Mark Wahlberg |
| 154 | + for r in session.execute_graph(""" |
| 155 | + g.V().has('person', 'name', 'Mark Wahlberg').in('actor').valueMap();"""): |
| 156 | + print(r) |
| 157 | + |
| 158 | +For more graph examples, see `DataStax Graph Examples <https://github.com/datastax/graph-examples/>`_. |
| 159 | + |
| 160 | +Graph Types |
| 161 | +~~~~~~~~~~~ |
| 162 | + |
| 163 | +Here are the supported graph types with their Python representations: |
| 164 | + |
| 165 | +========== ================ |
| 166 | +DSE Graph Python |
| 167 | +========== ================ |
| 168 | +boolean bool |
| 169 | +bigint long, int (PY3) |
| 170 | +int int |
| 171 | +smallint int |
| 172 | +varint int |
| 173 | +float float |
| 174 | +double float |
| 175 | +uuid uuid.UUID |
| 176 | +Decimal Decimal |
| 177 | +inet str |
| 178 | +timestamp datetime.datetime |
| 179 | +date datetime.date |
| 180 | +time datetime.time |
| 181 | +duration datetime.timedelta |
| 182 | +point Point |
| 183 | +linestring LineString |
| 184 | +polygon Polygon |
| 185 | +blob bytearray, buffer (PY2), memoryview (PY3), bytes (PY3) |
| 186 | +========== ================ |
| 187 | + |
| 188 | +Graph Row Factory |
| 189 | +~~~~~~~~~~~~~~~~~ |
| 190 | + |
| 191 | +By default (with :class:`.GraphExecutionProfile.row_factory` set to :func:`.graph.graph_object_row_factory`), known graph result |
| 192 | +types are unpacked and returned as specialized types (:class:`.Vertex`, :class:`.Edge`). If the result is not one of these |
| 193 | +types, a :class:`.graph.Result` is returned, containing the graph result parsed from JSON and removed from its outer dict. |
| 194 | +The class has some accessor convenience methods for accessing top-level properties by name (`type`, `properties` above), |
| 195 | +or lists by index:: |
| 196 | + |
| 197 | + # dicts with `__getattr__` or `__getitem__` |
| 198 | + result = session.execute_graph("[[key_str: 'value', key_int: 3]]", execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0] # Using system exec just because there is no graph defined |
| 199 | + result # dse.graph.Result({u'key_str': u'value', u'key_int': 3}) |
| 200 | + result.value # {u'key_int': 3, u'key_str': u'value'} (dict) |
| 201 | + result.key_str # u'value' |
| 202 | + result.key_int # 3 |
| 203 | + result['key_str'] # u'value' |
| 204 | + result['key_int'] # 3 |
| 205 | + |
| 206 | + # lists with `__getitem__` |
| 207 | + result = session.execute_graph('[[0, 1, 2]]', execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)[0] |
| 208 | + result # dse.graph.Result([0, 1, 2]) |
| 209 | + result.value # [0, 1, 2] (list) |
| 210 | + result[1] # 1 (list[1]) |
| 211 | + |
| 212 | +You can use a different row factory by setting :attr:`.Session.default_graph_row_factory` or passing it to |
| 213 | +:meth:`.Session.execute_graph`. For example, :func:`.graph.single_object_row_factory` returns the JSON result string, |
| 214 | +unparsed. :func:`.graph.graph_result_row_factory` returns parsed, but unmodified results (such that all metadata is retained, |
| 215 | +unlike :func:`.graph.graph_object_row_factory`, which sheds some as attributes and properties are unpacked). These results |
| 216 | +also provide convenience methods for converting to known types (:meth:`~.Result.as_vertex`, :meth:`~.Result.as_edge`, :meth:`~.Result.as_path`). |
| 217 | + |
| 218 | +Vertex and Edge properties are never unpacked since their types are unknown. If you know your graph schema and want to |
| 219 | +deserialize properties, use the :class:`.GraphSON1Deserializer`. It provides convenient methods to deserialize by types (e.g. |
| 220 | +deserialize_date, deserialize_uuid, deserialize_polygon etc.). Example:: |
| 221 | + |
| 222 | + # ... |
| 223 | + from cassandra.graph import GraphSON1Deserializer |
| 224 | + |
| 225 | + row = session.execute_graph("g.V().toList()")[0] |
| 226 | + value = row.properties['my_property_key'][0].value # accessing the VertexProperty value |
| 227 | + value = GraphSON1Deserializer.deserialize_timestamp(value) |
| 228 | + |
| 229 | + print(value) # 2017-06-26 08:27:05 |
| 230 | + print(type(value)) # <type 'datetime.datetime'> |
| 231 | + |
| 232 | + |
| 233 | +Named Parameters |
| 234 | +~~~~~~~~~~~~~~~~ |
| 235 | + |
| 236 | +Named parameters are passed in a dict to :meth:`.cluster.Session.execute_graph`:: |
| 237 | + |
| 238 | + result_set = session.execute_graph('[a, b]', {'a': 1, 'b': 2}, execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT) |
| 239 | + [r.value for r in result_set] # [1, 2] |
| 240 | + |
| 241 | +All python types listed in `Graph Types`_ can be passed as named parameters and will be serialized |
| 242 | +automatically to their graph representation: |
| 243 | + |
| 244 | +Example:: |
| 245 | + |
| 246 | + session.execute_graph(""" |
| 247 | + g.addV('person'). |
| 248 | + property('name', text_value). |
| 249 | + property('age', integer_value). |
| 250 | + property('birthday', timestamp_value). |
| 251 | + property('house_yard', polygon_value).toList() |
| 252 | + """, { |
| 253 | + 'text_value': 'Mike Smith', |
| 254 | + 'integer_value': 34, |
| 255 | + 'timestamp_value': datetime.datetime(1967, 12, 30), |
| 256 | + 'polygon_value': Polygon(((30, 10), (40, 40), (20, 40), (10, 20), (30, 10))) |
| 257 | + }) |
| 258 | + |
| 259 | + |
| 260 | +As with all Execution Profile parameters, graph options can be set in the cluster default (as shown in the first example) |
| 261 | +or specified per execution:: |
| 262 | + |
| 263 | + ep = session.execution_profile_clone_update(EXEC_PROFILE_GRAPH_DEFAULT, |
| 264 | + graph_options=GraphOptions(graph_name='something-else')) |
| 265 | + session.execute_graph(statement, execution_profile=ep) |
| 266 | + |
| 267 | +Using GraphSON2 Protocol |
| 268 | +~~~~~~~~~~~~~~~~~~~~~~~~ |
| 269 | + |
| 270 | +The default graph protocol used is GraphSON1. However, GraphSON1 may |
| 271 | +cause type conversion problems during the serialization |
| 272 | +of the query to the DSE Graph server, or the deserialization of the |
| 273 | +responses back from a string Gremlin query. GraphSON2 offers better |
| 274 | +support for the complex data types handled by DSE Graph. |
| 275 | + |
| 276 | +DSE >=5.0.4 now offers the possibility to use the GraphSON2 protocol |
| 277 | +for graph queries. Enabling GraphSON2 can be done by `changing the |
| 278 | +graph protocol of the execution profile` and `setting the graphson2 row factory`:: |
| 279 | + |
| 280 | + from cassandra.cluster import Cluster, GraphExecutionProfile, EXEC_PROFILE_GRAPH_DEFAULT |
| 281 | + from cassandra.graph import GraphOptions, GraphProtocol, graph_graphson2_row_factory |
| 282 | + |
| 283 | + # Create a GraphSON2 execution profile |
| 284 | + ep = GraphExecutionProfile(graph_options=GraphOptions(graph_name='types', |
| 285 | + graph_protocol=GraphProtocol.GRAPHSON_2_0), |
| 286 | + row_factory=graph_graphson2_row_factory) |
| 287 | + |
| 288 | + cluster = Cluster(execution_profiles={EXEC_PROFILE_GRAPH_DEFAULT: ep}) |
| 289 | + session = cluster.connect() |
| 290 | + session.execute_graph(...) |
| 291 | + |
| 292 | +Using GraphSON2, all properties will be automatically deserialized to |
| 293 | +their Python representation. Note that this may bring significant |
| 294 | +behavioral changes at runtime. |
| 295 | + |
| 296 | +It is generally recommended to switch to GraphSON2 as it brings more |
| 297 | +consistent support for complex data types in the Graph driver and will |
| 298 | +be activated by default in the next major version (Python dse-driver |
| 299 | +3.0). |
0 commit comments