Skip to content

Commit 8fd0119

Browse files
authored
Merge pull request scylladb#45 from riptano/ngdg_master_ft
NGDG feature team branch for master
2 parents 3a5bd85 + e1739be commit 8fd0119

39 files changed

+5617
-2069
lines changed

CHANGELOG.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,25 @@ UNRELEASED
44

55
Features
66
--------
7+
78
* Add all() function to the ResultSet API (PYTHON-1203)
9+
* Parse new schema metadata in NGDG and generate table edges CQL syntax (PYTHON-996)
10+
* Add GraphSON3 support (PYTHON-788)
11+
* Use GraphSON3 as default for Native graphs (PYTHON-1004)
12+
* Add Tuple and UDT types for native graph (PYTHON-1005)
13+
* Add Duration type for native graph (PYTHON-1000)
14+
* Add gx:ByteBuffer graphson type support for Blob field (PYTHON-1027)
15+
* Enable Paging Through DSE Driver for Gremlin Traversals (PYTHON-1045)
16+
* Provide numerical wrappers to ensure proper graphson schema definition (PYTHON-1051)
17+
* Resolve the row_factory automatically for native graphs (PYTHON-1056)
18+
* Add g:TraversalMetrics/g:Metrics graph deserializers (PYTHON-1057)
19+
* Add g:BulkSet graph deserializers (PYTHON-1060)
20+
* Update Graph Engine names and the way to create a Classic/Native Graph (PYTHON-1090)
21+
* Update Native to Core Graph Engine
22+
* Add graphson3 and native graph support (PYTHON-1039)
23+
* Enable Paging Through DSE Driver for Gremlin Traversals (PYTHON-1045)
24+
* Expose filter predicates for cql collections (PYTHON-1019)
25+
* Add g:TraversalMetrics/Metrics deserializers (PYTHON-1057)
826

927
3.21.0
1028
======

build.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ cassandra:
162162
- 'dse-5.1'
163163
- 'dse-6.0'
164164
- 'dse-6.7'
165+
- 'dse-6.8'
165166

166167
env:
167168
CYTHON:
@@ -173,6 +174,7 @@ build:
173174
export JAVA_HOME=$CCM_JAVA_HOME
174175
export PATH=$JAVA_HOME/bin:$PATH
175176
export PYTHONPATH=""
177+
export CCM_MAX_HEAP_SIZE=1024M
176178
177179
# Required for unix socket tests
178180
sudo apt-get install socat

cassandra/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def emit(self, record):
2222

2323
logging.getLogger('cassandra').addHandler(NullHandler())
2424

25-
__version_info__ = (3, 21, 0)
25+
__version_info__ = (3, 21, 0, 'post0+20200127')
2626
__version__ = '.'.join(map(str, __version_info__))
2727

2828

cassandra/cluster.py

Lines changed: 115 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,16 @@
8080
from cassandra.marshal import int64_pack
8181
from cassandra.timestamps import MonotonicTimestampGenerator
8282
from cassandra.compat import Mapping
83-
from cassandra.util import _resolve_contact_points_to_string_map
83+
from cassandra.util import _resolve_contact_points_to_string_map, Version
8484

8585
from cassandra.datastax.insights.reporter import MonitorReporter
8686
from cassandra.datastax.insights.util import version_supports_insights
8787

8888
from cassandra.datastax.graph import (graph_object_row_factory, GraphOptions, GraphSON1Serializer,
89-
GraphProtocol, GraphSON2Serializer, GraphStatement, SimpleGraphStatement)
90-
from cassandra.datastax.graph.query import _request_timeout_key
89+
GraphProtocol, GraphSON2Serializer, GraphStatement, SimpleGraphStatement,
90+
graph_graphson2_row_factory, graph_graphson3_row_factory,
91+
GraphSON3Serializer)
92+
from cassandra.datastax.graph.query import _request_timeout_key, _GraphSONContextRowFactory
9193
from cassandra.datastax import cloud as dscloud
9294

9395
try:
@@ -153,6 +155,7 @@ def _is_gevent_monkey_patched():
153155
DEFAULT_MIN_CONNECTIONS_PER_REMOTE_HOST = 1
154156
DEFAULT_MAX_CONNECTIONS_PER_REMOTE_HOST = 2
155157

158+
_GRAPH_PAGING_MIN_DSE_VERSION = Version('6.8.0')
156159

157160
_NOT_SET = object()
158161

@@ -416,20 +419,21 @@ class GraphExecutionProfile(ExecutionProfile):
416419

417420
def __init__(self, load_balancing_policy=_NOT_SET, retry_policy=None,
418421
consistency_level=ConsistencyLevel.LOCAL_ONE, serial_consistency_level=None,
419-
request_timeout=30.0, row_factory=graph_object_row_factory,
420-
graph_options=None):
422+
request_timeout=30.0, row_factory=None,
423+
graph_options=None, continuous_paging_options=_NOT_SET):
421424
"""
422425
Default execution profile for graph execution.
423426
424-
See :class:`.ExecutionProfile`
425-
for base attributes.
427+
See :class:`.ExecutionProfile` for base attributes. Note that if not explicitly set,
428+
the row_factory and graph_options.graph_protocol are resolved during the query execution.
426429
427430
In addition to default parameters shown in the signature, this profile also defaults ``retry_policy`` to
428431
:class:`cassandra.policies.NeverRetryPolicy`.
429432
"""
430433
retry_policy = retry_policy or NeverRetryPolicy()
431434
super(GraphExecutionProfile, self).__init__(load_balancing_policy, retry_policy, consistency_level,
432-
serial_consistency_level, request_timeout, row_factory)
435+
serial_consistency_level, request_timeout, row_factory,
436+
continuous_paging_options=continuous_paging_options)
433437
self.graph_options = graph_options or GraphOptions(graph_source=b'g',
434438
graph_language=b'gremlin-groovy')
435439

@@ -438,7 +442,7 @@ class GraphAnalyticsExecutionProfile(GraphExecutionProfile):
438442

439443
def __init__(self, load_balancing_policy=None, retry_policy=None,
440444
consistency_level=ConsistencyLevel.LOCAL_ONE, serial_consistency_level=None,
441-
request_timeout=3600. * 24. * 7., row_factory=graph_object_row_factory,
445+
request_timeout=3600. * 24. * 7., row_factory=None,
442446
graph_options=None):
443447
"""
444448
Execution profile with timeout and load balancing appropriate for graph analytics queries.
@@ -2506,6 +2510,7 @@ def default_serial_consistency_level(self, cl):
25062510
_profile_manager = None
25072511
_metrics = None
25082512
_request_init_callbacks = None
2513+
_graph_paging_available = False
25092514

25102515
def __init__(self, cluster, hosts, keyspace=None):
25112516
self.cluster = cluster
@@ -2539,6 +2544,7 @@ def __init__(self, cluster, hosts, keyspace=None):
25392544
raise NoHostAvailable(msg, [h.address for h in hosts])
25402545

25412546
self.session_id = uuid.uuid4()
2547+
self._graph_paging_available = self._check_graph_paging_available()
25422548

25432549
cc_host = self.cluster.get_control_connection_host()
25442550
valid_insights_version = (cc_host and version_supports_insights(cc_host.dse_version))
@@ -2678,18 +2684,31 @@ def execute_graph_async(self, query, parameters=None, trace=False, execution_pro
26782684
if not isinstance(query, GraphStatement):
26792685
query = SimpleGraphStatement(query)
26802686

2681-
execution_profile = self._maybe_get_execution_profile(execution_profile) # look up instance here so we can apply the extended attributes
2687+
# Clone and look up instance here so we can resolve and apply the extended attributes
2688+
execution_profile = self.execution_profile_clone_update(execution_profile)
2689+
2690+
if not hasattr(execution_profile, 'graph_options'):
2691+
raise ValueError(
2692+
"Execution profile for graph queries must derive from GraphExecutionProfile, and provide graph_options")
26822693

2694+
self._resolve_execution_profile_options(execution_profile)
2695+
2696+
# make sure the graphson context row factory is bound to this cluster
26832697
try:
2684-
options = execution_profile.graph_options.copy()
2685-
except AttributeError:
2686-
raise ValueError("Execution profile for graph queries must derive from GraphExecutionProfile, and provide graph_options")
2698+
if issubclass(execution_profile.row_factory, _GraphSONContextRowFactory):
2699+
execution_profile.row_factory = execution_profile.row_factory(self.cluster)
2700+
except TypeError:
2701+
# issubclass raises TypeError if its first argument is an instance rather than a class
2702+
pass
2703+
2704+
# set graph paging if needed
2705+
self._maybe_set_graph_paging(execution_profile)
26872706

26882707
graph_parameters = None
26892708
if parameters:
2690-
graph_parameters = self._transform_params(parameters, graph_options=options)
2709+
graph_parameters = self._transform_params(parameters, graph_options=execution_profile.graph_options)
26912710

2692-
custom_payload = options.get_options_map()
2711+
custom_payload = execution_profile.graph_options.get_options_map()
26932712
if execute_as:
26942713
custom_payload[_proxy_execute_key] = six.b(execute_as)
26952714
custom_payload[_request_timeout_key] = int64_pack(long(execution_profile.request_timeout * 1000))
@@ -2700,25 +2719,99 @@ def execute_graph_async(self, query, parameters=None, trace=False, execution_pro
27002719
future.message.query_params = graph_parameters
27012720
future._protocol_handler = self.client_protocol_handler
27022721

2703-
if options.is_analytics_source and isinstance(execution_profile.load_balancing_policy, DefaultLoadBalancingPolicy):
2722+
if execution_profile.graph_options.is_analytics_source and \
2723+
isinstance(execution_profile.load_balancing_policy, DefaultLoadBalancingPolicy):
27042724
self._target_analytics_master(future)
27052725
else:
27062726
future.send_request()
27072727
return future
27082728

2729+
def _maybe_set_graph_paging(self, execution_profile):
2730+
graph_paging = execution_profile.continuous_paging_options
2731+
if execution_profile.continuous_paging_options is _NOT_SET:
2732+
graph_paging = ContinuousPagingOptions() if self._graph_paging_available else None
2733+
2734+
execution_profile.continuous_paging_options = graph_paging
2735+
2736+
def _check_graph_paging_available(self):
2737+
"""Verify if we can enable graph paging. This is executed only once when the session is created."""
2738+
2739+
if not ProtocolVersion.has_continuous_paging_next_pages(self._protocol_version):
2740+
return False
2741+
2742+
for host in self.cluster.metadata.all_hosts():
2743+
if host.dse_version is None:
2744+
return False
2745+
2746+
version = Version(host.dse_version)
2747+
if version < _GRAPH_PAGING_MIN_DSE_VERSION:
2748+
return False
2749+
2750+
return True
2751+
2752+
def _resolve_execution_profile_options(self, execution_profile):
2753+
"""
2754+
Determine the GraphSON protocol and row factory for a graph query. This is useful
2755+
to automatically configure the execution profile when executing a query on a
2756+
core graph.
2757+
2758+
If `graph_protocol` is not explicitly specified, the following rules apply:
2759+
- Default to GraphProtocol.GRAPHSON_1_0, or GRAPHSON_2_0 if the `graph_language` is not gremlin-groovy.
2760+
- If `graph_options.graph_name` is specified and is a Core graph, set GraphSON_3_0.
2761+
If `row_factory` is not explicitly specified, the following rules apply:
2762+
- Default to graph_object_row_factory.
2763+
- If `graph_options.graph_name` is specified and is a Core graph, set graph_graphson3_row_factory.
2764+
"""
2765+
if execution_profile.graph_options.graph_protocol is not None and \
2766+
execution_profile.row_factory is not None:
2767+
return
2768+
2769+
graph_options = execution_profile.graph_options
2770+
2771+
is_core_graph = False
2772+
if graph_options.graph_name:
2773+
# graph_options.graph_name is bytes ...
2774+
name = graph_options.graph_name.decode('utf-8')
2775+
if name in self.cluster.metadata.keyspaces:
2776+
ks_metadata = self.cluster.metadata.keyspaces[name]
2777+
if ks_metadata.graph_engine == 'Core':
2778+
is_core_graph = True
2779+
2780+
if is_core_graph:
2781+
graph_protocol = GraphProtocol.GRAPHSON_3_0
2782+
row_factory = graph_graphson3_row_factory
2783+
else:
2784+
if graph_options.graph_language == GraphOptions.DEFAULT_GRAPH_LANGUAGE:
2785+
graph_protocol = GraphOptions.DEFAULT_GRAPH_PROTOCOL
2786+
row_factory = graph_object_row_factory
2787+
else:
2788+
# if not gremlin-groovy, GraphSON_2_0
2789+
graph_protocol = GraphProtocol.GRAPHSON_2_0
2790+
row_factory = graph_graphson2_row_factory
2791+
2792+
# Only apply if not set explicitly
2793+
if graph_options.graph_protocol is None:
2794+
graph_options.graph_protocol = graph_protocol
2795+
if execution_profile.row_factory is None:
2796+
execution_profile.row_factory = row_factory
2797+
27092798
def _transform_params(self, parameters, graph_options):
27102799
if not isinstance(parameters, dict):
27112800
raise ValueError('The parameters must be a dictionary. Unnamed parameters are not allowed.')
27122801

27132802
# Serialize python types to graphson
27142803
serializer = GraphSON1Serializer
27152804
if graph_options.graph_protocol == GraphProtocol.GRAPHSON_2_0:
2716-
serializer = GraphSON2Serializer
2717-
2718-
serialized_parameters = {
2719-
p: serializer.serialize(v)
2720-
for p, v in six.iteritems(parameters)
2721-
}
2805+
serializer = GraphSON2Serializer()
2806+
elif graph_options.graph_protocol == GraphProtocol.GRAPHSON_3_0:
2807+
# only required for core graphs
2808+
context = {
2809+
'cluster': self.cluster,
2810+
'graph_name': graph_options.graph_name.decode('utf-8') if graph_options.graph_name else None
2811+
}
2812+
serializer = GraphSON3Serializer(context)
2813+
2814+
serialized_parameters = serializer.serialize(parameters)
27222815
return [json.dumps(serialized_parameters).encode('utf-8')]
27232816

27242817
def _target_analytics_master(self, future):

cassandra/datastax/graph/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from cassandra.datastax.graph.query import (
1818
GraphOptions, GraphProtocol, GraphStatement, SimpleGraphStatement, Result,
1919
graph_object_row_factory, single_object_row_factory,
20-
graph_result_row_factory, graph_graphson2_row_factory
20+
graph_result_row_factory, graph_graphson2_row_factory,
21+
graph_graphson3_row_factory
2122
)
2223
from cassandra.datastax.graph.graphson import *

0 commit comments

Comments
 (0)