12
12
# See the License for the specific language governing permissions and
13
13
# limitations under the License.
14
14
import random
15
+
16
+ from collections import namedtuple
17
+ from functools import lru_cache
15
18
from itertools import islice , cycle , groupby , repeat
16
19
import logging
20
+ import os
17
21
from random import randint , shuffle
18
22
from threading import Lock
19
23
import socket
20
24
import warnings
25
+
26
+ from cryptography .hazmat .primitives import padding
27
+ from cryptography .hazmat .primitives .ciphers import Cipher , algorithms , modes
28
+
21
29
from cassandra import WriteType as WT
22
- from cassandra .connection import UnixSocketEndPoint
30
+ from cassandra .cqltypes import _cqltypes
23
31
24
32
25
33
# This is done this way because WriteType was originally
@@ -572,8 +580,9 @@ def __init__(self, hosts):
572
580
self ._allowed_hosts = tuple (hosts )
573
581
self ._allowed_hosts_resolved = []
574
582
for h in self ._allowed_hosts :
575
- if isinstance (h , UnixSocketEndPoint ):
576
- self ._allowed_hosts_resolved .append (h ._unix_socket_path )
583
+ unix_socket_path = getattr (h , "_unix_socket_path" , None )
584
+ if unix_socket_path :
585
+ self ._allowed_hosts_resolved .append (unix_socket_path )
577
586
else :
578
587
self ._allowed_hosts_resolved .extend ([endpoint [4 ][0 ]
579
588
for endpoint in socket .getaddrinfo (h , None , socket .AF_UNSPEC , socket .SOCK_STREAM )])
@@ -608,7 +617,7 @@ class HostFilterPolicy(LoadBalancingPolicy):
608
617
A :class:`.LoadBalancingPolicy` subclass configured with a child policy,
609
618
and a single-argument predicate. This policy defers to the child policy for
610
619
hosts where ``predicate(host)`` is truthy. Hosts for which
611
- ``predicate(host)`` is falsey will be considered :attr:`.IGNORED`, and will
620
+ ``predicate(host)`` is falsy will be considered :attr:`.IGNORED`, and will
612
621
not be used in a query plan.
613
622
614
623
This can be used in the cases where you need a whitelist or blacklist
@@ -644,7 +653,7 @@ def __init__(self, child_policy, predicate):
644
653
:param child_policy: an instantiated :class:`.LoadBalancingPolicy`
645
654
that this one will defer to.
646
655
:param predicate: a one-parameter function that takes a :class:`.Host`.
647
- If it returns a falsey value, the :class:`.Host` will
656
+ If it returns a falsy value, the :class:`.Host` will
648
657
be :attr:`.IGNORED` and not returned in query plans.
649
658
"""
650
659
super (HostFilterPolicy , self ).__init__ ()
@@ -680,7 +689,7 @@ def predicate(self):
680
689
def distance (self , host ):
681
690
"""
682
691
Checks if ``predicate(host)``, then returns
683
- :attr:`~HostDistance.IGNORED` if falsey , and defers to the child policy
692
+ :attr:`~HostDistance.IGNORED` if falsy , and defers to the child policy
684
693
otherwise.
685
694
"""
686
695
if self .predicate (host ):
@@ -769,7 +778,7 @@ class ReconnectionPolicy(object):
769
778
def new_schedule (self ):
770
779
"""
771
780
This should return a finite or infinite iterable of delays (each as a
772
- floating point number of seconds) inbetween each failed reconnection
781
+ floating point number of seconds) in-between each failed reconnection
773
782
attempt. Note that if the iterable is finite, reconnection attempts
774
783
will cease once the iterable is exhausted.
775
784
"""
@@ -779,12 +788,12 @@ def new_schedule(self):
779
788
class ConstantReconnectionPolicy (ReconnectionPolicy ):
780
789
"""
781
790
A :class:`.ReconnectionPolicy` subclass which sleeps for a fixed delay
782
- inbetween each reconnection attempt.
791
+ in-between each reconnection attempt.
783
792
"""
784
793
785
794
def __init__ (self , delay , max_attempts = 64 ):
786
795
"""
787
- `delay` should be a floating point number of seconds to wait inbetween
796
+ `delay` should be a floating point number of seconds to wait in-between
788
797
each attempt.
789
798
790
799
`max_attempts` should be a total number of attempts to be made before
@@ -808,7 +817,7 @@ def new_schedule(self):
808
817
class ExponentialReconnectionPolicy (ReconnectionPolicy ):
809
818
"""
810
819
A :class:`.ReconnectionPolicy` subclass which exponentially increases
811
- the length of the delay inbetween each reconnection attempt up to
820
+ the length of the delay in-between each reconnection attempt up to
812
821
a set maximum delay.
813
822
814
823
A random amount of jitter (+/- 15%) will be added to the pure exponential
@@ -868,7 +877,7 @@ class RetryPolicy(object):
868
877
timeout and unavailable failures. These are failures reported from the
869
878
server side. Timeouts are configured by
870
879
`settings in cassandra.yaml <https://github.com/apache/cassandra/blob/cassandra-2.1.4/conf/cassandra.yaml#L568-L584>`_.
871
- Unavailable failures occur when the coordinator cannot acheive the consistency
880
+ Unavailable failures occur when the coordinator cannot achieve the consistency
872
881
level for a request. For further information see the method descriptions
873
882
below.
874
883
@@ -1385,3 +1394,160 @@ def _rethrow(self, *args, **kwargs):
1385
1394
on_read_timeout = _rethrow
1386
1395
on_write_timeout = _rethrow
1387
1396
on_unavailable = _rethrow
1397
+
1398
+
1399
# Identifies a single column: keyspace name, table name and column name.
ColDesc = namedtuple('ColDesc', 'ks table col')
# Per-column encryption state: the key material and the CQL type of the column.
ColData = namedtuple('ColData', 'key type')
1401
+
1402
class ColumnEncryptionPolicy(object):
    """
    Interface for policies that encrypt data before it is sent to the cluster and
    decrypt it when it comes back, in a (mostly) transparent way.

    Key material and any other configuration is registered per column. Driver
    structures that know which columns they are operating on can then consult the
    policy. In practice that covers:

    * Prepared statements - values bound to columns known to the cluster's policy are
      transparently encrypted before being sent
    * Rows returned from any query - values of columns known to the cluster's policy
      are transparently decrypted before being handed back to the user

    To use this feature, instantiate this class (more likely a subclass) and configure
    it before the cluster is created, then hand it to the Cluster at creation time
    through the :attr:`.Cluster.column_encryption_policy` attribute.

    Every method on this base class raises :exc:`NotImplementedError`; subclasses are
    expected to override them.
    """

    def encrypt(self, coldesc, obj_bytes):
        """
        Encrypt ``obj_bytes`` with the cryptographic materials registered for the
        column ``coldesc``. Mostly for internal use, but it can also be used to encrypt
        values passed to non-prepared statements consistently with this policy.
        """
        raise NotImplementedError

    def decrypt(self, coldesc, encrypted_bytes):
        """
        Decrypt ``encrypted_bytes`` with the cryptographic materials registered for the
        column ``coldesc``. Used internally; external use is possible although there is
        no obvious use case for it at present.
        """
        raise NotImplementedError

    def add_column(self, coldesc, key):
        """
        Register the cryptographic materials to use when encrypting and/or decrypting
        data for the column ``coldesc``.
        """
        raise NotImplementedError

    def contains_column(self, coldesc):
        """
        Tell whether the column ``coldesc`` is handled by this policy.
        Currently only used internally.
        """
        raise NotImplementedError

    def encode_and_encrypt(self, coldesc, obj):
        """
        Convenience helper that makes this policy usable with simple
        (i.e. non-prepared) statements.
        """
        raise NotImplementedError
1457
+
1458
# Sizes are declared in bits and derived in bytes. AES-256 uses a 128-bit block
# and a 256-bit key.
AES256_BLOCK_SIZE = 128
AES256_BLOCK_SIZE_BYTES = AES256_BLOCK_SIZE // 8
AES256_KEY_SIZE = 256
AES256_KEY_SIZE_BYTES = AES256_KEY_SIZE // 8
1462
+
1463
class AES256ColumnEncryptionPolicy(ColumnEncryptionPolicy):
    """
    A :class:`.ColumnEncryptionPolicy` that encrypts column values with AES-256.

    Plaintext is PKCS7-padded to the AES block size before encryption and unpadded
    after decryption. Keys (and the CQL type of the column) are registered per column
    with :meth:`add_column`; the cipher mode and IV are shared by every column handled
    by a given policy instance.
    """

    # CBC uses an IV that's the same size as the block size
    #
    # TODO: Need to find some way to expose mode options
    # (CBC etc.) without leaking classes from the underlying
    # impl here
    #
    # NOTE(review): the default ``iv`` is evaluated once, when this class body is
    # executed, not once per instance. Every policy created without an explicit IV in
    # the same process therefore shares one random IV, and a different process gets a
    # different one. Callers that need to decrypt data written by another process must
    # pass the same ``iv`` explicitly. This is kept as-is to preserve existing behavior.
    def __init__(self, mode=modes.CBC, iv=os.urandom(AES256_BLOCK_SIZE_BYTES)):
        """
        :param mode: a callable that builds the cipher mode from an IV (it is invoked
            as ``mode(iv)`` when a cipher is created).
        :param iv: initialization vector passed to ``mode``.
        """
        self.mode = mode
        self.iv = iv

        # ColData for a given ColDesc is always preserved.  We only create a Cipher
        # when there's an actual need to for a given ColDesc
        self.coldata = {}
        self.ciphers = {}

    def encrypt(self, coldesc, obj_bytes):
        """
        Pad ``obj_bytes`` with PKCS7 and encrypt it with the cipher for ``coldesc``.

        :raises ValueError: if ``coldesc`` has not been registered via :meth:`add_column`.
        """
        # AES256 has a 128-bit block size so if the input bytes don't align perfectly on
        # those blocks we have to pad them.  There's plenty of room for optimization here:
        #
        # * Instances of the PKCS7 padder should be managed in a bounded pool
        # * It would be nice if we could get a flag from encrypted data to indicate
        #   whether it was padded or not
        #   * Might be able to make this happen with a leading block of flags in encrypted data
        padder = padding.PKCS7(AES256_BLOCK_SIZE).padder()
        padded_bytes = padder.update(obj_bytes) + padder.finalize()

        cipher = self._get_cipher(coldesc)
        encryptor = cipher.encryptor()
        return encryptor.update(padded_bytes) + encryptor.finalize()

    def decrypt(self, coldesc, encrypted_bytes):
        """
        Decrypt ``encrypted_bytes`` with the cipher for ``coldesc`` and strip the PKCS7
        padding added by :meth:`encrypt`.

        :raises ValueError: if ``coldesc`` has not been registered via :meth:`add_column`.
        """
        cipher = self._get_cipher(coldesc)
        decryptor = cipher.decryptor()
        padded_bytes = decryptor.update(encrypted_bytes) + decryptor.finalize()

        unpadder = padding.PKCS7(AES256_BLOCK_SIZE).unpadder()
        return unpadder.update(padded_bytes) + unpadder.finalize()

    def add_column(self, coldesc, key, type):
        """
        Register the key and CQL type to use for the column ``coldesc``.

        :param coldesc: the :class:`ColDesc` identifying the column.
        :param key: the encryption key; must be exactly ``AES256_KEY_SIZE_BYTES`` long.
        :param type: name of the column's CQL type; must be a key of ``_cqltypes``.
        :raises ValueError: if any argument is missing or fails the checks above.
        """
        if not coldesc:
            raise ValueError("ColDesc supplied to add_column cannot be None")
        if not key:
            raise ValueError("Key supplied to add_column cannot be None")
        if not type:
            raise ValueError("Type supplied to add_column cannot be None")
        if type not in _cqltypes:
            # The original mixed a %-style placeholder with str.format(), so the
            # offending type name never made it into the message.
            raise ValueError("Type {} is not a supported type".format(type))
        if len(key) != AES256_KEY_SIZE_BYTES:
            raise ValueError("AES256 column encryption policy expects a 256-bit encryption key")
        self.coldata[coldesc] = ColData(key, _cqltypes[type])

    def contains_column(self, coldesc):
        """Return True if ``coldesc`` has been registered via :meth:`add_column`."""
        return coldesc in self.coldata

    def encode_and_encrypt(self, coldesc, obj):
        """
        Serialize ``obj`` with the CQL type registered for ``coldesc`` and encrypt the
        resulting bytes. Intended for values supplied to simple (non-prepared) statements.

        :raises ValueError: if ``coldesc`` is missing or unknown, or ``obj`` is None.
        """
        if not coldesc:
            raise ValueError("ColDesc supplied to encode_and_encrypt cannot be None")
        # Only None is rejected: falsy-but-valid values such as 0, False or "" must
        # still be encryptable.
        if obj is None:
            raise ValueError("Object supplied to encode_and_encrypt cannot be None")
        coldata = self.coldata.get(coldesc)
        if coldata is None:
            raise ValueError("Could not find ColData for ColDesc {}".format(coldesc))
        # Second argument to serialize() is passed as None, as in the original code
        # (presumably the protocol version -- verify against the cqltypes API).
        return self.encrypt(coldesc, coldata.type.serialize(obj, None))

    def cache_info(self):
        """
        Return the ``lru_cache`` statistics of the cipher cache. The cache lives on the
        class, so the numbers cover all policy instances.
        """
        return AES256ColumnEncryptionPolicy._build_cipher.cache_info()

    def column_type(self, coldesc):
        """
        Return the CQL type registered for ``coldesc``.

        :raises KeyError: if ``coldesc`` has not been registered.
        """
        return self.coldata[coldesc].type

    def _get_cipher(self, coldesc):
        """
        Access relevant state from this instance necessary to create a Cipher and then get one,
        hopefully returning a cached instance if we've already done so (and it hasn't been evicted)

        :raises ValueError: if ``coldesc`` has not been registered.
        """
        # Keep the try body to the lookup only so that an unrelated KeyError raised
        # while building the cipher is not misreported as a missing column.
        try:
            coldata = self.coldata[coldesc]
        except KeyError:
            raise ValueError("Could not find column {}".format(coldesc))
        return AES256ColumnEncryptionPolicy._build_cipher(coldata.key, self.mode, self.iv)

    # Explicitly use a static method here to avoid caching self
    @staticmethod
    @lru_cache(maxsize=128)
    def _build_cipher(key, mode, iv):
        """Build (or fetch from the cache) the Cipher for a key / mode / IV triple."""
        return Cipher(algorithms.AES256(key), mode(iv))
0 commit comments