|
| 1 | +# Copyright DataStax, Inc. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +from collections import namedtuple |
| 16 | +from functools import lru_cache |
| 17 | + |
| 18 | +import logging |
| 19 | +import os |
| 20 | + |
| 21 | +log = logging.getLogger(__name__) |
| 22 | + |
| 23 | +from cassandra.cqltypes import _cqltypes |
| 24 | +from cassandra.policies import ColumnEncryptionPolicy |
| 25 | + |
| 26 | +from cryptography.hazmat.primitives import padding |
| 27 | +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes |
| 28 | + |
# AES always works on 128-bit blocks; the "256" refers to the key length.
# Sizes are kept both in bits and in bytes; the byte values are derived with
# integer division so no float round-trip is involved.
AES256_BLOCK_SIZE = 128
AES256_BLOCK_SIZE_BYTES = AES256_BLOCK_SIZE // 8
AES256_KEY_SIZE = 256
AES256_KEY_SIZE_BYTES = AES256_KEY_SIZE // 8

# Per-column state: the encryption key and the type registered for the column.
ColData = namedtuple('ColData', ['key', 'type'])
| 35 | + |
class AES256ColumnEncryptionPolicy(ColumnEncryptionPolicy):
    """
    Column encryption policy using AES-256 in CBC mode with PKCS7 padding.

    Columns are registered via add_column(), which stores a 256-bit key and
    the column's type for a given ColDesc. encrypt() emits the IV followed by
    the ciphertext; decrypt() reads the IV back from the first block of its
    input.
    """

    # Fix block cipher mode for now.  IV size is a function of block cipher used
    # so fixing this avoids (possibly unnecessary) validation logic here.
    mode = modes.CBC

    # "iv" is expected to be a bytes value the same size as the AES block
    # (128 bits or 16 bytes).  It has to be hashable because ciphers are cached
    # by (key, iv), so pass bytes rather than a bytearray.  If no IV is supplied
    # a random one is generated.  The IV in use is prepended to everything
    # encrypt() returns, and decrypt() takes its IV from that prefix.
    def __init__(self, iv=None):

        # CBC uses an IV that's the same size as the block size
        #
        # Avoid defining IV with a default arg in order to stay away from
        # any issues around the caching of default args
        if iv:
            if len(iv) != AES256_BLOCK_SIZE_BYTES:
                raise ValueError("This policy uses AES-256 with CBC mode and therefore expects a 128-bit initialization vector")
            self.iv = iv
        else:
            self.iv = os.urandom(AES256_BLOCK_SIZE_BYTES)

        # ColData for a given ColDesc is always preserved.  We only create a Cipher
        # when there's an actual need to for a given ColDesc
        self.coldata = {}
        self.ciphers = {}

    def encrypt(self, coldesc, obj_bytes):
        """
        Pad and encrypt obj_bytes with the key registered for coldesc.

        Returns the policy IV followed by the ciphertext.  Raises ValueError
        if coldesc has not been registered with add_column().
        """

        # AES256 has a 128-bit block size so if the input bytes don't align perfectly on
        # those blocks we have to pad them.  There's plenty of room for optimization here:
        #
        # * Instances of the PKCS7 padder should be managed in a bounded pool
        # * It would be nice if we could get a flag from encrypted data to indicate
        #   whether it was padded or not
        #   * Might be able to make this happen with a leading block of flags in encrypted data
        padder = padding.PKCS7(AES256_BLOCK_SIZE).padder()
        padded_bytes = padder.update(obj_bytes) + padder.finalize()

        # NOTE(review): every call uses self.iv, so identical plaintexts under the
        # same key yield identical ciphertexts -- confirm this is intended.
        cipher = self._get_cipher(coldesc)
        encryptor = cipher.encryptor()
        return self.iv + encryptor.update(padded_bytes) + encryptor.finalize()

    def decrypt(self, coldesc, bytes):
        """
        Decrypt a value produced by encrypt() for coldesc and strip its padding.

        The first block of the input is treated as the IV and the remainder as
        the ciphertext.  Raises ValueError if coldesc has not been registered.
        """

        # The "bytes" parameter name shadows the builtin; it is kept because it
        # is part of the method's interface.
        iv = bytes[:AES256_BLOCK_SIZE_BYTES]
        encrypted_bytes = bytes[AES256_BLOCK_SIZE_BYTES:]
        cipher = self._get_cipher(coldesc, iv=iv)
        decryptor = cipher.decryptor()
        padded_bytes = decryptor.update(encrypted_bytes) + decryptor.finalize()

        unpadder = padding.PKCS7(AES256_BLOCK_SIZE).unpadder()
        return unpadder.update(padded_bytes) + unpadder.finalize()

    def add_column(self, coldesc, key, type):
        """
        Register the encryption key and type name for a column.

        "type" must be a key of _cqltypes and "key" must be 256 bits (32 bytes)
        long.  Raises ValueError when any argument is missing or invalid.
        """

        if not coldesc:
            raise ValueError("ColDesc supplied to add_column cannot be None")
        if not key:
            raise ValueError("Key supplied to add_column cannot be None")
        if not type:
            raise ValueError("Type supplied to add_column cannot be None")
        if type not in _cqltypes:
            # str.format only substitutes "{}" placeholders, not "%s"
            raise ValueError("Type {} is not a supported type".format(type))
        if len(key) != AES256_KEY_SIZE_BYTES:
            raise ValueError("AES256 column encryption policy expects a 256-bit encryption key")
        self.coldata[coldesc] = ColData(key, _cqltypes[type])

    def contains_column(self, coldesc):
        """Return True if coldesc has been registered with add_column()."""
        return coldesc in self.coldata

    def encode_and_encrypt(self, coldesc, obj):
        """
        Serialize obj with the type registered for coldesc and encrypt the result.

        Raises ValueError if coldesc is missing or unregistered, or if obj is None.
        """
        if not coldesc:
            raise ValueError("ColDesc supplied to encode_and_encrypt cannot be None")
        # Only None is rejected: falsy values such as 0, False or an empty
        # string are legitimate column values and must still be encrypted.
        if obj is None:
            raise ValueError("Object supplied to encode_and_encrypt cannot be None")
        coldata = self.coldata.get(coldesc)
        if not coldata:
            raise ValueError("Could not find ColData for ColDesc {}".format(coldesc))
        return self.encrypt(coldesc, coldata.type.serialize(obj, None))

    def cache_info(self):
        """Return the lru_cache statistics of the shared cipher cache."""
        return AES256ColumnEncryptionPolicy._build_cipher.cache_info()

    def column_type(self, coldesc):
        """Return the type stored for coldesc; raises KeyError if it is not registered."""
        return self.coldata[coldesc].type

    def _get_cipher(self, coldesc, iv=None):
        """
        Access relevant state from this instance necessary to create a Cipher and then get one,
        hopefully returning a cached instance if we've already done so (and it hasn't been evicted)
        """
        # Keep the try body limited to the lookup so that only a missing column
        # is reported as such.
        try:
            coldata = self.coldata[coldesc]
        except KeyError:
            raise ValueError("Could not find column {}".format(coldesc))
        return AES256ColumnEncryptionPolicy._build_cipher(coldata.key, iv or self.iv)

    # Explicitly use a static method here to avoid caching self.  The cache is
    # shared by all instances and keyed on (key, iv), so both must be hashable.
    @staticmethod
    @lru_cache(maxsize=128)
    def _build_cipher(key, iv):
        return Cipher(algorithms.AES256(key), AES256ColumnEncryptionPolicy.mode(iv))
0 commit comments