|
| 1 | +package misk.jooq.listeners |
| 2 | + |
| 3 | +import misk.jooq.toInstant |
| 4 | +import java.nio.ByteBuffer |
| 5 | +import java.time.LocalDateTime |
| 6 | +import java.time.temporal.ChronoUnit |
| 7 | +import org.jooq.Record |
| 8 | +import org.jooq.RecordContext |
| 9 | +import org.jooq.Table |
| 10 | +import org.jooq.TableField |
| 11 | +import org.jooq.exception.DataAccessException |
| 12 | +import misk.logging.getLogger |
| 13 | +import org.jooq.RecordListener |
| 14 | + |
| 15 | +/** |
| 16 | + * Using this listener will allow you to guard against direct DB updates. This listener will compute a MAC signature |
| 17 | + * from the column values provided and store the mac into another column in the DB. When it comes time to retrieve the |
| 18 | + * record, the mac will be verified with column data again. If the column data has been changed, then the mac will not |
| 19 | + * validate, and you will get an exception. |
| 20 | + * |
| 21 | + * Things to remember: Add this in when your service is slightly mature. You can't arbitrarily change the columns used |
| 22 | + * nor change the column order in creating the mac later on. This will prevent all old rows from being read. |
| 23 | + * |
| 24 | + * You can add this listener behind a flag and correct the signatures using a backfill and then use this listener again |
| 25 | + * |
| 26 | + * Also, general Jooq RecordListener rules apply. Specifically, a RecordListener does not affect any bulk DML |
| 27 | + * statements (e.g. a DSLContext.update(Table)), whose affected records are not available to clients. More info here: |
| 28 | + * [org.jooq.RecordListener] |
| 29 | + * |
| 30 | + * Important! If you use the [JooqTimestampRecordListener], make sure the [RecordSignatureListener] is added after it |
| 31 | + * if you are using created_at and updated_at columns in the signature, so the timestamps are set before signing. |
| 32 | + * |
| 33 | + * To see the full example, see the [RecordSignatureListenerTest] |
| 34 | + */ |
| 35 | +class RecordSignatureListener( |
| 36 | + private val recordHasher: RecordHasher, |
| 37 | + private val tableSignatureDetails: List<TableSignatureDetails>, |
| 38 | +) : RecordListener { |
| 39 | + |
| 40 | + /** |
| 41 | + * We override insertStart() and updateStart() instead of storeStart(), which is called regardless of whether it is |
| 42 | + * an update or an insert. The reason is that people generally add another listener, JooqTimestampRecordListener, |
| 43 | + * which sets the timestamp for created_at and updated_at columns. If these columns form part of the signature then |
| 44 | + * these values need to be set before the signature can be calculated. The JooqTimestampRecordListener has insertStart |
| 45 | + * and updateStart overridden. Jooq looks to be calling storeStart() on all listeners before moving on to call |
| 46 | + * insertStart() and updateStart() on all listeners. So either all listeners need to implement storeStart() or |
| 47 | + * insertStart() and updateStart(). We cannot mix the 2 styles and still guarantee an order. |
| 48 | + */ |
| 49 | + override fun insertStart(ctx: RecordContext?) = updateSignature(ctx) |
| 50 | + |
| 51 | + override fun updateStart(ctx: RecordContext?) = updateSignature(ctx) |
| 52 | + |
| 53 | + /** Computes the MAC over the configured columns and sets it on the signature column; does nothing when there is no record or no configured signature column is found on it. */ private fun updateSignature(ctx: RecordContext?) { |
| 54 | + if (ctx?.record() == null) return |
| 55 | + val tableSignature = tableSignatureDetails.find { ctx.record().field(it.signatureRecordColumn) != null } ?: return |
| 56 | + |
| 57 | + val concatenatedByteArray = concatenateByteArrayFromColumnValues(tableSignature, ctx) |
| 58 | + val signature = recordHasher.computeMac(tableSignature.signatureKeyName, concatenatedByteArray) |
| 59 | + ctx.record().set(tableSignature.signatureRecordColumn, signature) |
| 60 | + } |
| 61 | + |
| 62 | + /** Verifies the loaded record against its stored signature; throws [DataIntegrityException] when the signature is null (and null signatures are not allowed) or when verifyMac fails. */ override fun loadEnd(ctx: RecordContext?) { |
| 63 | + if (ctx?.record() == null) return |
| 64 | + val tableSignature = tableSignatureDetails.find { ctx.record().field(it.signatureRecordColumn) != null } ?: return |
| 65 | + |
| 66 | + // Skip validation if all signature columns are null (indicates a partially loaded record) |
| 67 | + val allColumnsNull = tableSignature.columns.all { column -> |
| 68 | + ctx.record().get(column) == null |
| 69 | + } |
| 70 | + if (allColumnsNull) return |
| 71 | + |
| 72 | + val concatenatedByteArray = concatenateByteArrayFromColumnValues(tableSignature, ctx) |
| 73 | + val signature = |
| 74 | + ctx.record().get(tableSignature.signatureRecordColumn) |
| 75 | + ?: if (!tableSignature.allowNullSignatures) { |
| 76 | + throw DataIntegrityException(exceptionMessage("Signature is null", tableSignature, ctx)) |
| 77 | + } else { |
| 78 | + return |
| 79 | + } |
| 80 | + |
| 81 | + try { |
| 82 | + recordHasher.verifyMac(tableSignature.signatureKeyName, signature, concatenatedByteArray) |
| 83 | + } catch (e: Exception) { |
| 84 | + log.warn(e) { |
| 85 | + exceptionMessage("The data in the database does not match the record signature on the", tableSignature, ctx) |
| 86 | + } |
| 87 | + |
| 88 | + throw DataIntegrityException( |
| 89 | + exceptionMessage("The data in the database does not match the record signature on the", tableSignature, ctx), |
| 90 | + cause = e, |
| 91 | + ) |
| 92 | + } |
| 93 | + } |
| 94 | + |
| 95 | + private fun concatenateByteArrayFromColumnValues( |
| 96 | + tableSignature: TableSignatureDetails, |
| 97 | + ctx: RecordContext, |
| 98 | + ): ByteArray { |
| 99 | + /** |
| 100 | + Here, we have implemented an LV (length-value) encoding scheme. |
| 101 | + Encoding the column values and concatenating them as a byte array in this manner |
| 102 | + prevents two distinct records from producing the same signature input, |
| 103 | + given that the signature is built using values from the foo and bar columns. |
| 104 | + |
| 105 | + Encoding scheme: |
| 106 | + - null: 4 bytes with value -1 (no data follows) |
| 107 | + - non-null: 4 bytes with length >= 0, followed by that many bytes of data |
| 108 | + |
| 109 | + more info here: https://en.wikipedia.org/wiki/Type%E2%80%93length%E2%80%93value |
| 110 | + |
| 111 | + without LV encoding |
| 112 | + id | foo | bar | |
| 113 | + 1 | ab | c | bytearray(ab) + bytearray(c) |
| 114 | + 2 | a | bc | bytearray(a) + bytearray(bc) |
| 115 | + result: the two bytearrays from record 1 and record 2 are the same |
| 116 | + |
| 117 | + with LV encoding (lengthByte(n) stands for the 4-byte length prefix holding n) |
| 118 | + id | foo | bar | |
| 119 | + 1 | ab | c | (lengthByte(2) + bytearray(ab)) + (lengthByte(1) + bytearray(c)) |
| 120 | + 2 | a | bc | (lengthByte(1) + bytearray(a)) + (lengthByte(2) + bytearray(bc)) |
| 121 | + result: the two bytearrays from record 1 and record 2 are NOT the same |
| 122 | + |
| 123 | + We also encode null values with a special marker (-1) to prevent collisions like: |
| 124 | + id | foo | bar | |
| 125 | + 1 | null | a | bytearray(special_for_null) + (lengthByte(1) + bytearray(a)) |
| 126 | + 2 | a | null | (lengthByte(1) + bytearray(a)) + bytearray(special_for_null) |
| 127 | + |
| 128 | + bytearray(special_for_null) cannot collide with a real value, because a real length prefix is never negative |
| 129 | + */ |
| 130 | + return tableSignature.columns.fold(ByteArray(0)) { bytes, column -> |
| 131 | + when (val columnValue = ctx.record().get(column)) { |
| 132 | + // For null values, encode with -1 as a special marker (no value bytes follow) |
| 133 | + null -> { |
| 134 | + val nullMarker = ByteBuffer.allocate(4).putInt(-1).array() |
| 135 | + bytes + nullMarker |
| 136 | + } |
| 137 | + |
| 138 | + // For ByteArray values, prepend the length (4 bytes) then the value |
| 139 | + is ByteArray -> { |
| 140 | + val lengthBytes = ByteBuffer.allocate(4).putInt(columnValue.size).array() |
| 141 | + bytes + lengthBytes + columnValue |
| 142 | + } |
| 143 | + |
| 144 | + // For LocalDateTime, convert to bytes (truncated per the column's precision), then apply Length-Value encoding |
| 145 | + is LocalDateTime -> { |
| 146 | + val precision = column.dataType.precision() |
| 147 | + val valueBytes = columnValue.toByteArray(precision) |
| 148 | + val lengthBytes = ByteBuffer.allocate(4).putInt(valueBytes.size).array() |
| 149 | + bytes + lengthBytes + valueBytes |
| 150 | + } |
| 151 | + // For all other types, convert to string via toString(), then to bytes, then apply Length-Value encoding |
| 152 | + else -> { |
| 153 | + val valueBytes = columnValue.toString().toByteArray() |
| 154 | + val lengthBytes = ByteBuffer.allocate(4).putInt(valueBytes.size).array() |
| 155 | + bytes + lengthBytes + valueBytes |
| 156 | + } |
| 157 | + } |
| 158 | + } |
| 159 | + } |
| 160 | + |
| 161 | + /** |
| 162 | + * MySQL's precision for a timestamp is millis. But in the Kube pod where the code runs, the JVM timestamp is in |
| 163 | + * nanos. So when we store the data, the signature is computed with nanos, but when we load the data from the DB, the |
| 164 | + * nanos are lost and hence the signature computed is different. This method truncates the instant based on the |
| 165 | + * column precision (whole seconds when precision < 3, millis otherwise) and returns the epoch-millis value as the |
| 166 | + * bytes of its decimal string. The precision check is required to be able to test this on a Mac, where the JVM's |
| 167 | + * precision is millis: in order to test truncation we need to create a MySQL timestamp with a precision of 0. This |
| 168 | + * also allows the signature to work for any prod column whose precision is 0 (i.e. restricted to whole seconds). |
| 169 | + */ |
| 170 | + private fun LocalDateTime.toByteArray(precision: Int): ByteArray { |
| 171 | + return when { |
| 172 | + precision < 3 -> toInstant().truncatedTo(ChronoUnit.SECONDS).toEpochMilli().toString().toByteArray() |
| 173 | + else -> toInstant().truncatedTo(ChronoUnit.MILLIS).toEpochMilli().toString().toByteArray() |
| 174 | + } |
| 175 | + } |
| 176 | + |
| 177 | + /** Appends the table and the record's primary-key values to [message] for use in log and exception text. */ private fun exceptionMessage(message: String, tableSignature: TableSignatureDetails, ctx: RecordContext): String { |
| 178 | + return message + |
| 179 | + " [Table=${tableSignature.table}] " + |
| 180 | + "[PK=${tableSignature.table.primaryKey?.fields?.map { ctx.record().get(it) }?.joinToString(", ")}]" |
| 181 | + } |
| 182 | + |
| 183 | + companion object { |
| 184 | + /** Logger used by loadEnd to warn about signature mismatches before throwing. */ val log = getLogger<RecordSignatureListener>() |
| 185 | + } |
| 186 | +} |
| 187 | + |
| 188 | +/** Per-table signing configuration: the key name, the signed columns, the signature column, the table, and the null-signature policy. */ data class TableSignatureDetails( |
| 189 | + /** |
| 190 | + * The key name used to create the HMAC signature. More details here - |
| 191 | + * https://cash-dev-guide.squarecloudservices.com/security/key_management/ and |
| 192 | + * https://github.com/google/tink/blob/master/docs/PRIMITIVES.md |
| 193 | + */ |
| 194 | + val signatureKeyName: String, |
| 195 | + /** |
| 196 | + * The columns that need to be protected against direct change in the database. Please note: the value of these |
| 197 | + * columns should be convertible deterministically into a string value or should be a byte array already such as BLOB |
| 198 | + * types. Most SQL value types can be converted into a string via a toString() call. Note: |
| 199 | + * 1. JSON columns cannot be used as part of the signature columns as the string comparison of a JSON differs if there |
| 200 | + * are whitespace differences. MySQL does not store JSON as a string and hence when it is retrieved there usually |
| 201 | + * are whitespace differences. |
| 202 | + * 2. If a timestamp column is used in the signature, remember that MySQL's precision is limited to millis. The Mac |
| 203 | + * JVM precision is limited to millis too. But the Kube pod where this is deployed has nano precision. So ensure |
| 204 | + * the timestamp is truncated to millis before setting it into the record. |
| 205 | + */ |
| 206 | + val columns: List<TableField<out Record, out Any?>>, |
| 207 | + /** The column where the HMAC signature (or hash) will be stored and then used to validate against */ |
| 208 | + val signatureRecordColumn: TableField<out Record, ByteArray?>, |
| 209 | + /** The table that needs to be protected against direct change in the database. */ |
| 210 | + val table: Table<out Record>, |
| 211 | + /** |
| 212 | + * When adding this listener to an existing table, set this flag to true until you are sure that all records in the |
| 213 | + * table have a signature set. |
| 214 | + */ |
| 215 | + val allowNullSignatures: Boolean, |
| 216 | +) |
| 217 | + |
| 218 | +/** A [DataAccessException] carrying a message and an optional cause, for reporting data-integrity failures. */ class DataIntegrityException @JvmOverloads constructor(message: String, cause: Exception? = null) : DataAccessException(message, cause)
0 commit comments