|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: iso-8859-1 -*- |
| 3 | + |
| 4 | +# Note that PyPy also contains a built-in module 'md5' which will hide |
| 5 | +# this one if compiled in. |
| 6 | + |
| 7 | +"""A sample implementation of MD5 in pure Python. |
| 8 | +
|
| 9 | +This is an implementation of the MD5 hash function, as specified by |
| 10 | +RFC 1321, in pure Python. It was implemented using Bruce Schneier's |
| 11 | +excellent book "Applied Cryptography", 2nd ed., 1996. |
| 12 | +
|
| 13 | +Surely this is not meant to compete with the existing implementation |
| 14 | +of the Python standard library (written in C). Rather, it should be |
| 15 | +seen as a Python complement that is more readable than C and can be |
| 16 | +used more conveniently for learning and experimenting purposes in |
| 17 | +the field of cryptography. |
| 18 | +
|
| 19 | +This module tries very hard to follow the API of the existing Python |
| 20 | +standard library's "md5" module, but although it seems to work fine, |
| 21 | +it has not been extensively tested! (But note that there is a test |
| 22 | +module, test_md5py.py, that compares this Python implementation with |
| 23 | +the C one of the Python standard library.) |
| 24 | +
|
| 25 | +BEWARE: this comes with no guarantee whatsoever about fitness and/or |
| 26 | +other properties! Specifically, do not use this in any production |
| 27 | +code! License is Python License! |
| 28 | +
|
| 29 | +Special thanks to Aurelian Coman who fixed some nasty bugs! |
| 30 | +
|
| 31 | +Dinu C. Gherman |
| 32 | +""" |
| 33 | + |
| 34 | + |
# Release metadata for this module: date of this revision and its version
# number (note that the version is stored as a float, not a string).
__date__ = '2004-11-17'
__version__ = 0.91 # Modernised by J. Hallén and L. Creighton for Pypy

# Python 2 idiom: a module-level __metaclass__ of `type` makes every class
# defined below a new-style class.
__metaclass__ = type # or genrpy won't work
| 39 | + |
| 40 | +import struct, copy |
| 41 | + |
| 42 | + |
| 43 | +# ====================================================================== |
| 44 | +# Bit-Manipulation helpers |
| 45 | +# ====================================================================== |
| 46 | + |
| 47 | +def _bytelist2long(list): |
| 48 | + "Transform a list of characters into a list of longs." |
| 49 | + |
| 50 | + imax = len(list) // 4 |
| 51 | + hl = [0] * imax |
| 52 | + |
| 53 | + j = 0 |
| 54 | + i = 0 |
| 55 | + while i < imax: |
| 56 | + b0 = list[j] |
| 57 | + b1 = list[j+1] << 8 |
| 58 | + b2 = list[j+2] << 16 |
| 59 | + b3 = list[j+3] << 24 |
| 60 | + hl[i] = b0 | b1 |b2 | b3 |
| 61 | + i = i+1 |
| 62 | + j = j+4 |
| 63 | + |
| 64 | + return hl |
| 65 | + |
| 66 | + |
| 67 | +def _rotateLeft(x, n): |
| 68 | + "Rotate x (32 bit) left n bits circularly." |
| 69 | + |
| 70 | + return (x << n) | (x >> (32-n)) |
| 71 | + |
| 72 | + |
| 73 | +# ====================================================================== |
| 74 | +# The real MD5 meat... |
| 75 | +# |
| 76 | +# Implemented after "Applied Cryptography", 2nd ed., 1996, |
| 77 | +# pp. 436-441 by Bruce Schneier. |
| 78 | +# ====================================================================== |
| 79 | + |
| 80 | +# F, G, H and I are basic MD5 functions. |
| 81 | + |
def F(x, y, z):
    """Basic MD5 function of round 1: take bits of y where x is set, else of z."""
    picked_from_y = x & y
    picked_from_z = ~x & z
    return picked_from_y | picked_from_z
| 84 | + |
def G(x, y, z):
    """Basic MD5 function of round 2: take bits of x where z is set, else of y."""
    picked_from_x = x & z
    picked_from_y = y & ~z
    return picked_from_x | picked_from_y
| 87 | + |
def H(x, y, z):
    """Basic MD5 function of round 3: bitwise exclusive-or of all three words."""
    xy = x ^ y
    return xy ^ z
| 90 | + |
def I(x, y, z):
    """Basic MD5 function of round 4: y exclusive-or (x or not z).

    The complement is taken on an unbounded Python integer, so the
    result may be negative; callers reduce it to 32 bits themselves.
    """
    x_or_not_z = x | ~z
    return y ^ x_or_not_z
| 93 | + |
| 94 | + |
def XX(func, a, b, c, d, x, s, ac):
    """Perform a single MD5 step operation.

    Stands in for the four functions FF, GG, HH and II of "Applied
    Cryptography": the round function to apply (F, G, H or I) is passed
    in as func. The step adds a, func(b, c, d), the message word x and
    the additive constant ac, rotates the 32-bit sum left by s bits and
    finally adds b. The returned value is reduced to 32 bits.
    """
    word_mask = 0xffffffff

    # Sum first, reduce to 32 bits, and only then rotate.
    total = (a + func(b, c, d) + x + ac) & word_mask
    rotated = _rotateLeft(total, s) & word_mask

    return (rotated + b) & word_mask
| 113 | + |
| 114 | + |
class md5:
    """An implementation of the MD5 hash function in pure Python.

    The object buffers incoming bytes, hashes every complete 64-byte
    block as soon as it is available and keeps the running state in the
    four 32-bit words A, B, C and D. The interface follows the "md5"
    module of the Python standard library: update(), digest(),
    hexdigest() and copy().
    """

    digest_size = digestsize = 16
    block_size = 64

    def __init__(self, arg=None):
        """Create a new hash object, optionally fed with the bytes in arg."""

        # init() sets up every field; it is a separate method so that it
        # can be called again to start from scratch on the same object.
        self.init()

        if arg:
            self.update(arg)

    def init(self):
        """Initialize the message-digest and set all fields to zero."""

        # Kept for compatibility; update() does not maintain this field,
        # the message length is tracked in self.count.
        self.length = 0

        # Message length in bits as two 32-bit words: [low, high].
        self.count = [0, 0]

        # Bytes (as integers) of the current, still incomplete block.
        self.input = []

        # Load magic initialization constants.
        self.A = 0x67452301
        self.B = 0xefcdab89
        self.C = 0x98badcfe
        self.D = 0x10325476

    def _transform(self, inp):
        """Basic MD5 step transforming the digest based on the input.

        inp is one message block given as a list of sixteen 32-bit
        words. The block is mixed into the state in four rounds of
        sixteen steps each, and the result is added (modulo 2**32) to
        the state words A, B, C and D.
        """

        a, b, c, d = A, B, C, D = self.A, self.B, self.C, self.D

        # Round 1.

        S11, S12, S13, S14 = 7, 12, 17, 22

        a = XX(F, a, b, c, d, inp[ 0], S11, 0xD76AA478) # 1
        d = XX(F, d, a, b, c, inp[ 1], S12, 0xE8C7B756) # 2
        c = XX(F, c, d, a, b, inp[ 2], S13, 0x242070DB) # 3
        b = XX(F, b, c, d, a, inp[ 3], S14, 0xC1BDCEEE) # 4
        a = XX(F, a, b, c, d, inp[ 4], S11, 0xF57C0FAF) # 5
        d = XX(F, d, a, b, c, inp[ 5], S12, 0x4787C62A) # 6
        c = XX(F, c, d, a, b, inp[ 6], S13, 0xA8304613) # 7
        b = XX(F, b, c, d, a, inp[ 7], S14, 0xFD469501) # 8
        a = XX(F, a, b, c, d, inp[ 8], S11, 0x698098D8) # 9
        d = XX(F, d, a, b, c, inp[ 9], S12, 0x8B44F7AF) # 10
        c = XX(F, c, d, a, b, inp[10], S13, 0xFFFF5BB1) # 11
        b = XX(F, b, c, d, a, inp[11], S14, 0x895CD7BE) # 12
        a = XX(F, a, b, c, d, inp[12], S11, 0x6B901122) # 13
        d = XX(F, d, a, b, c, inp[13], S12, 0xFD987193) # 14
        c = XX(F, c, d, a, b, inp[14], S13, 0xA679438E) # 15
        b = XX(F, b, c, d, a, inp[15], S14, 0x49B40821) # 16

        # Round 2.

        S21, S22, S23, S24 = 5, 9, 14, 20

        a = XX(G, a, b, c, d, inp[ 1], S21, 0xF61E2562) # 17
        d = XX(G, d, a, b, c, inp[ 6], S22, 0xC040B340) # 18
        c = XX(G, c, d, a, b, inp[11], S23, 0x265E5A51) # 19
        b = XX(G, b, c, d, a, inp[ 0], S24, 0xE9B6C7AA) # 20
        a = XX(G, a, b, c, d, inp[ 5], S21, 0xD62F105D) # 21
        d = XX(G, d, a, b, c, inp[10], S22, 0x02441453) # 22
        c = XX(G, c, d, a, b, inp[15], S23, 0xD8A1E681) # 23
        b = XX(G, b, c, d, a, inp[ 4], S24, 0xE7D3FBC8) # 24
        a = XX(G, a, b, c, d, inp[ 9], S21, 0x21E1CDE6) # 25
        d = XX(G, d, a, b, c, inp[14], S22, 0xC33707D6) # 26
        c = XX(G, c, d, a, b, inp[ 3], S23, 0xF4D50D87) # 27
        b = XX(G, b, c, d, a, inp[ 8], S24, 0x455A14ED) # 28
        a = XX(G, a, b, c, d, inp[13], S21, 0xA9E3E905) # 29
        d = XX(G, d, a, b, c, inp[ 2], S22, 0xFCEFA3F8) # 30
        c = XX(G, c, d, a, b, inp[ 7], S23, 0x676F02D9) # 31
        b = XX(G, b, c, d, a, inp[12], S24, 0x8D2A4C8A) # 32

        # Round 3.

        S31, S32, S33, S34 = 4, 11, 16, 23

        a = XX(H, a, b, c, d, inp[ 5], S31, 0xFFFA3942) # 33
        d = XX(H, d, a, b, c, inp[ 8], S32, 0x8771F681) # 34
        c = XX(H, c, d, a, b, inp[11], S33, 0x6D9D6122) # 35
        b = XX(H, b, c, d, a, inp[14], S34, 0xFDE5380C) # 36
        a = XX(H, a, b, c, d, inp[ 1], S31, 0xA4BEEA44) # 37
        d = XX(H, d, a, b, c, inp[ 4], S32, 0x4BDECFA9) # 38
        c = XX(H, c, d, a, b, inp[ 7], S33, 0xF6BB4B60) # 39
        b = XX(H, b, c, d, a, inp[10], S34, 0xBEBFBC70) # 40
        a = XX(H, a, b, c, d, inp[13], S31, 0x289B7EC6) # 41
        d = XX(H, d, a, b, c, inp[ 0], S32, 0xEAA127FA) # 42
        c = XX(H, c, d, a, b, inp[ 3], S33, 0xD4EF3085) # 43
        b = XX(H, b, c, d, a, inp[ 6], S34, 0x04881D05) # 44
        a = XX(H, a, b, c, d, inp[ 9], S31, 0xD9D4D039) # 45
        d = XX(H, d, a, b, c, inp[12], S32, 0xE6DB99E5) # 46
        c = XX(H, c, d, a, b, inp[15], S33, 0x1FA27CF8) # 47
        b = XX(H, b, c, d, a, inp[ 2], S34, 0xC4AC5665) # 48

        # Round 4.

        S41, S42, S43, S44 = 6, 10, 15, 21

        a = XX(I, a, b, c, d, inp[ 0], S41, 0xF4292244) # 49
        d = XX(I, d, a, b, c, inp[ 7], S42, 0x432AFF97) # 50
        c = XX(I, c, d, a, b, inp[14], S43, 0xAB9423A7) # 51
        b = XX(I, b, c, d, a, inp[ 5], S44, 0xFC93A039) # 52
        a = XX(I, a, b, c, d, inp[12], S41, 0x655B59C3) # 53
        d = XX(I, d, a, b, c, inp[ 3], S42, 0x8F0CCC92) # 54
        c = XX(I, c, d, a, b, inp[10], S43, 0xFFEFF47D) # 55
        b = XX(I, b, c, d, a, inp[ 1], S44, 0x85845DD1) # 56
        a = XX(I, a, b, c, d, inp[ 8], S41, 0x6FA87E4F) # 57
        d = XX(I, d, a, b, c, inp[15], S42, 0xFE2CE6E0) # 58
        c = XX(I, c, d, a, b, inp[ 6], S43, 0xA3014314) # 59
        b = XX(I, b, c, d, a, inp[13], S44, 0x4E0811A1) # 60
        a = XX(I, a, b, c, d, inp[ 4], S41, 0xF7537E82) # 61
        d = XX(I, d, a, b, c, inp[11], S42, 0xBD3AF235) # 62
        c = XX(I, c, d, a, b, inp[ 2], S43, 0x2AD7D2BB) # 63
        b = XX(I, b, c, d, a, inp[ 9], S44, 0xEB86D391) # 64

        A = (A + a) & 0xffffffff
        B = (B + b) & 0xffffffff
        C = (C + c) & 0xffffffff
        D = (D + d) & 0xffffffff

        self.A, self.B, self.C, self.D = A, B, C, D

    # Down from here all methods follow the Python Standard Library
    # API of the md5 module.

    def update(self, inBuf):
        """Add to the current message.

        Update the md5 object with the bytes in inBuf (a bytes-like
        object or a sequence of integers in range 0..255). Repeated
        calls are equivalent to a single call with the concatenation of
        all the arguments, i.e. m.update(a); m.update(b) is equivalent
        to m.update(a+b).

        The hash is immediately calculated for all full blocks. The final
        calculation is made in digest(). This allows us to keep an
        intermediate value for the hash, so that we only need to make
        minimal recalculation if we call update() to add more data to
        the hashed string.

        Raises TypeError if inBuf is a str; text must be encoded to
        bytes first.
        """

        # Reject text up front. Its characters cannot be shifted like
        # integers, so it would otherwise be buffered silently and only
        # fail later, after the bit count has already been changed.
        if isinstance(inBuf, str):
            raise TypeError("Strings must be encoded before hashing")

        leninBuf = len(inBuf)

        # Number of bytes already buffered (message length mod 64).
        index = (self.count[0] >> 3) & 0x3F

        # Update the 64-bit bit count, stored as two 32-bit words.
        # Python integers never wrap around, so the carry from the low
        # into the high word has to be computed explicitly.
        total_bits = (self.count[1] << 32) + self.count[0] + (leninBuf << 3)
        self.count[0] = total_bits & 0xffffffff
        self.count[1] = (total_bits >> 32) & 0xffffffff

        # Bytes still missing to complete the buffered block.
        partLen = 64 - index

        if leninBuf < partLen:
            # Not enough data for a full block yet: just buffer it.
            self.input = self.input + list(inBuf)
            return

        # Complete the buffered block and hash it.
        self.input[index:] = list(inBuf[:partLen])
        self._transform(_bytelist2long(self.input))

        # Hash all further complete blocks directly from the argument.
        i = partLen
        while i + 63 < leninBuf:
            self._transform(_bytelist2long(list(inBuf[i:i+64])))
            i = i + 64

        # Whatever is left (fewer than 64 bytes) starts the next block.
        self.input = list(inBuf[i:leninBuf])

    def digest(self):
        """Terminate the message-digest computation and return digest.

        Return the digest of the data passed to the update() method so
        far. This is a 16-byte bytes object which may contain non-ASCII
        values, including null bytes.

        The padding is applied to the object itself and undone again
        afterwards, so more data can be added with update() after
        digest() has been called.
        """

        # Remember the running state; it is restored in the finally
        # clause even if the finalisation fails half-way.
        A = self.A
        B = self.B
        C = self.C
        D = self.D
        input = list(self.input)
        count = list(self.count)

        try:
            index = (self.count[0] >> 3) & 0x3f

            # Pad to 56 bytes mod 64, leaving room for the 8 length bytes.
            if index < 56:
                padLen = 56 - index
            else:
                padLen = 120 - index

            padding = [128] + [0] * 63
            self.update(padding[:padLen])

            # Append length (before padding).
            bits = _bytelist2long(self.input[:56]) + count

            self._transform(bits)

            # Store state in digest.
            return struct.pack("<IIII", self.A, self.B, self.C, self.D)
        finally:
            self.A = A
            self.B = B
            self.C = C
            self.D = D
            self.input = input
            self.count = count

    def hexdigest(self):
        """Terminate and return digest in HEX form.

        Like digest() except the digest is returned as a string of
        length 32, containing only hexadecimal digits. This may be
        used to exchange the value safely in email or other non-
        binary environments.
        """

        return ''.join('%02x' % c for c in self.digest())

    def copy(self):
        """Return a clone object.

        Return a copy ('clone') of the md5 object. This can be used
        to efficiently compute the digests of strings that share
        a common initial substring. The clone gets its own buffer and
        bit counter, so updating one object does not affect the other.
        """

        clone = self.__class__()
        clone.length = self.length
        clone.count = list(self.count)
        clone.input = list(self.input)
        clone.A = self.A
        clone.B = self.B
        clone.C = self.C
        clone.D = self.D
        return clone
0 commit comments