/*
 * Copyright (c) 2025, magnum
 * This software is hereby released to the general public under
 * the following terms: Redistribution and use in source and binary
 * forms, with or without modification, are permitted.
 */

#include "opencl_device_info.h"
#define AMD_PUTCHAR_NOCAST
#include "opencl_misc.h"
#include "opencl_mask.h"
#include "opencl_sha1.h"

/* Words per bitmap; the "+ 1" handles a BITMAP_MASK of 0xffffffffU correctly */
#define BITMAP_SHIFT ((BITMAP_MASK >> 5) + 1)

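/*
 * Full check for a candidate that passed the bitmap test: derive the
 * offset-table and hash-table indices from the 160-bit hash and, on a
 * match, record (gid, iter, hash_table_index) plus two more hash words
 * for the host to verify.
 */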
INLINE void cmp_final(uint gid,
                      uint iter,
                      uint *hash,
                      __global uint *offset_table,
                      __global uint *hash_table,
                      __global uint *return_hashes,
                      volatile __global uint *output,
                      volatile __global uint *bitmap_dupe) {

    uint t, offset_table_index, hash_table_index;
    ulong LO, MI, HI;
    ulong p;

    HI = (ulong)hash[4];
    MI = ((ulong)hash[3] << 32) | (ulong)hash[2];
    LO = ((ulong)hash[1] << 32) | (ulong)hash[0];

    p = (HI % OFFSET_TABLE_SIZE) * SHIFT128_OT_SZ;
    p += (MI % OFFSET_TABLE_SIZE) * SHIFT64_OT_SZ;
    p += LO % OFFSET_TABLE_SIZE;
    p %= OFFSET_TABLE_SIZE;
    offset_table_index = (uint)p;

    /* Overflow is possible here, but the chance is extremely low */
    LO += (ulong)offset_table[offset_table_index];

    p = (HI % HASH_TABLE_SIZE) * SHIFT128_HT_SZ;
    p += (MI % HASH_TABLE_SIZE) * SHIFT64_HT_SZ;
    p += LO % HASH_TABLE_SIZE;
    p %= HASH_TABLE_SIZE;
    hash_table_index = (uint)p;

    if (hash_table[hash_table_index] == hash[0])
    if (hash_table[HASH_TABLE_SIZE + hash_table_index] == hash[1])
    {
/*
 * Prevent duplicate keys from cracking the same hash
 */
        if (!(atomic_or(&bitmap_dupe[hash_table_index / 32],
                        (1U << (hash_table_index % 32))) &
              (1U << (hash_table_index % 32)))) {
            t = atomic_inc(&output[0]);
            output[1 + 3 * t] = gid;
            output[2 + 3 * t] = iter;
            output[3 + 3 * t] = hash_table_index;
            return_hashes[2 * t] = hash[2];
            return_hashes[2 * t + 1] = hash[3];
        }
    }
}

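/*
 * Cheap early-reject test: probe SELECT_CMP_STEPS bitmap lookups and call
 * cmp_final() only when every probe hits.
 */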
INLINE void cmp(uint gid,
                uint iter,
                uint *hash,
#if USE_LOCAL_BITMAPS
                __local
#else
                __global
#endif
                uint *bitmaps,
                __global uint *offset_table,
                __global uint *hash_table,
                __global uint *return_hashes,
                volatile __global uint *output,
                volatile __global uint *bitmap_dupe) {
    uint bitmap_index, tmp = 1;
#if SELECT_CMP_STEPS > 4
    bitmap_index = hash[0] & BITMAP_MASK;
    tmp &= (bitmaps[bitmap_index >> 5] >> (bitmap_index & 31)) & 1U;
    bitmap_index = (hash[0] >> 16) & BITMAP_MASK;
    tmp &= (bitmaps[BITMAP_SHIFT + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
    bitmap_index = hash[1] & BITMAP_MASK;
    tmp &= (bitmaps[BITMAP_SHIFT * 2 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
    bitmap_index = (hash[1] >> 16) & BITMAP_MASK;
    tmp &= (bitmaps[BITMAP_SHIFT * 3 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
    bitmap_index = hash[2] & BITMAP_MASK;
    tmp &= (bitmaps[BITMAP_SHIFT * 4 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
    bitmap_index = (hash[2] >> 16) & BITMAP_MASK;
    tmp &= (bitmaps[BITMAP_SHIFT * 5 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
    bitmap_index = hash[3] & BITMAP_MASK;
    tmp &= (bitmaps[BITMAP_SHIFT * 6 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
    bitmap_index = (hash[3] >> 16) & BITMAP_MASK;
    tmp &= (bitmaps[BITMAP_SHIFT * 7 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
#elif SELECT_CMP_STEPS > 2
    bitmap_index = hash[3] & BITMAP_MASK;
    tmp &= (bitmaps[bitmap_index >> 5] >> (bitmap_index & 31)) & 1U;
    bitmap_index = hash[2] & BITMAP_MASK;
    tmp &= (bitmaps[BITMAP_SHIFT + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
    bitmap_index = hash[1] & BITMAP_MASK;
    tmp &= (bitmaps[BITMAP_SHIFT * 2 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
    bitmap_index = hash[0] & BITMAP_MASK;
    tmp &= (bitmaps[BITMAP_SHIFT * 3 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
#elif SELECT_CMP_STEPS > 1
    bitmap_index = hash[3] & BITMAP_MASK;
    tmp &= (bitmaps[bitmap_index >> 5] >> (bitmap_index & 31)) & 1U;
    bitmap_index = hash[2] & BITMAP_MASK;
    tmp &= (bitmaps[BITMAP_SHIFT + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
#else
    bitmap_index = hash[3] & BITMAP_MASK;
    tmp &= (bitmaps[bitmap_index >> 5] >> (bitmap_index & 31)) & 1U;
#endif

    if (tmp)
        cmp_final(gid, iter, hash, offset_table, hash_table,
                  return_hashes, output, bitmap_dupe);
}

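/*
 * Keep the integer keys in the __constant address space when they fit in
 * the device's constant cache; otherwise they stay in __global memory.
 */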
#define USE_CONST_CACHE \
    (CONST_CACHE_SIZE >= (NUM_INT_KEYS * 4))

typedef struct {
    uint iter;                 /* number of SHA-1 iterations */
    uint len;                  /* salt length, in bytes */
    uchar salt[MAX_SALT_SIZE];
} salt_t;
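/*
 * Each work-item builds salt || key in a single SHA-1 block, then for every
 * mask candidate (NUM_INT_KEYS of them) hashes it, re-hashes the 20-byte
 * digest until salt->iter iterations are done, and compares the result
 * against the loaded hashes.
 */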
__kernel
void sha1(__global uint *keys,
          __global uint *index,
          __global uint *int_key_loc,
#if USE_CONST_CACHE
          constant
#else
          __global
#endif
          uint *int_keys,
          __constant salt_t *salt,
          __global uint *bitmaps,
          __global uint *offset_table,
          __global uint *hash_table,
          __global uint *return_hashes,
          volatile __global uint *out_hash_ids,
          volatile __global uint *bitmap_dupe)
{
    uint i;
    uint gid = get_global_id(0);
    uint base = index[gid];
    uint T[16] = { 0 };
    uint len = base & 63;    /* key length; the keys[] offset is base >> 6 */

#if NUM_INT_KEYS > 1 && !IS_STATIC_GPU_MASK
    uint ikl = int_key_loc[gid];
    uint loc0 = ikl & 0xff;
#if MASK_FMT_INT_PLHDR > 1
#if LOC_1 >= 0
    uint loc1 = (ikl & 0xff00) >> 8;
#endif
#endif
#if MASK_FMT_INT_PLHDR > 2
#if LOC_2 >= 0
    uint loc2 = (ikl & 0xff0000) >> 16;
#endif
#endif
#if MASK_FMT_INT_PLHDR > 3
#if LOC_3 >= 0
    uint loc3 = (ikl & 0xff000000) >> 24;
#endif
#endif
#endif

#if !IS_STATIC_GPU_MASK
#define GPU_LOC_0 loc0
#define GPU_LOC_1 loc1
#define GPU_LOC_2 loc2
#define GPU_LOC_3 loc3
#else
#define GPU_LOC_0 LOC_0
#define GPU_LOC_1 LOC_1
#define GPU_LOC_2 LOC_2
#define GPU_LOC_3 LOC_3
#endif

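    /* Optionally cache the bitmaps in local memory; the work-group copies
       them cooperatively */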
#if USE_LOCAL_BITMAPS
    uint lid = get_local_id(0);
    uint lws = get_local_size(0);
    __local uint s_bitmaps[BITMAP_SHIFT * SELECT_CMP_STEPS];

    for (i = lid; i < BITMAP_SHIFT * SELECT_CMP_STEPS; i += lws)
        s_bitmaps[i] = bitmaps[i];

    barrier(CLK_LOCAL_MEM_FENCE);
#endif

    keys += base >> 6;

    /* Build salt || key in the single SHA-1 block T[] */
    for (i = 0; i < salt->len; i++)
        PUTCHAR_BE(T, i, salt->salt[i]);

    __global uchar *key = (__global uchar*)keys;
    for (; i < salt->len + len; i++)
        PUTCHAR_BE(T, i, *key++);

    /* SHA-1 padding: 0x80 terminator, then the message length in bits */
    PUTCHAR_BE(T, (salt->len + len), 0x80);
    T[15] = (salt->len + len) << 3;

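    /* For each mask candidate, patch the placeholder position(s) in the
       block, then hash */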
    for (uint idx = 0; idx < NUM_INT_KEYS; idx++) {
#if NUM_INT_KEYS > 1
        PUTCHAR_BE(T, salt->len + GPU_LOC_0, (int_keys[idx] & 0xff));

#if MASK_FMT_INT_PLHDR > 1
#if LOC_1 >= 0
        PUTCHAR_BE(T, salt->len + GPU_LOC_1, ((int_keys[idx] & 0xff00) >> 8));
#endif
#endif
#if MASK_FMT_INT_PLHDR > 2
#if LOC_2 >= 0
        PUTCHAR_BE(T, salt->len + GPU_LOC_2, ((int_keys[idx] & 0xff0000) >> 16));
#endif
#endif
#if MASK_FMT_INT_PLHDR > 3
#if LOC_3 >= 0
        PUTCHAR_BE(T, salt->len + GPU_LOC_3, ((int_keys[idx] & 0xff000000) >> 24));
#endif
#endif
#endif
        uint W[16];
        uint hash[5];

        memcpy_macro(W, T, 16);

        sha1_single(uint, W, hash);

        /* Iterations 2..salt->iter re-hash the previous 20-byte digest,
           with 0x80 padding in W[5] and a 160-bit length in W[15] */
        uint iter = salt->iter;
        while (--iter) {
            memcpy_macro(W, hash, 5);
            W[5] = 0x80000000;
            W[15] = 20 << 3;
            sha1_single_160Z(uint, W, hash);
        }

        cmp(gid, idx, hash,
#if USE_LOCAL_BITMAPS
            s_bitmaps
#else
            bitmaps
#endif
            , offset_table, hash_table, return_hashes, out_hash_ids,
            bitmap_dupe);
    }
}