#version 430 core
// Compute shader that probes candidate strings against an MD5 digest.
// Each invocation takes a per-thread prefix, appends three characters selected
// by the work group ID, hashes the candidate and reports it on a match.

// Work group size: 32 invocations along z; main() picks each invocation's
// prefix with gl_LocalInvocationID.z.
layout (local_size_x=1, local_size_y=1, local_size_z=32) in; // shader threads per global invocation
// Output image: on a match, texels 0..3 receive the four packed words of the
// matching candidate string.
layout (location=0, r32ui) uniform writeonly uimage1D result; // matched input string upon success
// Digest the computed hash is compared against, as four 32-bit words.
uniform uvec4 target; // hash to find
// One packed prefix per invocation. main() reads the prefix length from the
// top byte of the last word and clears that byte before use.
uniform uvec4 prefix[32]; // start string/salt, including length, little endian, per shader thread
// Character table indexed by the work group ID components in main().
uniform uint chars[256]; // characters to iterate through, individual bytes
// Stores the character chars[chr] at byte position pos of the packed string
// str (four 32-bit words, byte 0 in the low bits of word 0; 16 bytes total).
//   pos: byte index within str; positions outside 0..15 are ignored
//   chr: index into the chars uniform array
//   str: packed string, modified in place
void set_chr_by_pos(const in uint pos, const in uint chr, inout uvec4 str) {
    // Indexing a uvec4 outside components 0..3 is undefined, so drop
    // out-of-range writes instead of performing them.
    if (pos >= 16u) {
        return;
    }
    uint word = pos >> 2u;            // which 32-bit word holds the byte
    int shift = int(pos & 3u) * 8;    // bit offset of the byte inside that word
    // Replace the whole byte rather than OR-ing into it: stale bits already in
    // the destination, or bits above the low byte of the table entry, can then
    // never corrupt neighbouring characters.
    str[word] = bitfieldInsert(str[word], chars[chr], shift, 8);
}
// Computes the MD5 digest of a short message held in a single 512-bit block.
//   str: message bytes packed into four 32-bit words (byte 0 in the low bits
//        of word 0)
//   len: message length in bytes
//   H:   receives the four digest words (A, B, C, D)
void md5(const in uvec4 str, const in uint len, out uvec4 H) {
    // per-round left-rotation amounts
    const uint shifts[64] = uint[](7u, 12u, 17u, 22u, 7u, 12u, 17u, 22u, 7u, 12u, 17u, 22u, 7u, 12u, 17u, 22u, 5u, 9u, 14u, 20u, 5u, 9u, 14u, 20u, 5u, 9u, 14u, 20u, 5u, 9u, 14u, 20u, 4u, 11u, 16u, 23u, 4u, 11u, 16u, 23u, 4u, 11u, 16u, 23u, 4u, 11u, 16u, 23u, 6u, 10u, 15u, 21u, 6u, 10u, 15u, 21u, 6u, 10u, 15u, 21u, 6u, 10u, 15u, 21u);
    // per-round additive constants
    const uint consts[64] = uint[](0xd76aa478u, 0xe8c7b756u, 0x242070dbu, 0xc1bdceeeu, 0xf57c0fafu, 0x4787c62au, 0xa8304613u, 0xfd469501u, 0x698098d8u, 0x8b44f7afu, 0xffff5bb1u, 0x895cd7beu, 0x6b901122u, 0xfd987193u, 0xa679438eu, 0x49b40821u, 0xf61e2562u, 0xc040b340u, 0x265e5a51u, 0xe9b6c7aau, 0xd62f105du, 0x02441453u, 0xd8a1e681u, 0xe7d3fbc8u, 0x21e1cde6u, 0xc33707d6u, 0xf4d50d87u, 0x455a14edu, 0xa9e3e905u, 0xfcefa3f8u, 0x676f02d9u, 0x8d2a4c8au, 0xfffa3942u, 0x8771f681u, 0x6d9d6122u, 0xfde5380cu, 0xa4beea44u, 0x4bdecfa9u, 0xf6bb4b60u, 0xbebfbc70u, 0x289b7ec6u, 0xeaa127fau, 0xd4ef3085u, 0x04881d05u, 0xd9d4d039u, 0xe6db99e5u, 0x1fa27cf8u, 0xc4ac5665u, 0xf4292244u, 0x432aff97u, 0xab9423a7u, 0xfc93a039u, 0x655b59c3u, 0x8f0ccc92u, 0xffeff47du, 0x85845dd1u, 0x6fa87e4fu, 0xfe2ce6e0u, 0xa3014314u, 0x4e0811a1u, 0xf7537e82u, 0xbd3af235u, 0x2ad7d2bbu, 0xeb86d391u);
    // initial chaining values a0, b0, c0, d0
    const uvec4 init = uvec4(0x67452301u, 0xefcdab89u, 0x98badcfeu, 0x10325476u);

    // message block: the four input words followed by twelve zero words
    uint block[16] = uint[](str.x, str.y, str.z, str.w, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
    // padding: a single '1' bit (byte 0x80) directly after the message
    block[len >> 2u] |= 0x80u << ((len & 3u) << 3u);
    // message length in bits goes into word 14 (low word of the 64-bit length)
    block[14] = len << 3u;

    uvec4 state = init; // A, B, C, D
    for (uint round = 0u; round < 64u; ++round) {
        uint a = state.x;
        uint b = state.y;
        uint c = state.z;
        uint d = state.w;
        uint mix;  // output of the round function
        uint word; // index of the message word consumed this round
        switch (round >> 4u) {
        case 0u:
            mix = (b & c) | (~b & d);
            word = round;
            break;
        case 1u:
            mix = (d & b) | (~d & c);
            word = (5u * round + 1u) & 15u;
            break;
        case 2u:
            mix = b ^ c ^ d;
            word = (3u * round + 5u) & 15u;
            break;
        default:
            mix = c ^ (b | ~d);
            word = (7u * round) & 15u;
            break;
        }
        uint sum = a + mix + consts[round] + block[word];
        uint amount = shifts[round];
        uint rotated = (sum << amount) | (sum >> (32u - amount));
        // shuffle the state: A <- D, B <- B + rotated, C <- B, D <- C
        state = uvec4(d, b + rotated, b, c);
    }
    H = state + init;
}
// Entry point. Each invocation extends its own prefix with three characters
// selected by the work group ID, hashes the candidate with md5() and, when the
// digest equals target, writes the candidate's four words to result.
void main() {
    // per-thread input prefix; the top byte of the last word carries its length
    uvec4 str = prefix[gl_LocalInvocationID.z];
    uint len = str[3] >> 24u;
    str[3] &= 0x00ffffffu;
    // The candidate is len + 3 bytes long and has to fit into the 16-byte
    // string; a larger length would index past the vector and the MD5 message
    // block, which is undefined. Skip such prefixes.
    if (len > 13u) {
        return;
    }
    // chars has 256 entries; a dispatch larger than that must not read past it
    if (any(greaterThanEqual(gl_WorkGroupID, uvec3(256u)))) {
        return;
    }
    // one work group probes one combination of three appended characters
    set_chr_by_pos(len + 0u, gl_WorkGroupID.x, str);
    set_chr_by_pos(len + 1u, gl_WorkGroupID.y, str);
    set_chr_by_pos(len + 2u, gl_WorkGroupID.z, str);
    // compute hash
    uvec4 hash;
    md5(str, len + 3u, hash);
    // upon success, store the matching string in the feedback image,
    // one packed word per texel
    if (hash == target) {
        imageStore(result, 0, uvec4(str[0]));
        imageStore(result, 1, uvec4(str[1]));
        imageStore(result, 2, uvec4(str[2]));
        imageStore(result, 3, uvec4(str[3]));
    }
}