/// Inverts a (h ^= h >> s) operation with 8 <= s <= 16 unsigned int invert_shift_xor(unsigned int hs, unsigned int s) { XENSURE(s >= 8 && s <= 16); unsigned hs0 = hs >> 24; unsigned hs1 = (hs >> 16) & 0xff; unsigned hs2 = (hs >> 8) & 0xff; unsigned hs3 = hs & 0xff; unsigned h0 = hs0; unsigned h1 = hs1 ^ (h0 >> (s-8)); unsigned h2 = (hs2 ^ (h0 << (16-s)) ^ (h1 >> (s-8))) & 0xff; unsigned h3 = (hs3 ^ (h1 << (16-s)) ^ (h2 >> (s-8))) & 0xff; return (h0<<24) + (h1<<16) + (h2<<8) + h3; } unsigned int murmur_hash_inverse(unsigned int h, unsigned int seed) { const unsigned int m = 0x5bd1e995; const unsigned int minv = 0xe59b19bd; // Multiplicative inverse of m under % 2^32 const int r = 24; h = invert_shift_xor(h,15); h *= minv; h = invert_shift_xor(h,13); unsigned int hforward = seed ^ 4; hforward *= m; unsigned int k = hforward ^ h; k *= minv; k ^= k >> r; k *= minv; #ifdef PLATFORM_BIG_ENDIAN char *data = (char *)&k; k = (data[0]) + (data[1] << 8) + (data[2] << 16) + (data[3] << 24); #endif return k; }And for reference, here is the full code, with both the regular murmur hash and the inverses for 32- and 64-bit hashes:

unsigned int murmur_hash ( const void * key, int len, unsigned int seed ) { // 'm' and 'r' are mixing constants generated offline. // They're not really 'magic', they just happen to work well. const unsigned int m = 0x5bd1e995; const int r = 24; // Initialize the hash to a 'random' value unsigned int h = seed ^ len; // Mix 4 bytes at a time into the hash const unsigned char * data = (const unsigned char *)key; while(len >= 4) { #ifdef PLATFORM_BIG_ENDIAN unsigned int k = (data[0]) + (data[1] << 8) + (data[2] << 16) + (data[3] << 24); #else unsigned int k = *(unsigned int *)data; #endif k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; data += 4; len -= 4; } // Handle the last few bytes of the input array switch(len) { case 3: h ^= data[2] << 16; case 2: h ^= data[1] << 8; case 1: h ^= data[0]; h *= m; }; // Do a few final mixes of the hash to ensure the last few // bytes are well-incorporated. h ^= h >> 13; h *= m; h ^= h >> 15; return h; } /// Inverts a (h ^= h >> s) operation with 8 <= s <= 16 unsigned int invert_shift_xor(unsigned int hs, unsigned int s) { XENSURE(s >= 8 && s <= 16); unsigned hs0 = hs >> 24; unsigned hs1 = (hs >> 16) & 0xff; unsigned hs2 = (hs >> 8) & 0xff; unsigned hs3 = hs & 0xff; unsigned h0 = hs0; unsigned h1 = hs1 ^ (h0 >> (s-8)); unsigned h2 = (hs2 ^ (h0 << (16-s)) ^ (h1 >> (s-8))) & 0xff; unsigned h3 = (hs3 ^ (h1 << (16-s)) ^ (h2 >> (s-8))) & 0xff; return (h0<<24) + (h1<<16) + (h2<<8) + h3; } unsigned int murmur_hash_inverse(unsigned int h, unsigned int seed) { const unsigned int m = 0x5bd1e995; const unsigned int minv = 0xe59b19bd; // Multiplicative inverse of m under % 2^32 const int r = 24; h = invert_shift_xor(h,15); h *= minv; h = invert_shift_xor(h,13); unsigned int hforward = seed ^ 4; hforward *= m; unsigned int k = hforward ^ h; k *= minv; k ^= k >> r; k *= minv; #ifdef PLATFORM_BIG_ENDIAN char *data = (char *)&k; k = (data[0]) + (data[1] << 8) + (data[2] << 16) + (data[3] << 24); #endif return k; } uint64 murmur_hash_64(const void * key, int len, uint64 seed) { const uint64 m = 0xc6a4a7935bd1e995ULL; const int r = 47; uint64 h = seed ^ (len * m); const uint64 * data = (const uint64 *)key; const uint64 * end = data + (len/8); while(data != end) { #ifdef PLATFORM_BIG_ENDIAN uint64 k = *data++; char *p = (char *)&k; char c; c = p[0]; p[0] = p[7]; p[7] = c; c = p[1]; p[1] = p[6]; p[6] = c; c = p[2]; p[2] = p[5]; p[5] = c; c = p[3]; p[3] = p[4]; p[4] = c; #else uint64 k = *data++; #endif k *= m; k ^= k >> r; k *= m; h ^= k; h *= m; } const unsigned char * data2 = (const unsigned char*)data; switch(len & 7) { case 7: h ^= uint64(data2[6]) << 48; case 6: h ^= uint64(data2[5]) << 40; case 5: h ^= uint64(data2[4]) << 32; case 4: h ^= uint64(data2[3]) << 24; case 3: h ^= uint64(data2[2]) << 16; case 2: h ^= uint64(data2[1]) << 8; case 1: h ^= uint64(data2[0]); h *= m; }; h ^= h >> r; h *= m; h ^= h >> r; return h; } uint64 murmur_hash_64_inverse(uint64 h, uint64 seed) { const uint64 m = 0xc6a4a7935bd1e995ULL; const uint64 minv = 0x5f7a0ea7e59b19bdULL; // Multiplicative inverse of m under % 2^64 const int r = 47; h ^= h >> r; h *= minv; h ^= h >> r; h *= minv; uint64 hforward = seed ^ (8 * m); uint64 k = h ^ hforward; k *= minv; k ^= k >> r; k *= minv; #ifdef PLATFORM_BIG_ENDIAN char *p = (char *)&k; char c; c = p[0]; p[0] = p[7]; p[7] = c; c = p[1]; p[1] = p[6]; p[6] = c; c = p[2]; p[2] = p[5]; p[5] = c; c = p[3]; p[3] = p[4]; p[4] = c; #endif return k; }

I just started using hashes instead of strings/guids, and I'm using murmurhash. I'm using murmurhash3 right now, but I don't think I could manage making my own inverse hash function in a decent amount of time.

ReplyDeleteMight as well change to murmurhash2. I tried this code, but I seem to be missing the function/macro XENSURE.

XENSURE is just our internal assert function, you can replace it with your own assert.

ReplyDelete.thanks for sharing

ReplyDeleteWhat are the use cases of this?

ReplyDeleteConverted this project to lua.

ReplyDeleteSteam workshop: https://steamcommunity.com/sharedfiles/filedetails/?id=1653994440

Source code: http://gitlab.ludoruisch.nl/root/MurmurHashInverse/blob/master/scripts/mods/MurmurHashInverse/MurmurHashInverse.lua