/// Inverts a (h ^= h >> s) operation with 8 <= s <= 16 unsigned int invert_shift_xor(unsigned int hs, unsigned int s) { XENSURE(s >= 8 && s <= 16); unsigned hs0 = hs >> 24; unsigned hs1 = (hs >> 16) & 0xff; unsigned hs2 = (hs >> 8) & 0xff; unsigned hs3 = hs & 0xff; unsigned h0 = hs0; unsigned h1 = hs1 ^ (h0 >> (s-8)); unsigned h2 = (hs2 ^ (h0 << (16-s)) ^ (h1 >> (s-8))) & 0xff; unsigned h3 = (hs3 ^ (h1 << (16-s)) ^ (h2 >> (s-8))) & 0xff; return (h0<<24) + (h1<<16) + (h2<<8) + h3; } unsigned int murmur_hash_inverse(unsigned int h, unsigned int seed) { const unsigned int m = 0x5bd1e995; const unsigned int minv = 0xe59b19bd; // Multiplicative inverse of m under % 2^32 const int r = 24; h = invert_shift_xor(h,15); h *= minv; h = invert_shift_xor(h,13); unsigned int hforward = seed ^ 4; hforward *= m; unsigned int k = hforward ^ h; k *= minv; k ^= k >> r; k *= minv; #ifdef PLATFORM_BIG_ENDIAN char *data = (char *)&k; k = (data[0]) + (data[1] << 8) + (data[2] << 16) + (data[3] << 24); #endif return k; }And for reference, here is the full code, with both the regular murmur hash and the inverses for 32- and 64-bit hashes:

unsigned int murmur_hash ( const void * key, int len, unsigned int seed ) { // 'm' and 'r' are mixing constants generated offline. // They're not really 'magic', they just happen to work well. const unsigned int m = 0x5bd1e995; const int r = 24; // Initialize the hash to a 'random' value unsigned int h = seed ^ len; // Mix 4 bytes at a time into the hash const unsigned char * data = (const unsigned char *)key; while(len >= 4) { #ifdef PLATFORM_BIG_ENDIAN unsigned int k = (data[0]) + (data[1] << 8) + (data[2] << 16) + (data[3] << 24); #else unsigned int k = *(unsigned int *)data; #endif k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; data += 4; len -= 4; } // Handle the last few bytes of the input array switch(len) { case 3: h ^= data[2] << 16; case 2: h ^= data[1] << 8; case 1: h ^= data[0]; h *= m; }; // Do a few final mixes of the hash to ensure the last few // bytes are well-incorporated. h ^= h >> 13; h *= m; h ^= h >> 15; return h; } /// Inverts a (h ^= h >> s) operation with 8 <= s <= 16 unsigned int invert_shift_xor(unsigned int hs, unsigned int s) { XENSURE(s >= 8 && s <= 16); unsigned hs0 = hs >> 24; unsigned hs1 = (hs >> 16) & 0xff; unsigned hs2 = (hs >> 8) & 0xff; unsigned hs3 = hs & 0xff; unsigned h0 = hs0; unsigned h1 = hs1 ^ (h0 >> (s-8)); unsigned h2 = (hs2 ^ (h0 << (16-s)) ^ (h1 >> (s-8))) & 0xff; unsigned h3 = (hs3 ^ (h1 << (16-s)) ^ (h2 >> (s-8))) & 0xff; return (h0<<24) + (h1<<16) + (h2<<8) + h3; } unsigned int murmur_hash_inverse(unsigned int h, unsigned int seed) { const unsigned int m = 0x5bd1e995; const unsigned int minv = 0xe59b19bd; // Multiplicative inverse of m under % 2^32 const int r = 24; h = invert_shift_xor(h,15); h *= minv; h = invert_shift_xor(h,13); unsigned int hforward = seed ^ 4; hforward *= m; unsigned int k = hforward ^ h; k *= minv; k ^= k >> r; k *= minv; #ifdef PLATFORM_BIG_ENDIAN char *data = (char *)&k; k = (data[0]) + (data[1] << 8) + (data[2] << 16) + (data[3] << 24); #endif return k; } uint64 murmur_hash_64(const void * key, int len, uint64 seed) { const uint64 m = 0xc6a4a7935bd1e995ULL; const int r = 47; uint64 h = seed ^ (len * m); const uint64 * data = (const uint64 *)key; const uint64 * end = data + (len/8); while(data != end) { #ifdef PLATFORM_BIG_ENDIAN uint64 k = *data++; char *p = (char *)&k; char c; c = p[0]; p[0] = p[7]; p[7] = c; c = p[1]; p[1] = p[6]; p[6] = c; c = p[2]; p[2] = p[5]; p[5] = c; c = p[3]; p[3] = p[4]; p[4] = c; #else uint64 k = *data++; #endif k *= m; k ^= k >> r; k *= m; h ^= k; h *= m; } const unsigned char * data2 = (const unsigned char*)data; switch(len & 7) { case 7: h ^= uint64(data2[6]) << 48; case 6: h ^= uint64(data2[5]) << 40; case 5: h ^= uint64(data2[4]) << 32; case 4: h ^= uint64(data2[3]) << 24; case 3: h ^= uint64(data2[2]) << 16; case 2: h ^= uint64(data2[1]) << 8; case 1: h ^= uint64(data2[0]); h *= m; }; h ^= h >> r; h *= m; h ^= h >> r; return h; } uint64 murmur_hash_64_inverse(uint64 h, uint64 seed) { const uint64 m = 0xc6a4a7935bd1e995ULL; const uint64 minv = 0x5f7a0ea7e59b19bdULL; // Multiplicative inverse of m under % 2^64 const int r = 47; h ^= h >> r; h *= minv; h ^= h >> r; h *= minv; uint64 hforward = seed ^ (8 * m); uint64 k = h ^ hforward; k *= minv; k ^= k >> r; k *= minv; #ifdef PLATFORM_BIG_ENDIAN char *p = (char *)&k; char c; c = p[0]; p[0] = p[7]; p[7] = c; c = p[1]; p[1] = p[6]; p[6] = c; c = p[2]; p[2] = p[5]; p[5] = c; c = p[3]; p[3] = p[4]; p[4] = c; #endif return k; }

I just started using hashes instead of strings/guids, and I'm using murmurhash. I'm using murmurhash3 right now, but I don't think I could manage making my own inverse hash function in a decent amount of time.

ReplyDeleteMight as well change to murmurhash2. I tried this code, but I seem to be missing the function/macro XENSURE.

XENSURE is just our internal assert function, you can replace it with your own assert.

ReplyDelete.thanks for sharing

ReplyDeleteWhat are the use cases of this?

ReplyDelete