/*
 * Copyright (C) 2011 Andrea Mazzoleni
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
/*
 * Derivative work from MurmurHash3.cpp revision r136
 *
 * SMHasher & MurmurHash
 * http://code.google.com/p/smhasher/
 *
 * Exact source used as reference:
 * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp?spec=svn136&r=136
 */

// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
|
|
|
|
/*
 * Finalization mix - force all bits of a hash block to avalanche.
 *
 * Standard MurmurHash3 32 bit finalizer: alternating xor-shift and
 * multiply steps so that every input bit affects every output bit.
 */
static inline uint32_t fmix32(uint32_t h)
{
	h = (h ^ (h >> 16)) * 0x85ebca6b;
	h = (h ^ (h >> 13)) * 0xc2b2ae35;
	return h ^ (h >> 16);
}
|
|
|
|
/*
 * Warning!
 * Don't declare these variables static, otherwise the gcc optimizer
 * may generate very slow code for multiplication with these constants,
 * like:

-> .cpp
k1 *= c1;
-> .asm
 152:	8d 14 80             	lea    (%eax,%eax,4),%edx
 155:	8d 14 90             	lea    (%eax,%edx,4),%edx
 158:	c1 e2 03             	shl    $0x3,%edx
 15b:	29 c2                	sub    %eax,%edx
 15d:	8d 14 d2             	lea    (%edx,%edx,8),%edx
 160:	8d 14 90             	lea    (%eax,%edx,4),%edx
 163:	8d 14 d0             	lea    (%eax,%edx,8),%edx
 166:	8d 14 90             	lea    (%eax,%edx,4),%edx
 169:	8d 14 50             	lea    (%eax,%edx,2),%edx
 16c:	8d 14 90             	lea    (%eax,%edx,4),%edx
 16f:	8d 14 92             	lea    (%edx,%edx,4),%edx
 172:	8d 14 50             	lea    (%eax,%edx,2),%edx
 175:	8d 04 d0             	lea    (%eax,%edx,8),%eax
 178:	8d 14 c5 00 00 00 00 	lea    0x0(,%eax,8),%edx
 17f:	29 d0                	sub    %edx,%eax

 * resulting in speeds of 500 MB/s instead of 3000 MB/s.
 *
 * Verified with gcc 4.4.4 compiling with :
 *
 * g++ -g -c -O2 MurmurHash3.cpp -o MurmurHash3.o
 */
/* MurmurHash3 x86_128 block multiplier constants (kept non-static on purpose,
 * see the warning above) */
uint32_t c1 = 0x239b961b;
uint32_t c2 = 0xab0e9789;
uint32_t c3 = 0x38b34ae5;
uint32_t c4 = 0xa1e38b93;
|
|
|
|
void MurmurHash3_x86_128(const void* data, size_t size, const uint8_t* seed, void* digest)
|
|
{
|
|
size_t nblocks;
|
|
const uint32_t* blocks;
|
|
const uint32_t* end;
|
|
size_t size_remainder;
|
|
uint32_t h1, h2, h3, h4;
|
|
|
|
h1 = util_read32(seed + 0);
|
|
h2 = util_read32(seed + 4);
|
|
h3 = util_read32(seed + 8);
|
|
h4 = util_read32(seed + 12);
|
|
|
|
nblocks = size / 16;
|
|
blocks = data;
|
|
end = blocks + nblocks * 4;
|
|
|
|
/* body */
|
|
while (blocks < end) {
|
|
uint32_t k1 = blocks[0];
|
|
uint32_t k2 = blocks[1];
|
|
uint32_t k3 = blocks[2];
|
|
uint32_t k4 = blocks[3];
|
|
|
|
#if WORDS_BIGENDIAN
|
|
k1 = util_swap32(k1);
|
|
k2 = util_swap32(k2);
|
|
k3 = util_swap32(k3);
|
|
k4 = util_swap32(k4);
|
|
#endif
|
|
|
|
k1 *= c1; k1 = util_rotl32(k1, 15); k1 *= c2; h1 ^= k1;
|
|
|
|
h1 = util_rotl32(h1, 19); h1 += h2; h1 = h1 * 5 + 0x561ccd1b;
|
|
|
|
k2 *= c2; k2 = util_rotl32(k2, 16); k2 *= c3; h2 ^= k2;
|
|
|
|
h2 = util_rotl32(h2, 17); h2 += h3; h2 = h2 * 5 + 0x0bcaa747;
|
|
|
|
k3 *= c3; k3 = util_rotl32(k3, 17); k3 *= c4; h3 ^= k3;
|
|
|
|
h3 = util_rotl32(h3, 15); h3 += h4; h3 = h3 * 5 + 0x96cd1c35;
|
|
|
|
k4 *= c4; k4 = util_rotl32(k4, 18); k4 *= c1; h4 ^= k4;
|
|
|
|
h4 = util_rotl32(h4, 13); h4 += h1; h4 = h4 * 5 + 0x32ac3b17;
|
|
|
|
blocks += 4;
|
|
}
|
|
|
|
/* tail */
|
|
size_remainder = size & 15;
|
|
if (size_remainder != 0) {
|
|
const uint8_t* tail = (const uint8_t*)blocks;
|
|
|
|
uint32_t k1 = 0;
|
|
uint32_t k2 = 0;
|
|
uint32_t k3 = 0;
|
|
uint32_t k4 = 0;
|
|
|
|
switch (size_remainder) {
|
|
case 15 : k4 ^= (uint32_t)tail[14] << 16; /* fallthrough */
|
|
case 14 : k4 ^= (uint32_t)tail[13] << 8; /* fallthrough */
|
|
case 13 : k4 ^= (uint32_t)tail[12] << 0; /* fallthrough */
|
|
k4 *= c4; k4 = util_rotl32(k4, 18); k4 *= c1; h4 ^= k4;
|
|
/* fallthrough */
|
|
case 12 : k3 ^= (uint32_t)tail[11] << 24; /* fallthrough */
|
|
case 11 : k3 ^= (uint32_t)tail[10] << 16; /* fallthrough */
|
|
case 10 : k3 ^= (uint32_t)tail[ 9] << 8; /* fallthrough */
|
|
case 9 : k3 ^= (uint32_t)tail[ 8] << 0; /* fallthrough */
|
|
k3 *= c3; k3 = util_rotl32(k3, 17); k3 *= c4; h3 ^= k3;
|
|
/* fallthrough */
|
|
case 8 : k2 ^= (uint32_t)tail[ 7] << 24; /* fallthrough */
|
|
case 7 : k2 ^= (uint32_t)tail[ 6] << 16; /* fallthrough */
|
|
case 6 : k2 ^= (uint32_t)tail[ 5] << 8; /* fallthrough */
|
|
case 5 : k2 ^= (uint32_t)tail[ 4] << 0; /* fallthrough */
|
|
k2 *= c2; k2 = util_rotl32(k2, 16); k2 *= c3; h2 ^= k2;
|
|
/* fallthrough */
|
|
case 4 : k1 ^= (uint32_t)tail[ 3] << 24; /* fallthrough */
|
|
case 3 : k1 ^= (uint32_t)tail[ 2] << 16; /* fallthrough */
|
|
case 2 : k1 ^= (uint32_t)tail[ 1] << 8; /* fallthrough */
|
|
case 1 : k1 ^= (uint32_t)tail[ 0] << 0; /* fallthrough */
|
|
k1 *= c1; k1 = util_rotl32(k1, 15); k1 *= c2; h1 ^= k1;
|
|
/* fallthrough */
|
|
}
|
|
}
|
|
|
|
/* finalization */
|
|
h1 ^= size; h2 ^= size; h3 ^= size; h4 ^= size;
|
|
|
|
h1 += h2; h1 += h3; h1 += h4;
|
|
h2 += h1; h3 += h1; h4 += h1;
|
|
|
|
h1 = fmix32(h1);
|
|
h2 = fmix32(h2);
|
|
h3 = fmix32(h3);
|
|
h4 = fmix32(h4);
|
|
|
|
h1 += h2; h1 += h3; h1 += h4;
|
|
h2 += h1; h3 += h1; h4 += h1;
|
|
|
|
util_write32(digest + 0, h1);
|
|
util_write32(digest + 4, h2);
|
|
util_write32(digest + 8, h3);
|
|
util_write32(digest + 12, h4);
|
|
}