//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.

/*
 * This version is taken from https://github.com/PeterScott/murmur3
 * and modified to work with CUDA.
 */

// Including stdint.h is a pain in cupy, so just put the declarations in.
// Beware that long int is not 64-bit on all platforms!
// e.g. Windows requires a 'long long' to get to 64 bit.

typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef signed char int8_t;
typedef int int32_t;

// The preprocessor cannot evaluate sizeof(), so the original '#if' on the
// width of 'long int' never fired and the fallback branch was always taken.
// 'long long' is at least 64 bits on every platform nvcc and NVRTC target,
// so use it unconditionally.
typedef unsigned long long uint64_t;
typedef long long int64_t;
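
// Compile-time sanity check: fail the build if the typedef above is not
// actually 8 bytes wide. Assumes a C++11-or-later dialect, which both nvcc
// and NVRTC use by default.
static_assert(sizeof(uint64_t) == 8, "uint64_t must be 64 bits wide");
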
//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// The reference implementation maps FORCE_INLINE to a compiler-specific
// attribute; under CUDA the plain 'inline' on the functions below suffices,
// so the macro is deliberately left empty.
#define FORCE_INLINE

__device__ static inline FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r )
{
  return (x << r) | (x >> (64 - r));
}

#define ROTL64(x,y) rotl64(x,y)
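
// For example, rotating left by one byte carries the top byte around to the
// bottom: ROTL64(0x0123456789abcdefULL, 8) == 0x23456789abcdef01ULL.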

#define BIG_CONSTANT(x) (x##LLU)

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

#define getblock(p, i) (p[i])
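// A direct read works here because NVIDIA GPUs are little-endian, which is
// the byte order the canonical x64 implementation assumes; a big-endian
// platform would need a byte swap in this macro.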

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

__device__ static inline FORCE_INLINE uint32_t fmix32 ( uint32_t h )
{
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;

  return h;
}

//----------

__device__ static inline FORCE_INLINE uint64_t fmix64 ( uint64_t k )
{
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
  k ^= k >> 33;

  return k;
}

//-----------------------------------------------------------------------------

__device__ void MurmurHash3_x64_128 ( const void * key, const int len,
                                      const uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;
  int i;

  uint64_t h1 = seed;
  uint64_t h2 = seed;

  uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
  uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

  //----------
  // body

  const uint64_t * blocks = (const uint64_t *)(data);

  for(i = 0; i < nblocks; i++)
  {
    uint64_t k1 = getblock(blocks,i*2+0);
    uint64_t k2 = getblock(blocks,i*2+1);

    k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;

    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;

    k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;

    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
  }

  //----------
  // tail

  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);

  uint64_t k1 = 0;
  uint64_t k2 = 0;

  // Every case deliberately falls through to the one below it, so the
  // remaining 1-15 bytes are packed into k1/k2 one byte at a time.
  switch(len & 15)
  {
  case 15: k2 ^= (uint64_t)(tail[14]) << 48;
  case 14: k2 ^= (uint64_t)(tail[13]) << 40;
  case 13: k2 ^= (uint64_t)(tail[12]) << 32;
  case 12: k2 ^= (uint64_t)(tail[11]) << 24;
  case 11: k2 ^= (uint64_t)(tail[10]) << 16;
  case 10: k2 ^= (uint64_t)(tail[ 9]) << 8;
  case  9: k2 ^= (uint64_t)(tail[ 8]) << 0;
           k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;

  case  8: k1 ^= (uint64_t)(tail[ 7]) << 56;
  case  7: k1 ^= (uint64_t)(tail[ 6]) << 48;
  case  6: k1 ^= (uint64_t)(tail[ 5]) << 40;
  case  5: k1 ^= (uint64_t)(tail[ 4]) << 32;
  case  4: k1 ^= (uint64_t)(tail[ 3]) << 24;
  case  3: k1 ^= (uint64_t)(tail[ 2]) << 16;
  case  2: k1 ^= (uint64_t)(tail[ 1]) << 8;
  case  1: k1 ^= (uint64_t)(tail[ 0]) << 0;
           k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
  }

  //----------
  // finalization

  h1 ^= len; h2 ^= len;

  h1 += h2;
  h2 += h1;

  h1 = fmix64(h1);
  h2 = fmix64(h2);

  h1 += h2;
  h2 += h1;

  ((uint64_t*)out)[0] = h1;
  ((uint64_t*)out)[1] = h2;
}

//-----------------------------------------------------------------------------

// Write a stream of hash values for an input stream. Each output carries up
// to 128 bits of entropy, so out_size (given in bytes) must be at most 16.
// Input size should also be specified in bytes.
extern "C" __global__
void hash_data(char* dest, const char* src, size_t out_size, size_t in_size,
               size_t n_items, uint32_t seed)
{
  // uint64_t storage gives the 8-byte alignment that the uint64_t stores in
  // MurmurHash3_x64_128 require; a plain char array is not guaranteed it.
  uint64_t entropy[2];  // 128/8 = 16 bytes
  const char* entropy_bytes = (const char*)entropy;

  // Grid-stride loop with size_t indices, so item counts above INT_MAX work.
  size_t _loop_start = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
  size_t _loop_stride = (size_t)blockDim.x * gridDim.x;
  for (size_t i = _loop_start; i < n_items; i += _loop_stride)
  {
    // Note: for in_size >= 16 the hash reads src_i through a uint64_t*, so
    // each row should start on an 8-byte boundary (e.g. in_size a multiple
    // of 8).
    const char* src_i = &src[i*in_size];
    char* dest_i = &dest[i*out_size];

    MurmurHash3_x64_128(src_i, (int)in_size, seed, entropy);
    for (size_t j = 0; j < out_size && j < sizeof(entropy); ++j)
      dest_i[j] = entropy_bytes[j];
  }
}
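
//-----------------------------------------------------------------------------
// Usage sketch, kept inside a comment so the file stays loadable through
// CuPy's NVRTC path, which rejects host code. The buffer names and launch
// configuration below are illustrative assumptions, not part of this file;
// they show how the kernel could be driven if the file were built with nvcc.
//
//   #include <cuda_runtime.h>
//
//   // Hash 1000 records of 32 bytes each down to 8-byte digests.
//   size_t n_items = 1000, in_size = 32, out_size = 8;
//   char *d_src, *d_dest;
//   cudaMalloc(&d_src,  n_items * in_size);
//   cudaMalloc(&d_dest, n_items * out_size);
//   // ... fill d_src with the input records ...
//   hash_data<<<(n_items + 255) / 256, 256>>>(d_dest, d_src, out_size,
//                                             in_size, n_items, /*seed=*/42u);
//   cudaDeviceSynchronize();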