Add Murmur2A hashing feature to BLI
Murmur2a is a very fast hashing function generating int32 hashes. It also features a very good distribution of generated hashes. However, it is not endianness-agnostic, meaning it will usually generate different hashes for the same key on big- and little-endian architectures. Consequently, **it shall not be used to generate persistent hashes** (e.g. never store them in a .blend file). This implementation supports incremental hashing, and is a direct adaptation of the reference implementation (in C++): https://smhasher.googlecode.com/svn-history/r130/trunk/MurmurHash2.cpp That C++ code was also used to generate the reference values in the gtests file. Reviewers: sergey, campbellbarton Reviewed By: campbellbarton Projects: #bf_blender Differential Revision: https://developer.blender.org/D892
This commit is contained in:
parent
c8a9764987
commit
64c0c13e6e
|
@ -19,9 +19,9 @@
|
|||
*/
|
||||
|
||||
#ifndef __BLI_MD5_H__
|
||||
#define __BLI_MD5_H__
|
||||
#define __BLI_MD5_H__
|
||||
|
||||
/** \file BLI_md5.h
|
||||
/** \file BLI_hash_md5.h
|
||||
* \ingroup bli
|
||||
*/
|
||||
|
||||
|
@ -43,5 +43,4 @@ int md5_stream(FILE *stream, void *resblock);
|
|||
|
||||
char *md5_to_hexdigest(void *resblock, char r_hex_digest[33]);
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* __BLI_MD5_H__ */
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* ***** BEGIN GPL LICENSE BLOCK *****
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*
|
||||
* ***** END GPL LICENSE BLOCK *****
|
||||
*/
|
||||
|
||||
#ifndef __BLI_MM2A_H__
|
||||
#define __BLI_MM2A_H__
|
||||
|
||||
/** \file BLI_hash_mm2a.h
|
||||
* \ingroup bli
|
||||
*/
|
||||
|
||||
#include "BLI_sys_types.h"
|
||||
|
||||
/**
 * State of an incremental Murmur2A hash computation.
 *
 * Set up with BLI_hash_mm2a_init(), fed with BLI_hash_mm2a_add() / BLI_hash_mm2a_add_int(),
 * and turned into the final value by BLI_hash_mm2a_end().
 */
typedef struct BLI_HashMurmur2A {
|
||||
uint32_t hash;  /* Running hash value; starts out as the seed. */
|
||||
uint32_t tail;  /* Input bytes gathered so far that do not yet form a full 4-byte word. */
|
||||
uint32_t count;  /* Number of bytes currently packed into `tail`. */
|
||||
uint32_t size;  /* Sum of all lengths added so far, each cast to 32 bits. */
|
||||
} BLI_HashMurmur2A;
|
||||
|
||||
/* Reset `mm2` so a new hash can be computed; `seed` becomes the initial hash value. */
void BLI_hash_mm2a_init(BLI_HashMurmur2A *mm2, uint32_t seed);
|
||||
|
||||
/* Feed `len` bytes starting at `data` into the hash; may be called repeatedly. */
void BLI_hash_mm2a_add(BLI_HashMurmur2A *mm2, const unsigned char *data, size_t len);
|
||||
|
||||
/* Feed the in-memory bytes of the integer `data` into the hash. */
void BLI_hash_mm2a_add_int(BLI_HashMurmur2A *mm2, int data);
|
||||
|
||||
/* Finish the computation and return the hash; this modifies the state stored in `mm2`. */
uint32_t BLI_hash_mm2a_end(BLI_HashMurmur2A *mm2);
|
||||
|
||||
#endif /* __BLI_MM2A_H__ */
|
|
@ -65,6 +65,8 @@ set(SRC
|
|||
intern/freetypefont.c
|
||||
intern/graph.c
|
||||
intern/gsqueue.c
|
||||
intern/hash_md5.c
|
||||
intern/hash_mm2a.c
|
||||
intern/jitter.c
|
||||
intern/lasso.c
|
||||
intern/listbase.c
|
||||
|
@ -80,7 +82,6 @@ set(SRC
|
|||
intern/math_rotation.c
|
||||
intern/math_vector.c
|
||||
intern/math_vector_inline.c
|
||||
intern/md5.c
|
||||
intern/noise.c
|
||||
intern/path_util.c
|
||||
intern/polyfill2d.c
|
||||
|
@ -134,6 +135,8 @@ set(SRC
|
|||
BLI_ghash.h
|
||||
BLI_graph.h
|
||||
BLI_gsqueue.h
|
||||
BLI_hash_md5.h
|
||||
BLI_hash_mm2a.h
|
||||
BLI_heap.h
|
||||
BLI_jitter.h
|
||||
BLI_kdopbvh.h
|
||||
|
@ -153,7 +156,6 @@ set(SRC
|
|||
BLI_math_matrix.h
|
||||
BLI_math_rotation.h
|
||||
BLI_math_vector.h
|
||||
BLI_md5.h
|
||||
BLI_memarena.h
|
||||
BLI_mempool.h
|
||||
BLI_noise.h
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
* according to the definition of MD5 in RFC 1321 from April 1992.
|
||||
*/
|
||||
|
||||
#include "BLI_md5.h" /* own include */
|
||||
#include "BLI_hash_md5.h" /* own include */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
|
@ -0,0 +1,107 @@
|
|||
/*
|
||||
* ***** BEGIN GPL LICENSE BLOCK *****
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*
|
||||
* ***** END GPL LICENSE BLOCK *****
|
||||
*
|
||||
* Copyright (C) 2014 Blender Foundation.
|
||||
*
|
||||
*/
|
||||
|
||||
/** \file blender/blenlib/intern/hash_mm2a.c
|
||||
* \ingroup bli
|
||||
*
|
||||
* Functions to compute Murmur2A hash key.
|
||||
*
|
||||
* A very fast hash generating an int32 result, with few collisions and a good distribution.
|
||||
*
|
||||
* See also:
|
||||
* reference implementation: https://smhasher.googlecode.com/svn-history/r130/trunk/MurmurHash2.cpp
|
||||
* and http://programmers.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed
|
||||
*
|
||||
* \warning Do not store that hash in files or such, it is not endian-agnostic, so you should only use it
|
||||
* for temporary data.
|
||||
*/
|
||||
|
||||
#include "BLI_hash_mm2a.h" /* own include */

#include <string.h>
|
||||
|
||||
/* Helpers. */
|
||||
/* Multiplier used by every Murmur2 mixing step (explicitly unsigned, all math is modulo 2^32). */
#define MM2A_M 0x5bd1e995u

/**
 * Mix the 32-bit word \a k into the running hash \a h.
 *
 * Both arguments must be modifiable uint32_t lvalues and are evaluated more than once:
 * \a k is scrambled in place and \a h receives the mixed result.
 *
 * The body is wrapped in do/while(0) so the macro expands to exactly one statement
 * and stays valid as the unbraced body of an if/else.
 */
#define MM2A_MIX(h, k) \
	do { \
		(k) *= MM2A_M; \
		(k) ^= (k) >> 24; \
		(k) *= MM2A_M; \
		(h) = ((h) * MM2A_M) ^ (k); \
	} while (0)
|
||||
|
||||
/**
 * Consume leading input bytes one at a time through the pending-tail buffer.
 *
 * Bytes are taken while input remains and either fewer than four bytes are left
 * or the tail already holds bytes. Each byte is packed into mm2->tail at the slot
 * given by mm2->count; once four bytes are gathered they are mixed into mm2->hash
 * and the tail is cleared.
 *
 * \param data: In/out cursor into the input, advanced past the consumed bytes.
 * \param len: In/out count of remaining input bytes, reduced by the bytes consumed.
 */
static void mm2a_mix_tail(BLI_HashMurmur2A *mm2, const unsigned char **data, size_t *len)
{
	const unsigned char *cursor = *data;
	size_t remaining = *len;

	for (;;) {
		if (remaining == 0) {
			break;
		}
		if (remaining >= 4 && mm2->count == 0) {
			/* Nothing pending and at least one whole word left: stop here. */
			break;
		}

		mm2->tail |= (uint32_t)(*cursor) << (mm2->count * 8);
		cursor++;
		remaining--;
		mm2->count++;

		if (mm2->count == 4) {
			/* The tail now holds a full word: fold it into the hash and start over. */
			MM2A_MIX(mm2->hash, mm2->tail);
			mm2->tail = 0;
			mm2->count = 0;
		}
	}

	*data = cursor;
	*len = remaining;
}
|
||||
|
||||
/**
 * Prepare \a mm2 for a fresh incremental hash computation.
 *
 * \param mm2: Hash state to reset.
 * \param seed: Initial value of the running hash.
 */
void BLI_hash_mm2a_init(BLI_HashMurmur2A *mm2, uint32_t seed)
{
	/* No input seen yet: nothing pending in the tail, nothing counted. */
	mm2->size = mm2->count = mm2->tail = 0;
	mm2->hash = seed;
}
|
||||
|
||||
/**
 * Feed \a len bytes starting at \a data into the hash state.
 *
 * May be called any number of times between BLI_hash_mm2a_init() and BLI_hash_mm2a_end().
 * Bytes that do not fill a whole 4-byte word are kept in the state's tail until later
 * input (or the final step) consumes them.
 *
 * \param mm2: Hash state to update.
 * \param data: Input bytes; no particular alignment is required.
 * \param len: Number of bytes to read from \a data.
 */
void BLI_hash_mm2a_add(BLI_HashMurmur2A *mm2, const unsigned char *data, size_t len)
{
	mm2->size += (uint32_t)len;

	/* First complete a tail left over from a previous call (or absorb a short input). */
	mm2a_mix_tail(mm2, &data, &len);

	for (; len >= 4; data += 4, len -= 4) {
		uint32_t k;

		/* `data` carries no alignment guarantee (the tail step above may have advanced it
		 * by 1-3 bytes), so copy the word out instead of dereferencing a casted pointer.
		 * The word is still read in native byte order. */
		memcpy(&k, data, sizeof(k));

		MM2A_MIX(mm2->hash, k);
	}

	/* Stash whatever is left (fewer than four bytes) in the tail. */
	mm2a_mix_tail(mm2, &data, &len);
}
|
||||
|
||||
/**
 * Feed the in-memory representation of an integer into the hash state.
 *
 * \param mm2: Hash state to update.
 * \param data: Value whose sizeof(int) bytes are hashed as stored in memory.
 */
void BLI_hash_mm2a_add_int(BLI_HashMurmur2A *mm2, int data)
{
	const unsigned char *raw_bytes = (const unsigned char *)&data;

	BLI_hash_mm2a_add(mm2, raw_bytes, sizeof(int));
}
|
||||
|
||||
/**
 * Finish the hash computation and return the resulting value.
 *
 * The pending tail and the accumulated size are mixed in, followed by the final
 * avalanche steps. The state is modified in the process (hash, tail and size are
 * all overwritten).
 *
 * \param mm2: Hash state to finalize.
 * \return The final 32-bit hash, also stored in mm2->hash.
 */
uint32_t BLI_hash_mm2a_end(BLI_HashMurmur2A *mm2)
{
	uint32_t result = mm2->hash;

	/* The mix macro scrambles its second argument in place, hence tail/size change here. */
	MM2A_MIX(result, mm2->tail);
	MM2A_MIX(result, mm2->size);

	/* Final avalanche. */
	result ^= result >> 13;
	result *= MM2A_M;
	result ^= result >> 15;

	mm2->hash = result;
	return result;
}
|
|
@ -35,7 +35,7 @@
|
|||
#include "BLI_string.h"
|
||||
#include "BLI_path_util.h"
|
||||
#include "BLI_fileops.h"
|
||||
#include "BLI_md5.h"
|
||||
#include "BLI_hash_md5.h"
|
||||
#include "BLI_system.h"
|
||||
#include BLI_SYSTEM_PID_H
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
|
||||
#include "BLI_utildefines.h"
|
||||
#include "BLI_listbase.h"
|
||||
#include "BLI_md5.h"
|
||||
#include "BLI_hash_md5.h"
|
||||
#include "BLI_path_util.h"
|
||||
#include "BLI_rect.h"
|
||||
#include "BLI_string.h"
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
/* Apache License, Version 2.0 */
|
||||
|
||||
#include "testing/testing.h"
|
||||
|
||||
extern "C" {
|
||||
#include "BLI_hash_mm2a.h"
|
||||
}
|
||||
|
||||
/* Note: Reference results are taken from reference implementation (cpp code, CMurmurHash2A variant):
|
||||
* https://smhasher.googlecode.com/svn-history/r130/trunk/MurmurHash2.cpp
|
||||
*/
|
||||
|
||||
/* Hash one short string in a single call and compare against the reference value. */
TEST(hash_mm2a, MM2ABasic)
{
	const char *text = "Blender";
	BLI_HashMurmur2A state;

	BLI_hash_mm2a_init(&state, 0);
	BLI_hash_mm2a_add(&state, (const unsigned char *)text, strlen(text));

	/* The expected value depends on the byte order of the machine running the test. */
#ifdef __LITTLE_ENDIAN__
	EXPECT_EQ(1633988145, BLI_hash_mm2a_end(&state));
#else
	EXPECT_EQ(959283772, BLI_hash_mm2a_end(&state));
#endif
}
|
||||
|
||||
/* Hashing a string piece by piece must give the same result as hashing it in one call. */
TEST(hash_mm2a, MM2AConcatenateStrings)
{
	const char *pieces[3] = {"Blender", " is ", "FaNtAsTiC"};
	const char *whole = "Blender is FaNtAsTiC";

	BLI_HashMurmur2A state;
	uint32_t hash;

	/* Incremental: one add per piece. */
	BLI_hash_mm2a_init(&state, 0);
	for (int i = 0; i < 3; i++) {
		BLI_hash_mm2a_add(&state, (const unsigned char *)pieces[i], strlen(pieces[i]));
	}
	hash = BLI_hash_mm2a_end(&state);

	/* One shot: the full string in a single add. */
	BLI_hash_mm2a_init(&state, 0);
	BLI_hash_mm2a_add(&state, (const unsigned char *)whole, strlen(whole));

#ifdef __LITTLE_ENDIAN__
	EXPECT_EQ(1545105348, hash);
#else
	EXPECT_EQ(2604964730, hash);
#endif
	EXPECT_EQ(hash, BLI_hash_mm2a_end(&state));
}
|
||||
|
||||
/* Adding integers one by one must match hashing the whole array as raw bytes. */
TEST(hash_mm2a, MM2AIntegers)
{
	BLI_HashMurmur2A mm2;
	uint32_t hash;

	const int ints[4] = {1, 2, 3, 4};

	/* Incremental: one add per integer. */
	BLI_hash_mm2a_init(&mm2, 0);
	for (int i = 0; i < 4; i++) {
		BLI_hash_mm2a_add_int(&mm2, ints[i]);
	}
	hash = BLI_hash_mm2a_end(&mm2);

	/* One shot: the array as a single byte buffer. */
	BLI_hash_mm2a_init(&mm2, 0);
	BLI_hash_mm2a_add(&mm2, (const unsigned char *)ints, sizeof(ints));

	/* Same reference value on little and big endian, so no per-endianness branch is needed.
	 * The literal is unsigned to match the uint32_t it is compared with. */
	EXPECT_EQ(405493096u, hash);
	EXPECT_EQ(hash, BLI_hash_mm2a_end(&mm2));
}
|
|
@ -42,3 +42,4 @@ BLENDER_TEST(BLI_string "bf_blenlib")
|
|||
BLENDER_TEST(BLI_path_util "bf_blenlib;extern_wcwidth;${ZLIB_LIBRARIES}")
|
||||
BLENDER_TEST(BLI_polyfill2d "bf_blenlib")
|
||||
BLENDER_TEST(BLI_listbase "bf_blenlib")
|
||||
BLENDER_TEST(BLI_hash_mm2a "bf_blenlib")
|
||||
|
|
Loading…
Reference in New Issue