From 745b3b9ec0e28ec287e0cbf455bb8e2e23927011 Mon Sep 17 00:00:00 2001 From: lambda-abstraction Date: Wed, 13 May 2026 23:28:00 +0300 Subject: [PATCH 1/3] `frozendict_hash`: mix each entry's key and value hashes together instead of hashing keys and values independently --- Objects/dictobject.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index b33a273dac3b95..44415f41043d0e 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -8222,10 +8222,18 @@ frozendict_repr(PyObject *self) return res; } -static Py_uhash_t -_shuffle_bits(Py_uhash_t h) +// based on boost's old hash_combine +static inline Py_uhash_t +_combine_hashes(Py_uhash_t h1, Py_uhash_t h2) { - return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL; + // 2^sizeof(Py_hash_t) / phi +#if SIZEOF_PY_HASH_T == 8 + const Py_uhash_t GOLDEN_C = 0x9e3779b97f4a7c15ULL; +#else + const Py_uhash_t GOLDEN_C = 0x9e3779b9UL; +#endif + h1 ^= h2 + GOLDEN_C + (h1 << 6) + (h1 >> 2); + return h1; } // Code copied from frozenset_hash() @@ -8239,7 +8247,7 @@ frozendict_hash(PyObject *op) } PyDictObject *mp = _PyAnyDict_CAST(op); - Py_uhash_t hash = 0; + Py_uhash_t hash = 0xfd1c74; // start at a different value from frozenset to avoid collision with empty frozenset PyObject *key, *value; // borrowed refs Py_ssize_t pos = 0; @@ -8248,13 +8256,11 @@ frozendict_hash(PyObject *op) if (key_hash == -1) { return -1; } - hash ^= _shuffle_bits(key_hash); - Py_hash_t value_hash = PyObject_Hash(value); if (value_hash == -1) { return -1; } - hash ^= _shuffle_bits(value_hash); + hash ^= _combine_hashes(key_hash, value_hash); } /* Factor in the number of active entries */ From 22f55bb851001a27d0a263d431d2314782ed7f04 Mon Sep 17 00:00:00 2001 From: lambda-abstraction Date: Thu, 14 May 2026 02:00:19 +0300 Subject: [PATCH 2/3] `_combine_hashes`: improve bit mixing --- Objects/dictobject.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Objects/dictobject.c 
b/Objects/dictobject.c index 44415f41043d0e..2ba3d31ca31fd7 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -8222,17 +8222,23 @@ frozendict_repr(PyObject *self) return res; } -// based on boost's old hash_combine static inline Py_uhash_t _combine_hashes(Py_uhash_t h1, Py_uhash_t h2) { - // 2^sizeof(Py_hash_t) / phi #if SIZEOF_PY_HASH_T == 8 + // 2^sizeof(Py_hash_t) / phi const Py_uhash_t GOLDEN_C = 0x9e3779b97f4a7c15ULL; + h1 += GOLDEN_C; + h1 ^= h2; + h1 ^= (h1 << 13) + (h1 >> 3); + h1 ^= h1 >> 33; #else const Py_uhash_t GOLDEN_C = 0x9e3779b9UL; + h1 += GOLDEN_C; + h1 ^= h2; + h1 ^= (h1 << 6) + (h1 >> 2); + h1 ^= h1 >> 16; #endif - h1 ^= h2 + GOLDEN_C + (h1 << 6) + (h1 >> 2); return h1; } From bbebec76fdc0b34e06597ea6b0975cf380f86ba2 Mon Sep 17 00:00:00 2001 From: lambda-abstraction Date: Thu, 14 May 2026 21:35:07 +0300 Subject: [PATCH 3/3] reuse the tuple xxhash algorithm for entries in `frozendict_hash` by unrolling it for tuples of len 2 --- Objects/dictobject.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 2ba3d31ca31fd7..78317740761083 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -129,7 +129,7 @@ As a consequence of this, split keys have a maximum size of 16. 
#include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_setobject.h" // _PySet_NextEntry() -#include "pycore_tuple.h" // _PyTuple_Recycle() +#include "pycore_tuple.h" // _PyTuple_Recycle(), _PyTuple_HASH_XXPRIME_1, _PyTuple_HASH_XXPRIME_2, _PyTuple_HASH_XXPRIME_5, _PyTuple_HASH_XXROTATE() #include "pycore_unicodeobject.h" // _PyUnicode_InternImmortal() #include "stringlib/eq.h" // unicode_eq() @@ -8222,24 +8222,27 @@ frozendict_repr(PyObject *self) return res; } +// Code unrolled from tuple_hash() for 2 values static inline Py_uhash_t -_combine_hashes(Py_uhash_t h1, Py_uhash_t h2) -{ -#if SIZEOF_PY_HASH_T == 8 - // 2^sizeof(Py_hash_t) / phi - const Py_uhash_t GOLDEN_C = 0x9e3779b97f4a7c15ULL; - h1 += GOLDEN_C; - h1 ^= h2; - h1 ^= (h1 << 13) + (h1 >> 3); - h1 ^= h1 >> 33; -#else - const Py_uhash_t GOLDEN_C = 0x9e3779b9UL; - h1 += GOLDEN_C; - h1 ^= h2; - h1 ^= (h1 << 6) + (h1 >> 2); - h1 ^= h1 >> 16; -#endif - return h1; +_tuple2_xxhash(Py_uhash_t h1, Py_uhash_t h2) +{ + Py_uhash_t acc = _PyTuple_HASH_XXPRIME_5; + + acc += h1 * _PyTuple_HASH_XXPRIME_2; + acc = _PyTuple_HASH_XXROTATE(acc); + acc *= _PyTuple_HASH_XXPRIME_1; + + acc += h2 * _PyTuple_HASH_XXPRIME_2; + acc = _PyTuple_HASH_XXROTATE(acc); + acc *= _PyTuple_HASH_XXPRIME_1; + + acc += 2 ^ (_PyTuple_HASH_XXPRIME_5 ^ 3527539UL); + + if (acc == (Py_uhash_t)-1) { + acc = 1546275796; + } + + return acc; } // Code copied from frozenset_hash() @@ -8253,7 +8256,7 @@ frozendict_hash(PyObject *op) } PyDictObject *mp = _PyAnyDict_CAST(op); - Py_uhash_t hash = 0xfd1c74; // start at a different value from frozenset to avoid collision with empty frozenset + Py_uhash_t hash = 0xfd1c74; // start at a different value from frozenset to avoid collision with frozenset(frozendict.items()) PyObject *key, *value; // borrowed refs Py_ssize_t pos = 0; @@ -8266,7 +8269,7 @@ frozendict_hash(PyObject *op) if (value_hash == -1) { return -1; } - hash ^= 
_combine_hashes(key_hash, value_hash); + hash ^= _tuple2_xxhash(key_hash, value_hash); } /* Factor in the number of active entries */