gh-106320: Move private _PyHash API to the internal C API by vstinner · Pull Request #107026 · python/cpython · GitHub
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 80 additions & 5 deletions Include/internal/pycore_pyhash.h
84 changes: 3 additions & 81 deletions Include/pyhash.h
Original file line number Diff line number Diff line change
@@ -1,87 +1,10 @@
#ifndef Py_HASH_H

#define Py_HASH_H
#ifdef __cplusplus
extern "C" {
#endif

/* Helpers for hash functions.
 *
 * These private helpers are declared only when Py_LIMITED_API is not
 * defined. Each one returns a Py_hash_t.
 */
#ifndef Py_LIMITED_API
// Hash helper taking an object pointer and a double.
// NOTE(review): the role of the PyObject* argument is not visible here;
// see the notes for _Py_HashDouble in Python/pyhash.c.
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(PyObject *, double);
// Hash helper taking an untyped pointer. Per the note on
// _Py_HashPointerRaw() below, this variant replaces -1 with -2.
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*);
// Similar to _Py_HashPointer(), but don't replace -1 with -2
PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*);
// Hash helper taking a buffer pointer and a Py_ssize_t
// (presumably the buffer length in bytes -- confirm against Python/pyhash.c).
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
#endif

/* Prime multiplier used in string and various other hashes. */
#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */

/* Parameters used for the numeric hash implementation. See notes for
_Py_HashDouble in Python/pyhash.c. Numeric hashes are based on
reduction modulo the prime 2**_PyHASH_BITS - 1. */

/* Exponent of the modulus: 61 when pointers are at least 8 bytes wide,
   31 otherwise. */
#if SIZEOF_VOID_P >= 8
# define _PyHASH_BITS 61
#else
# define _PyHASH_BITS 31
#endif

/* 2**_PyHASH_BITS - 1, computed in size_t so that the shift is done in an
   unsigned type. */
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
/* NOTE(review): judging by the name, the hash value used for infinities;
   confirm against Python/pyhash.c. */
#define _PyHASH_INF 314159
/* Alias of _PyHASH_MULTIPLIER. NOTE(review): presumably the factor applied
   to the imaginary part when hashing complex numbers; confirm at use sites. */
#define _PyHASH_IMAG _PyHASH_MULTIPLIER


/* hash secret
*
* memory layout on 64 bit systems
* cccccccc cccccccc cccccccc uc -- unsigned char[24]
* pppppppp ssssssss ........ fnv -- two Py_hash_t
* k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t
* ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t
* ........ ........ eeeeeeee pyexpat XML hash salt
*
* memory layout on 32 bit systems
* cccccccc cccccccc cccccccc uc
* ppppssss ........ ........ fnv -- two Py_hash_t
* k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t (*)
* ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t
* ........ ........ eeee.... pyexpat XML hash salt
*
* (*) The siphash member may not be available on 32 bit platforms without
* an unsigned int64 data type.
*/
#ifndef Py_LIMITED_API
/* Storage for the hash secret. Every member below is a different view of
   the same bytes; the layout diagram in the "hash secret" comment above
   shows which bytes each view uses. Member order and padding sizes define
   that layout and must not be changed casually. */
typedef union {
/* ensure 24 bytes */
/* (raw byte view; makes the union at least 24 bytes large) */
unsigned char uc[24];
/* two Py_hash_t for FNV */
struct {
Py_hash_t prefix;
Py_hash_t suffix;
} fnv;
/* two uint64 for SipHash24 */
struct {
uint64_t k0;
uint64_t k1;
} siphash;
/* a different (!) Py_hash_t for small string optimization */
/* (placed after 16 bytes of padding, so it does not share storage with
   the fnv members above) */
struct {
unsigned char padding[16];
Py_hash_t suffix;
} djbx33a;
/* pyexpat XML hash salt; sits at the same offset as djbx33a.suffix
   (after 16 bytes of padding) */
struct {
unsigned char padding[16];
Py_hash_t hashsalt;
} expat;
} _Py_HashSecret_t;
/* The hash secret object itself, declared here as exported data. */
PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;

/* Declared only when Py_DEBUG is defined.
   NOTE(review): presumably a flag recording whether _Py_HashSecret has been
   set up -- confirm against its definition. */
#ifdef Py_DEBUG
PyAPI_DATA(int) _Py_HashSecret_Initialized;
#endif


/* hash function definition */
typedef struct {
Py_hash_t (*const hash)(const void *, Py_ssize_t);
Expand All @@ -94,7 +17,7 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
#endif


/* cutoff for small string DJBX33A optimization in range [1, cutoff).
/* Cutoff for small string DJBX33A optimization in range [1, cutoff).
*
* About 50% of the strings in a typical Python application are smaller than
* 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks.
Expand All @@ -112,7 +35,7 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
#endif /* Py_HASH_CUTOFF */


/* hash algorithm selection
/* Hash algorithm selection
*
* The values for Py_HASH_* are hard-coded in the
* configure script.
Expand Down Expand Up @@ -140,5 +63,4 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
#ifdef __cplusplus
}
#endif

#endif /* !Py_HASH_H */
#endif // !Py_HASH_H
1 change: 1 addition & 0 deletions Modules/_elementtree.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "Python.h"
#include "pycore_import.h" // _PyImport_GetModuleAttrString()
#include "pycore_pyhash.h" // _Py_HashSecret
#include "structmember.h" // PyMemberDef
#include "expat.h"
#include "pyexpat.h"
Expand Down
3 changes: 2 additions & 1 deletion Modules/_hashopenssl.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@

#include "Python.h"
#include "pycore_hashtable.h"
#include "hashlib.h"
#include "pycore_pyhash.h" // _Py_HashBytes()
#include "pycore_strhex.h" // _Py_strhex()
#include "hashlib.h"

/* EVP is the preferred interface to hashing in OpenSSL */
#include <openssl/evp.h>
Expand Down
5 changes: 5 additions & 0 deletions Modules/_xxtestfuzz/fuzzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@

See the source code for LLVMFuzzerTestOneInput for details. */

/* Define Py_BUILD_CORE unless the build already did.
   NOTE(review): presumably needed so that the internal header
   "pycore_pyhash.h" included below can be used from this file -- confirm. */
#ifndef Py_BUILD_CORE
# define Py_BUILD_CORE 1
#endif

#include <Python.h>
#include "pycore_pyhash.h" // _Py_HashBytes()
#include <stdlib.h>
#include <inttypes.h>

Expand Down
1 change: 1 addition & 0 deletions Modules/pyexpat.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "Python.h"
#include "pycore_import.h" // _PyImport_SetModule()
#include "pycore_pyhash.h" // _Py_HashSecret
#include <ctype.h>

#include "structmember.h" // PyMemberDef
Expand Down
1 change: 1 addition & 0 deletions Python/hashtable.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

#include "Python.h"
#include "pycore_hashtable.h"
#include "pycore_pyhash.h" // _Py_HashPointerRaw()

#define HASHTABLE_MIN_SIZE 16
#define HASHTABLE_HIGH 0.50
Expand Down
1 change: 1 addition & 0 deletions Python/pyhash.c