Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
5d8c093
add common object head for hashlib/hmac objects
picnixz Jun 8, 2025
81e3046
simplify digest computation
picnixz Jun 8, 2025
7f9f7b7
refactor update logic
picnixz Jun 8, 2025
15a4f2f
refactor alloc() logic
picnixz Jun 8, 2025
5cd828a
finalizing touches
picnixz Jun 8, 2025
63db1de
correct mutex usage
picnixz Jun 15, 2025
ea033a3
Revert 5cd828acdcfef753aee5eec7e13f07682af40f46
picnixz Jun 15, 2025
77baa67
revert some constructor changes
picnixz Jun 15, 2025
902759f
unconditionally lock when performing HASH updates
picnixz Jun 16, 2025
dde68c4
Merge remote-tracking branch 'upstream/main' into perf/hashlib/mutex-…
picnixz Jun 16, 2025
05c1e66
post-merge
picnixz Jun 16, 2025
db57278
do not guard against empty buffers for now
picnixz Jun 16, 2025
ead20a1
consistency fixes
picnixz Jun 16, 2025
68a6bbc
remove unused import
picnixz Jun 16, 2025
68f297e
correct naming for locked/unlocked versions
picnixz Jun 16, 2025
9817c3d
debug?
picnixz Jun 16, 2025
7c6842b
Merge remote-tracking branch 'upstream/main' into perf/hashlib/mutex-…
picnixz Jun 16, 2025
c14c87d
simplify HMAC
picnixz Jun 16, 2025
bfb5436
release the GIL for large buffers
picnixz Jun 16, 2025
923c05f
restore GIL_MINSIZE
picnixz Jun 16, 2025
55b2afa
correctly lock objects
picnixz Jun 16, 2025
5cd60d1
improve tests
picnixz Jun 16, 2025
a2fcbd5
fixup HMAC
picnixz Jun 16, 2025
417cee1
fixup
picnixz Jun 16, 2025
f350501
GIL protection
picnixz Jun 16, 2025
5c4009d
show WASI errors
picnixz Jun 16, 2025
8aec797
fix WASI
picnixz Jun 16, 2025
6db58dc
fix compilation
picnixz Jun 16, 2025
b1f9463
fix compilation
picnixz Jun 16, 2025
491b922
fix warnings
picnixz Jun 16, 2025
c048975
sync
picnixz Jun 17, 2025
c9044d2
fixup format string
picnixz Jun 17, 2025
6c08f0d
address review
picnixz Jun 17, 2025
7fd1396
reudce diff
picnixz Jun 20, 2025
f400a11
Merge remote-tracking branch 'upstream/main' into perf/hashlib/mutex-…
picnixz Jun 20, 2025
5e2daa8
Merge remote-tracking branch 'upstream/main' into perf/hashlib/mutex-…
picnixz Jun 20, 2025
4f9729e
Merge remote-tracking branch 'upstream/main' into perf/hashlib/mutex-…
picnixz Jun 20, 2025
06aaee0
Merge branch 'main' into perf/hashlib/mutex-135239
picnixz Jun 21, 2025
977c807
fixup
picnixz Jun 21, 2025
6d66fef
fixup
picnixz Jun 21, 2025
c9db0b1
make the test suite less slow
picnixz Jun 21, 2025
6ffdd1c
fix test when GIL_MINSIZE is changed
picnixz Jun 21, 2025
98ec915
defer cosmetics
picnixz Jun 21, 2025
398ddb3
Update Lib/test/test_hashlib.py
picnixz Jun 21, 2025
0ae70e9
improve test
picnixz Jun 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Modules/hashlib.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@
*/

#include "pythread.h"

#define HASHLIB_OBJECT_HEAD \
PyObject_HEAD \
/* prevent undefined behavior via multiple
* threads entering the C API */ \
bool use_mutex; \
PyMutex mutex;

#define ENTER_HASHLIB(obj) \
if ((obj)->use_mutex) { \
PyMutex_Lock(&(obj)->mutex); \
Expand Down
208 changes: 104 additions & 104 deletions Modules/md5module.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,7 @@

#include "Python.h"
#include "hashlib.h"

/*[clinic input]
module _md5
class MD5Type "MD5object *" "&PyType_Type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/
#include "pycore_strhex.h" // _Py_strhex()

/* The MD5 block size and message digest sizes, in bytes */

Expand All @@ -36,65 +31,64 @@ class MD5Type "MD5object *" "&PyType_Type"

#include "_hacl/Hacl_Hash_MD5.h"


typedef struct {
PyObject_HEAD
// Prevents undefined behavior via multiple threads entering the C API.
bool use_mutex;
PyMutex mutex;
Hacl_Hash_MD5_state_t *hash_state;
HASHLIB_OBJECT_HEAD
Hacl_Hash_MD5_state_t *state;
} MD5object;

#define _MD5object_CAST(op) ((MD5object *)(op))

#include "clinic/md5module.c.h"


typedef struct {
PyTypeObject* md5_type;
} MD5State;
PyTypeObject *md5_type;
} md5module_state;

static inline MD5State*
md5_get_state(PyObject *module)
static inline md5module_state *
get_md5module_state(PyObject *module)
{
void *state = PyModule_GetState(module);
assert(state != NULL);
return (MD5State *)state;
return (md5module_state *)state;
}

/*[clinic input]
module _md5
class MD5Type "MD5object *" "&PyType_Type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/

#include "clinic/md5module.c.h"

static MD5object *
newMD5object(MD5State * st)
newMD5object(md5module_state *st)
{
MD5object *md5 = PyObject_GC_New(MD5object, st->md5_type);
if (!md5) {
MD5object *self = PyObject_GC_New(MD5object, st->md5_type);
if (self == NULL) {
return NULL;
}
HASHLIB_INIT_MUTEX(md5);

PyObject_GC_Track(md5);
return md5;
HASHLIB_INIT_MUTEX(self);
PyObject_GC_Track(self);
return self;
}

/* Internal methods for a hash object */
static int
MD5_traverse(PyObject *ptr, visitproc visit, void *arg)
MD5_traverse(PyObject *op, visitproc visit, void *arg)
{
Py_VISIT(Py_TYPE(ptr));
Py_VISIT(Py_TYPE(op));
return 0;
}

static void
MD5_dealloc(PyObject *op)
{
MD5object *ptr = _MD5object_CAST(op);
Hacl_Hash_MD5_free(ptr->hash_state);
MD5object *self = _MD5object_CAST(op);
Hacl_Hash_MD5_free(self->state);
PyTypeObject *tp = Py_TYPE(op);
PyObject_GC_UnTrack(ptr);
PyObject_GC_Del(ptr);
PyObject_GC_UnTrack(self);
PyObject_GC_Del(self);
Py_DECREF(tp);
}


/* External methods for a hash object */

/*[clinic input]
Expand All @@ -109,21 +103,29 @@ static PyObject *
MD5Type_copy_impl(MD5object *self, PyTypeObject *cls)
/*[clinic end generated code: output=bf055e08244bf5ee input=d89087dcfb2a8620]*/
{
MD5State *st = PyType_GetModuleState(cls);
md5module_state *st = PyType_GetModuleState(cls);

MD5object *newobj;
if ((newobj = newMD5object(st)) == NULL) {
MD5object *copy = newMD5object(st);
if (copy == NULL) {
return NULL;
}

ENTER_HASHLIB(self);
newobj->hash_state = Hacl_Hash_MD5_copy(self->hash_state);
copy->state = Hacl_Hash_MD5_copy(self->state);
LEAVE_HASHLIB(self);
if (newobj->hash_state == NULL) {
Py_DECREF(self);
if (copy->state == NULL) {
Py_DECREF(copy);
return PyErr_NoMemory();
}
return (PyObject *)newobj;
return (PyObject *)copy;
}

static void
md5_digest_compute_cond_lock(MD5object *self, uint8_t *digest)
{
ENTER_HASHLIB(self);
Hacl_Hash_MD5_digest(self->state, digest);
LEAVE_HASHLIB(self);
}

/*[clinic input]
Expand All @@ -136,10 +138,8 @@ static PyObject *
MD5Type_digest_impl(MD5object *self)
/*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/
{
unsigned char digest[MD5_DIGESTSIZE];
ENTER_HASHLIB(self);
Hacl_Hash_MD5_digest(self->hash_state, digest);
LEAVE_HASHLIB(self);
uint8_t digest[MD5_DIGESTSIZE];
md5_digest_compute_cond_lock(self, digest);
return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE);
}

Expand All @@ -153,24 +153,13 @@ static PyObject *
MD5Type_hexdigest_impl(MD5object *self)
/*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/
{
unsigned char digest[MD5_DIGESTSIZE];
ENTER_HASHLIB(self);
Hacl_Hash_MD5_digest(self->hash_state, digest);
LEAVE_HASHLIB(self);

const char *hexdigits = "0123456789abcdef";
char digest_hex[MD5_DIGESTSIZE * 2];
char *str = digest_hex;
for (size_t i=0; i < MD5_DIGESTSIZE; i++) {
unsigned char byte = digest[i];
*str++ = hexdigits[byte >> 4];
*str++ = hexdigits[byte & 0x0f];
}
return PyUnicode_FromStringAndSize(digest_hex, sizeof(digest_hex));
uint8_t digest[MD5_DIGESTSIZE];
md5_digest_compute_cond_lock(self, digest);
return _Py_strhex((const char *)digest, MD5_DIGESTSIZE);
}

static void
update(Hacl_Hash_MD5_state_t *state, uint8_t *buf, Py_ssize_t len)
_hacl_md5_update(Hacl_Hash_MD5_state_t *state, uint8_t *buf, Py_ssize_t len)
{
/*
* Note: we explicitly ignore the error code on the basis that it would
Expand All @@ -188,6 +177,36 @@ update(Hacl_Hash_MD5_state_t *state, uint8_t *buf, Py_ssize_t len)
(void)Hacl_Hash_MD5_update(state, buf, (uint32_t)len);
}

static void
md5_update_state_with_lock(MD5object *self, uint8_t *buf, Py_ssize_t len)
{
Py_BEGIN_ALLOW_THREADS
PyMutex_Lock(&self->mutex); // unconditionally acquire a lock
_hacl_md5_update(self->state, buf, len);
PyMutex_Unlock(&self->mutex);
Py_END_ALLOW_THREADS
}

static void
md5_update_state_cond_lock(MD5object *self, uint8_t *buf, Py_ssize_t len)
{
ENTER_HASHLIB(self); // conditionally acquire a lock
_hacl_md5_update(self->state, buf, len);
LEAVE_HASHLIB(self);
}

static inline void
md5_update_state(MD5object *self, uint8_t *buf, Py_ssize_t len)
{
assert(buf != 0);
assert(len >= 0);
if (len != 0) {
len < HASHLIB_GIL_MINSIZE
? md5_update_state_cond_lock(self, buf, len)
: md5_update_state_with_lock(self, buf, len);
}
}

/*[clinic input]
MD5Type.update

Expand All @@ -204,20 +223,7 @@ MD5Type_update_impl(MD5object *self, PyObject *obj)
Py_buffer buf;

GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);

if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) {
self->use_mutex = true;
}
if (self->use_mutex) {
Py_BEGIN_ALLOW_THREADS
PyMutex_Lock(&self->mutex);
update(self->hash_state, buf.buf, buf.len);
PyMutex_Unlock(&self->mutex);
Py_END_ALLOW_THREADS
} else {
update(self->hash_state, buf.buf, buf.len);
}

md5_update_state(self, buf.buf, buf.len);
PyBuffer_Release(&buf);
Py_RETURN_NONE;
}
Expand All @@ -227,7 +233,7 @@ static PyMethodDef MD5_methods[] = {
MD5TYPE_DIGEST_METHODDEF
MD5TYPE_HEXDIGEST_METHODDEF
MD5TYPE_UPDATE_METHODDEF
{NULL, NULL} /* sentinel */
{NULL, NULL} /* sentinel */
};

static PyObject *
Expand All @@ -252,20 +258,20 @@ static PyGetSetDef MD5_getseters[] = {
{"block_size", MD5_get_block_size, NULL, NULL, NULL},
{"name", MD5_get_name, NULL, NULL, NULL},
{"digest_size", md5_get_digest_size, NULL, NULL, NULL},
{NULL} /* Sentinel */
{NULL} /* sentinel */
};

static PyType_Slot md5_type_slots[] = {
{Py_tp_dealloc, MD5_dealloc},
{Py_tp_methods, MD5_methods},
{Py_tp_getset, MD5_getseters},
{Py_tp_traverse, MD5_traverse},
{0,0}
{0, 0}
};

static PyType_Spec md5_type_spec = {
.name = "_md5.md5",
.basicsize = sizeof(MD5object),
.basicsize = sizeof(MD5object),
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION |
Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_HAVE_GC),
.slots = md5_type_slots
Expand Down Expand Up @@ -294,67 +300,61 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity,
return NULL;
}

MD5object *new;
Py_buffer buf;

if (string) {
GET_BUFFER_VIEW_OR_ERROUT(string, &buf);
}

MD5State *st = md5_get_state(module);
if ((new = newMD5object(st)) == NULL) {
if (string) {
PyBuffer_Release(&buf);
}
md5module_state *st = get_md5module_state(module);
MD5object *self = newMD5object(st);
if (self == NULL) {
return NULL;
}

new->hash_state = Hacl_Hash_MD5_malloc();
if (new->hash_state == NULL) {
Py_DECREF(new);
if (string) {
PyBuffer_Release(&buf);
}
self->state = Hacl_Hash_MD5_malloc();
if (self->state == NULL) {
Py_DECREF(self);
return PyErr_NoMemory();
}

if (string) {
Py_buffer buf;
GET_BUFFER_VIEW_OR_ERROR(string, &buf, goto error);
if (buf.len >= HASHLIB_GIL_MINSIZE) {
/* We do not initialize self->lock here as this is the constructor
* where it is not yet possible to have concurrent access. */
Py_BEGIN_ALLOW_THREADS
update(new->hash_state, buf.buf, buf.len);
_hacl_md5_update(self->state, buf.buf, buf.len);
Py_END_ALLOW_THREADS
}
else {
update(new->hash_state, buf.buf, buf.len);
_hacl_md5_update(self->state, buf.buf, buf.len);
}
PyBuffer_Release(&buf);
}

return (PyObject *)new;
return (PyObject *)self;

error:
Py_XDECREF(self);
return NULL;
}


/* List of functions exported by this module */

static struct PyMethodDef MD5_functions[] = {
_MD5_MD5_METHODDEF
{NULL, NULL} /* Sentinel */
{NULL, NULL} /* sentinel */
};

static int
_md5_traverse(PyObject *module, visitproc visit, void *arg)
{
MD5State *state = md5_get_state(module);
md5module_state *state = get_md5module_state(module);
Py_VISIT(state->md5_type);
return 0;
}

static int
_md5_clear(PyObject *module)
{
MD5State *state = md5_get_state(module);
md5module_state *state = get_md5module_state(module);
Py_CLEAR(state->md5_type);
return 0;
}
Expand All @@ -369,7 +369,7 @@ _md5_free(void *module)
static int
md5_exec(PyObject *m)
{
MD5State *st = md5_get_state(m);
md5module_state *st = get_md5module_state(m);

st->md5_type = (PyTypeObject *)PyType_FromModuleAndSpec(
m, &md5_type_spec, NULL);
Expand All @@ -395,7 +395,7 @@ static PyModuleDef_Slot _md5_slots[] = {
static struct PyModuleDef _md5module = {
PyModuleDef_HEAD_INIT,
.m_name = "_md5",
.m_size = sizeof(MD5State),
.m_size = sizeof(md5module_state),
.m_methods = MD5_functions,
.m_slots = _md5_slots,
.m_traverse = _md5_traverse,
Expand Down
Loading