Skip to content

Commit c07d8a0

Browse files
committed
Implement swisstable hash
1 parent 5543d9c commit c07d8a0

File tree

3 files changed

+479
-294
lines changed

3 files changed

+479
-294
lines changed

Include/internal/pycore_dict.h

Lines changed: 45 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ Py_ssize_t _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyOb
5252
int _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value);
5353

5454
#define DKIX_EMPTY (-1)
55-
#define DKIX_DUMMY (-2) /* Used internally */
5655
#define DKIX_ERROR (-3)
5756

5857
typedef enum {
@@ -61,6 +60,39 @@ typedef enum {
6160
DICT_KEYS_SPLIT = 2
6261
} DictKeysKind;
6362

63+
// Currently, only 8-wide groups are supported.
64+
// A 16-wide group would be possible when SSE2 is available, but NEON cannot optimize it.
65+
#define GROUP_WIDTH (8)
66+
67+
typedef union {
68+
char c[GROUP_WIDTH];
69+
uint64_t u64;
70+
} group_control;
71+
72+
typedef struct {
73+
group_control control;
74+
uint8_t index[GROUP_WIDTH];
75+
} group8; // 16byte
76+
77+
typedef struct {
78+
group_control control;
79+
uint16_t index[GROUP_WIDTH];
80+
} group16; // 24byte
81+
82+
typedef struct {
83+
group_control control;
84+
uint32_t index[GROUP_WIDTH];
85+
} group32; // 40byte
86+
87+
typedef struct {
88+
group_control control;
89+
uint64_t index[GROUP_WIDTH];
90+
} group64; // 72byte
91+
92+
// TODO: group64 could use uint8_t index[7*GROUP_WIDTH] instead.
93+
// Then sizeof(group64) becomes 64 bytes, which is cache-line friendly.
94+
95+
6496
/* See dictobject.c for actual layout of DictKeysObject */
6597
struct _dictkeysobject {
6698
Py_ssize_t dk_refcnt;
@@ -80,20 +112,7 @@ struct _dictkeysobject {
80112
/* Number of used entries in dk_entries. */
81113
Py_ssize_t dk_nentries;
82114

83-
/* Actual hash table of dk_size entries. It holds indices in dk_entries,
84-
or DKIX_EMPTY(-1) or DKIX_DUMMY(-2).
85-
86-
Indices must be: 0 <= indice < USABLE_FRACTION(dk_size).
87-
88-
The size in bytes of an indice depends on dk_size:
89-
90-
- 1 byte if dk_size <= 0xff (char*)
91-
- 2 bytes if dk_size <= 0xffff (int16_t*)
92-
- 4 bytes if dk_size <= 0xffffffff (int32_t*)
93-
- 8 bytes otherwise (int64_t*)
94-
95-
Dynamically sized, SIZEOF_VOID_P is minimum. */
96-
char dk_indices[]; /* char is required to avoid strict aliasing. */
115+
unsigned char dk_groups[]; /* char is required to avoid strict aliasing. */
97116

98117
/* "PyDictKeyEntry dk_entries[dk_usable];" array follows:
99118
see the DK_ENTRIES() macro */
@@ -113,23 +132,19 @@ struct _dictvalues {
113132
PyObject *values[1];
114133
};
115134

116-
#define DK_LOG_SIZE(dk) ((dk)->dk_log2_size)
117-
#if SIZEOF_VOID_P > 4
135+
#define DK_LOG_SIZE(dk) ((dk)->dk_log2_size+3)
136+
118137
#define DK_SIZE(dk) (((int64_t)1)<<DK_LOG_SIZE(dk))
119-
#define DK_IXSIZE(dk) \
120-
(DK_LOG_SIZE(dk) <= 7 ? \
121-
1 : DK_LOG_SIZE(dk) <= 15 ? \
122-
2 : DK_LOG_SIZE(dk) <= 31 ? \
123-
4 : sizeof(int64_t))
124-
#else
125-
#define DK_SIZE(dk) (1<<DK_LOG_SIZE(dk))
126-
#define DK_IXSIZE(dk) \
127-
(DK_LOG_SIZE(dk) <= 7 ? \
128-
1 : DK_LOG_SIZE(dk) <= 15 ? \
129-
2 : sizeof(int32_t))
130-
#endif
138+
#define DK_GROUPS(dk) (((int64_t)1)<<((dk)->dk_log2_size))
139+
140+
#define DK_GROUP_SIZE(dk) \
141+
(DK_LOG_SIZE(dk) <= 8 ? sizeof(group8) \
142+
: DK_LOG_SIZE(dk) <= 16 ? sizeof(group16) \
143+
: DK_LOG_SIZE(dk) <= 32 ? sizeof(group32) \
144+
: sizeof(group64))
145+
131146
#define DK_ENTRIES(dk) \
132-
((PyDictKeyEntry*)(&((int8_t*)((dk)->dk_indices))[DK_SIZE(dk) * DK_IXSIZE(dk)]))
147+
((PyDictKeyEntry*)(&((dk)->dk_groups)[DK_GROUP_SIZE(dk) << ((dk)->dk_log2_size)]))
133148

134149
extern uint64_t _pydict_global_version;
135150

0 commit comments

Comments
 (0)