1266 lines
37 KiB
C
1266 lines
37 KiB
C
/* The following is adapted from CPython3.7.
|
|
The exact commit is:
|
|
|
|
- https://github.com/python/cpython/blob/44467e8ea4cea390b0718702291b4cfe8ddd67ed/Objects/dictobject.c
|
|
|
|
|
|
*/
|
|
|
|
/* Dictionary object implementation using a hash table */
|
|
|
|
/* The distribution includes a separate file, Objects/dictnotes.txt,
|
|
describing explorations into dictionary design and optimization.
|
|
It covers typical dictionary use patterns, the parameters for
|
|
tuning dictionaries, and several ideas for possible optimizations.
|
|
*/
|
|
|
|
/* PyDictKeysObject
|
|
|
|
This implements the dictionary's hashtable.
|
|
|
|
As of Python 3.6, this is compact and ordered. Basic idea is described here:
|
|
* https://mail.python.org/pipermail/python-dev/2012-December/123028.html
|
|
* https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
|
|
|
|
layout:
|
|
|
|
+---------------+
|
|
| dk_refcnt |
|
|
| dk_size |
|
|
| dk_lookup |
|
|
| dk_usable |
|
|
| dk_nentries |
|
|
+---------------+
|
|
| dk_indices |
|
|
| |
|
|
+---------------+
|
|
| dk_entries |
|
|
| |
|
|
+---------------+
|
|
|
|
dk_indices is actual hashtable. It holds index in entries, or DKIX_EMPTY(-1)
|
|
or DKIX_DUMMY(-2).
|
|
Size of indices is dk_size. The type of each index in indices varies with dk_size:
|
|
|
|
* int8 for dk_size <= 128
|
|
* int16 for 256 <= dk_size <= 2**15
|
|
* int32 for 2**16 <= dk_size <= 2**31
|
|
* int64 for 2**32 <= dk_size
|
|
|
|
dk_entries is an array of PyDictKeyEntry. Its size is USABLE_FRACTION(dk_size).
|
|
DK_ENTRIES(dk) can be used to get pointer to entries.
|
|
|
|
NOTE: Since negative value is used for DKIX_EMPTY and DKIX_DUMMY, type of
|
|
dk_indices entry is signed integer and int16 is used for table which
|
|
dk_size == 256.
|
|
*/
|
|
|
|
|
|
/*
|
|
The DictObject can be in one of two forms.
|
|
|
|
Either:
|
|
A combined table:
|
|
ma_values == NULL, dk_refcnt == 1.
|
|
Values are stored in the me_value field of the PyDictKeysObject.
|
|
Or:
|
|
|
|
(Numba dev notes: split table logic is removed)
|
|
|
|
A split table:
|
|
ma_values != NULL, dk_refcnt >= 1
|
|
Values are stored in the ma_values array.
|
|
Only string (unicode) keys are allowed.
|
|
All dicts sharing same key must have same insertion order.
|
|
|
|
There are four kinds of slots in the table (slot is index, and
|
|
DK_ENTRIES(keys)[index] if index >= 0):
|
|
|
|
1. Unused. index == DKIX_EMPTY
|
|
Does not hold an active (key, value) pair now and never did. Unused can
|
|
transition to Active upon key insertion. This is each slot's initial state.
|
|
|
|
2. Active. index >= 0, me_key != NULL and me_value != NULL
|
|
Holds an active (key, value) pair. Active can transition to Dummy or
|
|
Pending upon key deletion (for combined and split tables respectively).
|
|
This is the only case in which me_value != NULL.
|
|
|
|
3. Dummy. index == DKIX_DUMMY (combined only)
|
|
Previously held an active (key, value) pair, but that was deleted and an
|
|
active pair has not yet overwritten the slot. Dummy can transition to
|
|
Active upon key insertion. Dummy slots cannot be made Unused again
|
|
else the probe sequence in case of collision would have no way to know
|
|
they were once active.
|
|
|
|
4. Pending. index >= 0, key != NULL, and value == NULL (split only)
|
|
Not yet inserted in split-table.
|
|
*/
|
|
|
|
/*
|
|
Preserving insertion order
|
|
|
|
It's simple for combined table. Since dk_entries is mostly append only, we can
|
|
get insertion order by just iterating dk_entries.
|
|
|
|
One exception is .popitem(). It removes last item in dk_entries and decrement
|
|
dk_nentries to achieve amortized O(1). Since there are DKIX_DUMMY remains in
|
|
dk_indices, we can't increment dk_usable even though dk_nentries is
|
|
decremented.
|
|
|
|
In split table, inserting into pending entry is allowed only for dk_entries[ix]
|
|
where ix == mp->ma_used. Inserting into other index and deleting item cause
|
|
converting the dict to the combined table.
|
|
*/
|
|
|
|
|
|
/* D_MINSIZE (adapted from PyDict_MINSIZE)
|
|
* is the starting size for any new dict.
|
|
* 8 allows dicts with no more than 5 active entries; experiments suggested
|
|
* this suffices for the majority of dicts (consisting mostly of usually-small
|
|
* dicts created to pass keyword arguments).
|
|
* Making this 8, rather than 4 reduces the number of resizes for most
|
|
* dictionaries, without any significant extra memory use.
|
|
*/
|
|
#define D_MINSIZE 8
|
|
|
|
#include "dictobject.h"
|
|
|
|
|
|
#if defined(_MSC_VER)
|
|
# if _MSC_VER <= 1900 /* Visual Studio 2015 (toolset 14.0) and older */
|
|
typedef __int8 int8_t;
|
|
typedef __int16 int16_t;
|
|
typedef __int32 int32_t;
|
|
typedef __int64 int64_t;
|
|
# endif
|
|
/* Use _alloca() to dynamically allocate on the stack on MSVC */
|
|
#define STACK_ALLOC(Type, Name, Size) Type * const Name = _alloca(Size);
|
|
#else
|
|
#define STACK_ALLOC(Type, Name, Size) Type Name[Size];
|
|
#endif
|
|
|
|
|
|
/*[clinic input]
|
|
class dict "PyDictObject *" "&PyDict_Type"
|
|
[clinic start generated code]*/
|
|
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=f157a5a0ce9589d6]*/
|
|
|
|
|
|
/*
|
|
To ensure the lookup algorithm terminates, there must be at least one Unused
|
|
slot (NULL key) in the table.
|
|
To avoid slowing down lookups on a near-full table, we resize the table when
|
|
it's USABLE_FRACTION (currently two-thirds) full.
|
|
*/
|
|
|
|
#define PERTURB_SHIFT 5
|
|
|
|
/*
|
|
Major subtleties ahead: Most hash schemes depend on having a "good" hash
|
|
function, in the sense of simulating randomness. Python doesn't: its most
|
|
important hash functions (for ints) are very regular in common
|
|
cases:
|
|
|
|
>>>[hash(i) for i in range(4)]
|
|
[0, 1, 2, 3]
|
|
|
|
This isn't necessarily bad! To the contrary, in a table of size 2**i, taking
|
|
the low-order i bits as the initial table index is extremely fast, and there
|
|
are no collisions at all for dicts indexed by a contiguous range of ints. So
|
|
this gives better-than-random behavior in common cases, and that's very
|
|
desirable.
|
|
|
|
OTOH, when collisions occur, the tendency to fill contiguous slices of the
|
|
hash table makes a good collision resolution strategy crucial. Taking only
|
|
the last i bits of the hash code is also vulnerable: for example, consider
|
|
the list [i << 16 for i in range(20000)] as a set of keys. Since ints are
|
|
their own hash codes, and this fits in a dict of size 2**15, the last 15 bits
|
|
of every hash code are all 0: they *all* map to the same table index.
|
|
|
|
But catering to unusual cases should not slow the usual ones, so we just take
|
|
the last i bits anyway. It's up to collision resolution to do the rest. If
|
|
we *usually* find the key we're looking for on the first try (and, it turns
|
|
out, we usually do -- the table load factor is kept under 2/3, so the odds
|
|
are solidly in our favor), then it makes best sense to keep the initial index
|
|
computation dirt cheap.
|
|
|
|
The first half of collision resolution is to visit table indices via this
|
|
recurrence:
|
|
|
|
j = ((5*j) + 1) mod 2**i
|
|
|
|
For any initial j in range(2**i), repeating that 2**i times generates each
|
|
int in range(2**i) exactly once (see any text on random-number generation for
|
|
proof). By itself, this doesn't help much: like linear probing (setting
|
|
j += 1, or j -= 1, on each loop trip), it scans the table entries in a fixed
|
|
order. This would be bad, except that's not the only thing we do, and it's
|
|
actually *good* in the common cases where hash keys are consecutive. In an
|
|
example that's really too small to make this entirely clear, for a table of
|
|
size 2**3 the order of indices is:
|
|
|
|
0 -> 1 -> 6 -> 7 -> 4 -> 5 -> 2 -> 3 -> 0 [and here it's repeating]
|
|
|
|
If two things come in at index 5, the first place we look after is index 2,
|
|
not 6, so if another comes in at index 6 the collision at 5 didn't hurt it.
|
|
Linear probing is deadly in this case because there the fixed probe order
|
|
is the *same* as the order consecutive keys are likely to arrive. But it's
|
|
extremely unlikely hash codes will follow a 5*j+1 recurrence by accident,
|
|
and certain that consecutive hash codes do not.
|
|
|
|
The other half of the strategy is to get the other bits of the hash code
|
|
into play. This is done by initializing a (unsigned) vrbl "perturb" to the
|
|
full hash code, and changing the recurrence to:
|
|
|
|
perturb >>= PERTURB_SHIFT;
|
|
j = (5*j) + 1 + perturb;
|
|
use j % 2**i as the next table index;
|
|
|
|
Now the probe sequence depends (eventually) on every bit in the hash code,
|
|
and the pseudo-scrambling property of recurring on 5*j+1 is more valuable,
|
|
because it quickly magnifies small differences in the bits that didn't affect
|
|
the initial index. Note that because perturb is unsigned, if the recurrence
|
|
is executed often enough perturb eventually becomes and remains 0. At that
|
|
point (very rarely reached) the recurrence is on (just) 5*j+1 again, and
|
|
that's certain to find an empty slot eventually (since it generates every int
|
|
in range(2**i), and we make sure there's always at least one empty slot).
|
|
|
|
Selecting a good value for PERTURB_SHIFT is a balancing act. You want it
|
|
small so that the high bits of the hash code continue to affect the probe
|
|
sequence across iterations; but you want it large so that in really bad cases
|
|
the high-order hash bits have an effect on early iterations. 5 was "the
|
|
best" in minimizing total collisions across experiments Tim Peters ran (on
|
|
both normal and pathological cases), but 4 and 6 weren't significantly worse.
|
|
|
|
Historical: Reimer Behrends contributed the idea of using a polynomial-based
|
|
approach, using repeated multiplication by x in GF(2**n) where an irreducible
|
|
polynomial for each table size was chosen such that x was a primitive root.
|
|
Christian Tismer later extended that to use division by x instead, as an
|
|
efficient way to get the high bits of the hash code into play. This scheme
|
|
also gave excellent collision statistics, but was more expensive: two
|
|
if-tests were required inside the loop; computing "the next" index took about
|
|
the same number of operations but without as much potential parallelism
|
|
(e.g., computing 5*j can go on at the same time as computing 1+perturb in the
|
|
above, and then shifting perturb can be done while the table index is being
|
|
masked); and the PyDictObject struct required a member to hold the table's
|
|
polynomial. In Tim's experiments the current scheme ran faster, produced
|
|
equally good collision statistics, needed less code & used less memory.
|
|
|
|
*/
|
|
|
|
/* Negative sentinels used in place of an entry index. */
#define DKIX_EMPTY (-1)  /* unused slot; the same value marks an empty entry's hash */
#define DKIX_DUMMY (-2)  /* Used internally: slot whose entry has been removed */
#define DKIX_ERROR (-3)  /* lookup result when the key comparison reported an error */
|
|
|
|
/* Status codes returned by the dictionary functions in this file.
 * Non-negative values are successes, negative values are failures.
 */
typedef enum {
    /* successes */
    OK                 =  0,
    OK_REPLACED        =  1,   /* insert overwrote the value of an existing key */
    /* failures */
    ERR_NO_MEMORY      = -1,   /* allocation failed or a size overflowed */
    ERR_DICT_MUTATED   = -2,   /* dict changed while being iterated */
    ERR_ITER_EXHAUSTED = -3,   /* iterator has no more entries */
    ERR_DICT_EMPTY     = -4,   /* popitem on an empty dict */
    ERR_CMP_FAILED     = -5,   /* key comparison reported an error */
} Status;
|
|
|
|
|
|
#ifndef NDEBUG
|
|
/* Count the non-zero bytes in the n-byte region starting at obj.
 * Returns 0 when the whole region is zero-filled.
 */
static int
mem_cmp_zeros(void *obj, size_t n)
{
    const char *cursor = obj;
    const char *const end = cursor + n;
    int nonzero = 0;

    while (cursor < end) {
        if (*cursor++ != 0) {
            nonzero += 1;
        }
    }
    return nonzero;
}
|
|
#endif
|
|
|
|
/* Mask applied to a hash (or probe value) to obtain a slot number.
 * NOTE(review): only a valid mask when (dk)->size is a power of two.
 */
#define D_MASK(dk) ((dk)->size - 1)
/* Minimum table size requested when an insertion forces a resize. */
#define D_GROWTH_RATE(d) ((d)->used * 3)
|
|
|
|
static int
|
|
ix_size(Py_ssize_t size) {
|
|
if ( size < 0xff ) return 1;
|
|
if ( size < 0xffff ) return 2;
|
|
if ( size < 0xffffffff ) return 4;
|
|
return sizeof(int64_t);
|
|
}
|
|
|
|
#ifndef NDEBUG
|
|
/* NOTE: This function is only used in assert()s */
|
|
/* Pass the address held in *ptr* through aligned_size() and return the
 * result as a pointer.  A pointer is considered aligned when the returned
 * value compares equal to the input.
 */
static void*
aligned_pointer(void *ptr) {
    const size_t address = (size_t)ptr;
    return (void*)aligned_size(address);
}
|
|
#endif
|
|
|
|
/* lookup indices. returns DKIX_EMPTY, DKIX_DUMMY, or ix >=0 */
|
|
static Py_ssize_t
|
|
get_index(NB_DictKeys *dk, Py_ssize_t i)
|
|
{
|
|
Py_ssize_t s = dk->size;
|
|
Py_ssize_t ix;
|
|
|
|
if (s <= 0xff) {
|
|
int8_t *indices = (int8_t*)(dk->indices);
|
|
assert (i < dk->size);
|
|
ix = indices[i];
|
|
}
|
|
else if (s <= 0xffff) {
|
|
int16_t *indices = (int16_t*)(dk->indices);
|
|
ix = indices[i];
|
|
}
|
|
#if SIZEOF_VOID_P > 4
|
|
else if (s > 0xffffffff) {
|
|
int64_t *indices = (int64_t*)(dk->indices);
|
|
ix = indices[i];
|
|
}
|
|
#endif
|
|
else {
|
|
int32_t *indices = (int32_t*)(dk->indices);
|
|
ix = indices[i];
|
|
}
|
|
assert(ix >= DKIX_DUMMY);
|
|
return ix;
|
|
}
|
|
|
|
/* write to indices. */
|
|
static void
|
|
set_index(NB_DictKeys *dk, Py_ssize_t i, Py_ssize_t ix)
|
|
{
|
|
Py_ssize_t s = dk->size;
|
|
|
|
assert(ix >= DKIX_DUMMY);
|
|
|
|
if (s <= 0xff) {
|
|
int8_t *indices = (int8_t*)(dk->indices);
|
|
assert(ix <= 0x7f);
|
|
indices[i] = (char)ix;
|
|
}
|
|
else if (s <= 0xffff) {
|
|
int16_t *indices = (int16_t*)(dk->indices);
|
|
assert(ix <= 0x7fff);
|
|
indices[i] = (int16_t)ix;
|
|
}
|
|
#if SIZEOF_VOID_P > 4
|
|
else if (s > 0xffffffff) {
|
|
int64_t *indices = (int64_t*)(dk->indices);
|
|
indices[i] = ix;
|
|
}
|
|
#endif
|
|
else {
|
|
int32_t *indices = (int32_t*)(dk->indices);
|
|
assert(ix <= 0x7fffffff);
|
|
indices[i] = (int32_t)ix;
|
|
}
|
|
}
|
|
|
|
|
|
/* USABLE_FRACTION is the maximum dictionary load.
|
|
* Increasing this ratio makes dictionaries more dense resulting in more
|
|
* collisions. Decreasing it improves sparseness at the expense of spreading
|
|
* indices over more cache lines and at the cost of total memory consumed.
|
|
*
|
|
* USABLE_FRACTION must obey the following:
|
|
* (0 < USABLE_FRACTION(n) < n) for all n >= 2
|
|
*
|
|
* USABLE_FRACTION should be quick to calculate.
|
|
* Fractions around 1/2 to 2/3 seem to work well in practice.
|
|
*/
|
|
|
|
/* Entries a table with n slots may hold: floor(2n/3) for non-negative n. */
#define USABLE_FRACTION(n) (((n) << 1)/3) // ratio: 2/3
|
|
|
|
/* Alternative fraction that is otherwise close enough to 2n/3 to make
|
|
* little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10.
|
|
* 32 * 2/3 = 21, 32 * 5/8 = 20.
|
|
* Its advantage is that it is faster to compute on machines with slow division.
|
|
* #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3)) // ratio: 5/8
|
|
*/
|
|
|
|
|
|
/* INV_USABLE_FRACTION gives the inverse of USABLE_FRACTION.
|
|
* Used for sizing a new dictionary to a specified number of keys.
|
|
*
|
|
* NOTE: If the denominator of the USABLE_FRACTION ratio is not a power
|
|
* of 2, must add 1 to the result of the inverse for correct sizing.
|
|
*
|
|
* For example, when USABLE_FRACTION ratio = 5/8 (8 is a power of 2):
|
|
* #define INV_USABLE_FRACTION(n) (((n) << 3)/5) // inv_ratio: 8/5
|
|
*
|
|
* When USABLE_FRACTION ratio = 5/7 (7 is not a power of 2):
|
|
* #define INV_USABLE_FRACTION(n) ((7*(n))/5 + 1) // inv_ratio: 7/5
|
|
*/
|
|
|
|
/* A slot count s = n + n/2 + 1 for which USABLE_FRACTION(s) >= n. */
#define INV_USABLE_FRACTION(n) ((n) + ((n) >> 1) + 1) // inv_ratio: 3/2
|
|
|
|
|
|
/* GROWTH_RATE. Growth rate upon hitting maximum load.
|
|
* Currently set to used*3.
|
|
* This means that dicts double in size when growing without deletions,
|
|
* but have more head room when the number of deletions is on a par with the
|
|
* number of insertions. See also bpo-17563 and bpo-33205.
|
|
*
|
|
* GROWTH_RATE was set to used*4 up to version 3.2.
|
|
* GROWTH_RATE was set to used*2 in version 3.3.0
|
|
* GROWTH_RATE was set to used*2 + capacity/2 in 3.4.0-3.6.0.
|
|
*/
|
|
/* CPython-named spelling of the growth rate.  The dict structure in this
 * file keeps its live-entry count in `used` (CPython calls it `ma_used`),
 * so read that field.
 */
#define GROWTH_RATE(d) ((d)->used*3)
|
|
|
|
|
|
static NB_DictEntry*
|
|
get_entry(NB_DictKeys *dk, Py_ssize_t idx) {
|
|
Py_ssize_t offset;
|
|
char *ptr;
|
|
|
|
assert (idx < dk->size);
|
|
offset = idx * dk->entry_size;
|
|
ptr = dk->indices + dk->entry_offset + offset;
|
|
return (NB_DictEntry*)ptr;
|
|
}
|
|
|
|
static void
|
|
zero_key(NB_DictKeys *dk, char *data){
|
|
memset(data, 0, dk->key_size);
|
|
}
|
|
|
|
static void
|
|
zero_val(NB_DictKeys *dk, char *data){
|
|
memset(data, 0, dk->val_size);
|
|
}
|
|
|
|
static void
|
|
copy_key(NB_DictKeys *dk, char *dst, const char *src){
|
|
memcpy(dst, src, dk->key_size);
|
|
}
|
|
|
|
static void
|
|
copy_val(NB_DictKeys *dk, char *dst, const char *src){
|
|
memcpy(dst, src, dk->val_size);
|
|
}
|
|
|
|
/* Returns -1 for error; 0 for not equal; 1 for equal */
|
|
static int
|
|
key_equal(NB_DictKeys *dk, const char *lhs, const char *rhs) {
|
|
if ( dk->methods.key_equal ) {
|
|
return dk->methods.key_equal(lhs, rhs);
|
|
} else {
|
|
return memcmp(lhs, rhs, dk->key_size) == 0;
|
|
}
|
|
}
|
|
|
|
static char *
|
|
entry_get_key(NB_DictKeys *dk, NB_DictEntry* entry) {
|
|
char * out = entry->keyvalue;
|
|
assert (out == aligned_pointer(out));
|
|
return out;
|
|
}
|
|
|
|
static char *
|
|
entry_get_val(NB_DictKeys *dk, NB_DictEntry* entry) {
|
|
char * out = entry_get_key(dk, entry) + aligned_size(dk->key_size);
|
|
assert (out == aligned_pointer(out));
|
|
return out;
|
|
}
|
|
|
|
static void
|
|
dk_incref_key(NB_DictKeys *dk, const char *key) {
|
|
if ( dk->methods.key_incref ) {
|
|
dk->methods.key_incref(key);
|
|
}
|
|
}
|
|
|
|
static void
|
|
dk_decref_key(NB_DictKeys *dk, const char *key) {
|
|
if ( dk->methods.key_decref ) {
|
|
dk->methods.key_decref(key);
|
|
}
|
|
}
|
|
|
|
static void
|
|
dk_incref_val(NB_DictKeys *dk, const char *val) {
|
|
if ( dk->methods.value_incref ) {
|
|
dk->methods.value_incref(val);
|
|
}
|
|
}
|
|
|
|
static void
|
|
dk_decref_val(NB_DictKeys *dk, const char *val) {
|
|
if ( dk->methods.value_decref ) {
|
|
dk->methods.value_decref(val);
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
numba_dictkeys_free(NB_DictKeys *dk) {
|
|
/* Clear all references from the entries */
|
|
Py_ssize_t i;
|
|
NB_DictEntry *ep;
|
|
|
|
for (i = 0; i < dk->nentries; i++) {
|
|
ep = get_entry(dk, i);
|
|
if (ep->hash != DKIX_EMPTY) {
|
|
dk_decref_key(dk, entry_get_key(dk, ep));
|
|
dk_decref_val(dk, entry_get_val(dk, ep));
|
|
}
|
|
}
|
|
/* Deallocate */
|
|
free(dk);
|
|
}
|
|
|
|
void
|
|
numba_dict_free(NB_Dict *d) {
|
|
numba_dictkeys_free(d->keys);
|
|
free(d);
|
|
}
|
|
|
|
Py_ssize_t
|
|
numba_dict_length(NB_Dict *d) {
|
|
return d->used;
|
|
}
|
|
|
|
/* Allocate new dictionary keys
|
|
|
|
Adapted from CPython's new_keys_object().
|
|
*/
|
|
int
|
|
numba_dictkeys_new(NB_DictKeys **out, Py_ssize_t size, Py_ssize_t key_size, Py_ssize_t val_size) {
|
|
Py_ssize_t usable = USABLE_FRACTION(size);
|
|
Py_ssize_t index_size = ix_size(size);
|
|
Py_ssize_t entry_size = aligned_size(sizeof(NB_DictEntry) + aligned_size(key_size) + aligned_size(val_size));
|
|
Py_ssize_t entry_offset = aligned_size(index_size * size);
|
|
Py_ssize_t alloc_size = sizeof(NB_DictKeys) + entry_offset + entry_size * usable;
|
|
|
|
NB_DictKeys *dk = malloc(aligned_size(alloc_size));
|
|
if (!dk) return ERR_NO_MEMORY;
|
|
|
|
assert ( size >= D_MINSIZE );
|
|
|
|
dk->size = size;
|
|
dk->usable = usable;
|
|
dk->nentries = 0;
|
|
dk->key_size = key_size;
|
|
dk->val_size = val_size;
|
|
dk->entry_offset = entry_offset;
|
|
dk->entry_size = entry_size;
|
|
|
|
assert (aligned_pointer(dk->indices) == dk->indices );
|
|
/* Ensure that the method table is all nulls */
|
|
memset(&dk->methods, 0x00, sizeof(type_based_methods_table));
|
|
/* Ensure hash is (-1) for empty entry */
|
|
memset(dk->indices, 0xff, entry_offset + entry_size * usable);
|
|
|
|
*out = dk;
|
|
return OK;
|
|
}
|
|
|
|
|
|
/* Allocate new dictionary */
|
|
int
|
|
numba_dict_new(NB_Dict **out, Py_ssize_t size, Py_ssize_t key_size, Py_ssize_t val_size) {
|
|
NB_DictKeys *dk;
|
|
NB_Dict *d;
|
|
int status = numba_dictkeys_new(&dk, size, key_size, val_size);
|
|
if (status != OK) return status;
|
|
|
|
d = malloc(sizeof(NB_Dict));
|
|
if (!d) {
|
|
numba_dictkeys_free(dk);
|
|
return ERR_NO_MEMORY;
|
|
}
|
|
|
|
d->used = 0;
|
|
d->keys = dk;
|
|
*out = d;
|
|
return OK;
|
|
}
|
|
|
|
|
|
/*
|
|
Adapted from CPython lookdict_index().
|
|
|
|
Search index of hash table from offset of entry table
|
|
*/
|
|
static Py_ssize_t
|
|
lookdict_index(NB_DictKeys *dk, Py_hash_t hash, Py_ssize_t index)
|
|
{
|
|
size_t mask = D_MASK(dk);
|
|
size_t perturb = (size_t)hash;
|
|
size_t i = (size_t)hash & mask;
|
|
|
|
for (;;) {
|
|
Py_ssize_t ix = get_index(dk, i);
|
|
if (ix == index) {
|
|
return i;
|
|
}
|
|
if (ix == DKIX_EMPTY) {
|
|
return DKIX_EMPTY;
|
|
}
|
|
perturb >>= PERTURB_SHIFT;
|
|
i = mask & (i*5 + perturb + 1);
|
|
}
|
|
assert(0 && "unreachable");
|
|
}
|
|
|
|
/*
|
|
|
|
Adapted from the CPython3.7 lookdict().
|
|
|
|
The basic lookup function used by all operations.
|
|
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
|
|
Open addressing is preferred over chaining since the link overhead for
|
|
chaining would be substantial (100% with typical malloc overhead).
|
|
|
|
The initial probe index is computed as hash mod the table size. Subsequent
|
|
probe indices are computed as explained earlier.
|
|
|
|
All arithmetic on hash should ignore overflow.
|
|
|
|
The details in this version are due to Tim Peters, building on many past
|
|
contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
|
|
Christian Tismer.
|
|
|
|
lookdict() is general-purpose, and may return DKIX_ERROR if (and only if) a
|
|
comparison raises an exception.
|
|
lookdict_unicode() below is specialized to string keys, comparison of which can
|
|
never raise an exception; that function can never return DKIX_ERROR when key
|
|
is string. Otherwise, it falls back to lookdict().
|
|
lookdict_unicode_nodummy is further specialized for string keys that cannot be
|
|
the <dummy> value.
|
|
For both, when the key isn't found a DKIX_EMPTY is returned.
|
|
*/
|
|
Py_ssize_t
|
|
numba_dict_lookup(NB_Dict *d, const char *key_bytes, Py_hash_t hash, char *oldval_bytes)
|
|
{
|
|
NB_DictKeys *dk = d->keys;
|
|
size_t mask = D_MASK(dk);
|
|
size_t perturb = hash;
|
|
size_t i = (size_t)hash & mask;
|
|
|
|
for (;;) {
|
|
Py_ssize_t ix = get_index(dk, i);
|
|
if (ix == DKIX_EMPTY) {
|
|
zero_val(dk, oldval_bytes);
|
|
return ix;
|
|
}
|
|
if (ix >= 0) {
|
|
NB_DictEntry *ep = get_entry(dk, ix);
|
|
const char *startkey = NULL;
|
|
if (ep->hash == hash) {
|
|
int cmp;
|
|
|
|
startkey = entry_get_key(dk, ep);
|
|
cmp = key_equal(dk, startkey, key_bytes);
|
|
if (cmp < 0) {
|
|
// error'ed in comparison
|
|
memset(oldval_bytes, 0, dk->val_size);
|
|
return DKIX_ERROR;
|
|
}
|
|
if (cmp > 0) {
|
|
// key is equal; retrieve the value.
|
|
copy_val(dk, oldval_bytes, entry_get_val(dk, ep));
|
|
return ix;
|
|
}
|
|
}
|
|
}
|
|
perturb >>= PERTURB_SHIFT;
|
|
i = (i*5 + perturb + 1) & mask;
|
|
}
|
|
assert(0 && "unreachable");
|
|
}
|
|
|
|
|
|
/* Internal function to find slot for an item from its hash
|
|
when it is known that the key is not present in the dict.
|
|
|
|
The dict must be combined. */
|
|
static Py_ssize_t
|
|
find_empty_slot(NB_DictKeys *dk, Py_hash_t hash){
|
|
size_t mask;
|
|
size_t i;
|
|
Py_ssize_t ix;
|
|
size_t perturb;
|
|
|
|
assert(dk != NULL);
|
|
|
|
mask = D_MASK(dk);
|
|
i = hash & mask;
|
|
ix = get_index(dk, i);
|
|
for (perturb = hash; ix >= 0;) {
|
|
perturb >>= PERTURB_SHIFT;
|
|
i = (i*5 + perturb + 1) & mask;
|
|
ix = get_index(dk, i);
|
|
}
|
|
return i;
|
|
}
|
|
|
|
static int
|
|
insertion_resize(NB_Dict *d)
|
|
{
|
|
return numba_dict_resize(d, D_GROWTH_RATE(d));
|
|
}
|
|
|
|
int
|
|
numba_dict_insert(
|
|
NB_Dict *d,
|
|
const char *key_bytes,
|
|
Py_hash_t hash,
|
|
const char *val_bytes,
|
|
char *oldval_bytes
|
|
)
|
|
{
|
|
|
|
NB_DictKeys *dk = d->keys;
|
|
|
|
Py_ssize_t ix = numba_dict_lookup(d, key_bytes, hash, oldval_bytes);
|
|
if (ix == DKIX_ERROR) {
|
|
// exception in key comparison in lookup.
|
|
return ERR_CMP_FAILED;
|
|
}
|
|
|
|
if (ix == DKIX_EMPTY) {
|
|
/* Insert into new slot */
|
|
Py_ssize_t hashpos;
|
|
NB_DictEntry *ep;
|
|
|
|
if (dk->usable <= 0) {
|
|
/* Need to resize */
|
|
if (insertion_resize(d) != OK)
|
|
return ERR_NO_MEMORY;
|
|
else
|
|
dk = d->keys; // reload
|
|
}
|
|
hashpos = find_empty_slot(dk, hash);
|
|
ep = get_entry(dk, dk->nentries);
|
|
set_index(dk, hashpos, dk->nentries);
|
|
copy_key(dk, entry_get_key(dk, ep), key_bytes);
|
|
assert ( hash != -1 );
|
|
ep->hash = hash;
|
|
copy_val(dk, entry_get_val(dk, ep), val_bytes);
|
|
|
|
/* incref */
|
|
dk_incref_key(dk, key_bytes);
|
|
dk_incref_val(dk, val_bytes);
|
|
|
|
d->used += 1;
|
|
dk->usable -= 1;
|
|
dk->nentries += 1;
|
|
assert (dk->usable >= 0);
|
|
return OK;
|
|
} else {
|
|
/* Replace existing value in the slot at ix */
|
|
/* decref old value */
|
|
dk_decref_val(dk, oldval_bytes);
|
|
// Replace the previous value
|
|
copy_val(dk, entry_get_val(dk, get_entry(dk, ix)), val_bytes);
|
|
|
|
/* incref */
|
|
dk_incref_val(dk, val_bytes);
|
|
return OK_REPLACED;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Adapted from build_indices().
|
|
Internal routine used by dictresize() to build a hashtable of entries.
|
|
*/
|
|
void
|
|
build_indices(NB_DictKeys *keys, Py_ssize_t n) {
|
|
size_t mask = (size_t)D_MASK(keys);
|
|
Py_ssize_t ix;
|
|
for (ix = 0; ix != n; ix++) {
|
|
size_t perturb;
|
|
Py_hash_t hash = get_entry(keys, ix)->hash;
|
|
size_t i = hash & mask;
|
|
for (perturb = hash; get_index(keys, i) != DKIX_EMPTY;) {
|
|
perturb >>= PERTURB_SHIFT;
|
|
i = mask & (i*5 + perturb + 1);
|
|
}
|
|
set_index(keys, i, ix);
|
|
}
|
|
}
|
|
|
|
/*
|
|
|
|
Adapted from CPython dictresize().
|
|
|
|
Restructure the table by allocating a new table and reinserting all
|
|
items again. When entries have been deleted, the new table may
|
|
actually be smaller than the old one.
|
|
If a table is split (its keys and hashes are shared, its values are not),
|
|
then the values are temporarily copied into the table, it is resized as
|
|
a combined table, then the me_value slots in the old table are NULLed out.
|
|
After resizing a table is always combined,
|
|
but can be resplit by make_keys_shared().
|
|
*/
|
|
int
|
|
numba_dict_resize(NB_Dict *d, Py_ssize_t minsize) {
|
|
Py_ssize_t newsize, numentries;
|
|
NB_DictKeys *oldkeys;
|
|
int status;
|
|
|
|
/* Find the smallest table size > minused. */
|
|
for (newsize = D_MINSIZE;
|
|
newsize < minsize && newsize > 0;
|
|
newsize <<= 1)
|
|
;
|
|
if (newsize <= 0) {
|
|
return ERR_NO_MEMORY;
|
|
}
|
|
oldkeys = d->keys;
|
|
|
|
/* NOTE: Current odict checks mp->ma_keys to detect resize happen.
|
|
* So we can't reuse oldkeys even if oldkeys->dk_size == newsize.
|
|
* TODO: Try reusing oldkeys when reimplement odict.
|
|
*/
|
|
|
|
/* Allocate a new table. */
|
|
status = numba_dictkeys_new(
|
|
&d->keys, newsize, oldkeys->key_size, oldkeys->val_size
|
|
);
|
|
if (status != OK) {
|
|
d->keys = oldkeys;
|
|
return status;
|
|
}
|
|
// New table must be large enough.
|
|
assert(d->keys->usable >= d->used);
|
|
// Copy method table
|
|
memcpy(&d->keys->methods, &oldkeys->methods, sizeof(type_based_methods_table));
|
|
|
|
numentries = d->used;
|
|
|
|
if (oldkeys->nentries == numentries) {
|
|
NB_DictEntry *oldentries, *newentries;
|
|
|
|
oldentries = get_entry(oldkeys, 0);
|
|
newentries = get_entry(d->keys, 0);
|
|
memcpy(newentries, oldentries, numentries * oldkeys->entry_size);
|
|
// to avoid decref
|
|
memset(oldentries, 0xff, numentries * oldkeys->entry_size);
|
|
}
|
|
else {
|
|
Py_ssize_t i;
|
|
size_t epi = 0;
|
|
for (i=0; i<numentries; ++i) {
|
|
/*
|
|
ep->hash == (-1) hash means it is empty
|
|
|
|
Here, we skip until a non empty entry is encountered.
|
|
*/
|
|
while( get_entry(oldkeys, epi)->hash == DKIX_EMPTY ) {
|
|
assert( mem_cmp_zeros(entry_get_val(oldkeys, get_entry(oldkeys, epi)), oldkeys->val_size) == 0 );
|
|
epi += 1;
|
|
}
|
|
memcpy(
|
|
get_entry(d->keys, i),
|
|
get_entry(oldkeys, epi),
|
|
oldkeys->entry_size
|
|
);
|
|
get_entry(oldkeys, epi)->hash = DKIX_EMPTY; // to avoid decref
|
|
epi += 1;
|
|
|
|
}
|
|
|
|
}
|
|
numba_dictkeys_free(oldkeys);
|
|
|
|
build_indices(d->keys, numentries);
|
|
d->keys->usable -= numentries;
|
|
d->keys->nentries = numentries;
|
|
return OK;
|
|
}
|
|
|
|
/*
|
|
Adapted from CPython delitem_common
|
|
*/
|
|
int
|
|
numba_dict_delitem(NB_Dict *d, Py_hash_t hash, Py_ssize_t ix)
|
|
{
|
|
Py_ssize_t hashpos;
|
|
NB_DictEntry *ep;
|
|
NB_DictKeys *dk = d->keys;
|
|
|
|
hashpos = lookdict_index(dk, hash, ix);
|
|
assert(hashpos >= 0);
|
|
|
|
d->used -= 1;
|
|
ep = get_entry(dk, ix);
|
|
set_index(dk, hashpos, DKIX_DUMMY);
|
|
|
|
/* decref */
|
|
dk_decref_key(dk, entry_get_key(dk, ep));
|
|
dk_decref_val(dk, entry_get_val(dk, ep));
|
|
|
|
/* zero the entries */
|
|
zero_key(dk, entry_get_key(dk, ep));
|
|
zero_val(dk, entry_get_val(dk, ep));
|
|
ep->hash = DKIX_EMPTY; // to mark it as empty;
|
|
|
|
return OK;
|
|
}
|
|
|
|
|
|
/**
|
|
* Adapted from dict_popitem
|
|
*
|
|
*/
|
|
int
|
|
numba_dict_popitem(NB_Dict *d, char *key_bytes, char *val_bytes)
|
|
{
|
|
Py_ssize_t i, j;
|
|
char *key_ptr, *val_ptr;
|
|
NB_DictEntry *ep = NULL;
|
|
|
|
if (d->used == 0) {
|
|
return ERR_DICT_EMPTY;
|
|
}
|
|
|
|
/* Pop last item */
|
|
i = d->keys->nentries - 1;
|
|
while (i >= 0 && (ep = get_entry(d->keys, i))->hash == DKIX_EMPTY ) {
|
|
i--;
|
|
}
|
|
assert(i >= 0);
|
|
|
|
j = lookdict_index(d->keys, ep->hash, i);
|
|
assert(j >= 0);
|
|
assert(get_index(d->keys, j) == i);
|
|
set_index(d->keys, j, DKIX_DUMMY);
|
|
|
|
key_ptr = entry_get_key(d->keys, ep);
|
|
val_ptr = entry_get_val(d->keys, ep);
|
|
|
|
copy_key(d->keys, key_bytes, key_ptr);
|
|
copy_val(d->keys, val_bytes, val_ptr);
|
|
|
|
zero_key(d->keys, key_ptr);
|
|
zero_val(d->keys, val_ptr);
|
|
|
|
/* We can't dk_usable++ since there is DKIX_DUMMY in indices */
|
|
d->keys->nentries = i;
|
|
d->used--;
|
|
|
|
return OK;
|
|
}
|
|
|
|
void
|
|
numba_dict_dump(NB_Dict *d) {
|
|
long long i, j, k;
|
|
long long size, n;
|
|
char *cp;
|
|
NB_DictEntry *ep;
|
|
NB_DictKeys *dk = d->keys;
|
|
|
|
n = d->used;
|
|
size = dk->nentries;
|
|
|
|
printf("Dict dump\n");
|
|
printf(" key_size = %lld\n", (long long)d->keys->key_size);
|
|
printf(" val_size = %lld\n", (long long)d->keys->val_size);
|
|
|
|
for (i = 0, j = 0; i < size; i++) {
|
|
ep = get_entry(dk, i);
|
|
if (ep->hash != DKIX_EMPTY) {
|
|
long long hash = ep->hash;
|
|
printf(" key=");
|
|
for (cp=entry_get_key(dk, ep), k=0; k < d->keys->key_size; ++k, ++cp){
|
|
printf("%02x ", ((int)*cp) & 0xff);
|
|
}
|
|
printf(" hash=%llu value=", hash);
|
|
for (cp=entry_get_val(dk, ep), k=0; k < d->keys->val_size; ++k, ++cp){
|
|
printf("%02x ", ((int)*cp) & 0xff);
|
|
}
|
|
printf("\n");
|
|
j++;
|
|
}
|
|
}
|
|
printf("j = %lld; n = %lld\n", j, n);
|
|
assert(j == n);
|
|
}
|
|
|
|
size_t
|
|
numba_dict_iter_sizeof() {
|
|
return sizeof(NB_DictIter);
|
|
}
|
|
|
|
void
|
|
numba_dict_iter(NB_DictIter *it, NB_Dict *d) {
|
|
it->parent = d;
|
|
it->parent_keys = d->keys;
|
|
it->size = d->used;
|
|
it->pos = 0;
|
|
}
|
|
|
|
int
|
|
numba_dict_iter_next(NB_DictIter *it, const char **key_ptr, const char **val_ptr) {
|
|
/* Detect dictionary mutation during iteration */
|
|
NB_DictKeys *dk;
|
|
if (it->parent->keys != it->parent_keys ||
|
|
it->parent->used != it->size) {
|
|
return ERR_DICT_MUTATED;
|
|
}
|
|
dk = it->parent_keys;
|
|
while ( it->pos < dk->nentries ) {
|
|
NB_DictEntry *ep = get_entry(dk, it->pos++);
|
|
if ( ep->hash != DKIX_EMPTY ) {
|
|
*key_ptr = entry_get_key(dk, ep);
|
|
*val_ptr = entry_get_val(dk, ep);
|
|
return OK;
|
|
}
|
|
}
|
|
return ERR_ITER_EXHAUSTED;
|
|
}
|
|
|
|
int
|
|
numba_dict_insert_ez(
|
|
NB_Dict *d,
|
|
const char *key_bytes,
|
|
Py_hash_t hash,
|
|
const char *val_bytes
|
|
)
|
|
{
|
|
STACK_ALLOC(char, old, d->keys->val_size);
|
|
return numba_dict_insert(d, key_bytes, hash, val_bytes, old);
|
|
}
|
|
|
|
|
|
/* Allocate a new dictionary with enough space to hold n_keys without resizes */
|
|
int
|
|
numba_dict_new_sized(NB_Dict **out, Py_ssize_t n_keys, Py_ssize_t key_size, Py_ssize_t val_size) {
|
|
|
|
/* Respect D_MINSIZE */
|
|
if (n_keys <= USABLE_FRACTION(D_MINSIZE)) {
|
|
return numba_dict_new(out, D_MINSIZE, key_size, val_size);
|
|
}
|
|
|
|
/* Adjust for load factor */
|
|
Py_ssize_t size = INV_USABLE_FRACTION(n_keys) - 1;
|
|
|
|
/* Round up size to the nearest power of 2. */
|
|
for (unsigned int shift = 1; shift < sizeof(Py_ssize_t) * CHAR_BIT; shift <<= 1) {
|
|
size |= (size >> shift);
|
|
}
|
|
size++;
|
|
|
|
/* Handle overflows */
|
|
if (size <= 0) {
|
|
return ERR_NO_MEMORY;
|
|
}
|
|
|
|
return numba_dict_new(out, size, key_size, val_size);
|
|
}
|
|
|
|
|
|
void
|
|
numba_dict_set_method_table(NB_Dict *d, type_based_methods_table *methods)
|
|
{
|
|
memcpy(&d->keys->methods, methods, sizeof(type_based_methods_table));
|
|
}
|
|
|
|
|
|
/* Test helper: evaluate CASE once; when it is false, print the failing
 * expression together with file and line, then make the enclosing function
 * return 1.
 *
 * The body is wrapped in do { ... } while (0) so that `CHECK(x);` is a
 * single statement and stays correct inside an unbraced if/else.
 */
#define CHECK(CASE) do { \
    if ( !(CASE) ) { \
        printf("'%s' failed file %s:%d\n", #CASE, __FILE__, __LINE__); \
        return 1; \
    } \
} while (0)
|
|
|
|
int
|
|
numba_test_dict(void) {
|
|
NB_Dict *d;
|
|
int status;
|
|
Py_ssize_t ix;
|
|
Py_ssize_t usable;
|
|
Py_ssize_t it_count;
|
|
const char *it_key, *it_val;
|
|
NB_DictIter iter;
|
|
|
|
#if defined(_MSC_VER)
|
|
/* So that VS2008 compiler is happy */
|
|
char *got_key, *got_value;
|
|
got_key = _alloca(4);
|
|
got_value = _alloca(8);
|
|
#else
|
|
char got_key[4];
|
|
char got_value[8];
|
|
#endif
|
|
puts("test_dict");
|
|
|
|
status = numba_dict_new(&d, D_MINSIZE, 4, 8);
|
|
CHECK(status == OK);
|
|
CHECK(d->keys->size == D_MINSIZE);
|
|
CHECK(d->keys->key_size == 4);
|
|
CHECK(d->keys->val_size == 8);
|
|
CHECK(ix_size(d->keys->size) == 1);
|
|
printf("aligned_size(index_size * size) = %d\n", (int)(aligned_size(ix_size(d->keys->size) * d->keys->size)));
|
|
|
|
printf("d %p\n", d);
|
|
printf("d->usable = %u\n", (int)d->keys->usable);
|
|
usable = d->keys->usable;
|
|
printf("d[0] %d\n", (int)((char*)get_entry(d->keys, 0) - (char*)d->keys));
|
|
CHECK ((char*)get_entry(d->keys, 0) - (char*)d->keys->indices == d->keys->entry_offset);
|
|
printf("d[1] %d\n", (int)((char*)get_entry(d->keys, 1) - (char*)d->keys));
|
|
CHECK ((char*)get_entry(d->keys, 1) - (char*)d->keys->indices == d->keys->entry_offset + d->keys->entry_size);
|
|
|
|
ix = numba_dict_lookup(d, "bef", 0xbeef, got_value);
|
|
printf("ix = %d\n", (int)ix);
|
|
CHECK (ix == DKIX_EMPTY);
|
|
|
|
// insert 1st key
|
|
status = numba_dict_insert(d, "bef", 0xbeef, "1234567", got_value);
|
|
CHECK (status == OK);
|
|
CHECK (d->used == 1);
|
|
CHECK (d->keys->usable == usable - d->used);
|
|
|
|
// insert same key
|
|
status = numba_dict_insert(d, "bef", 0xbeef, "1234567", got_value);
|
|
CHECK (status == OK_REPLACED);
|
|
printf("got_value %s\n", got_value);
|
|
CHECK (d->used == 1);
|
|
CHECK (d->keys->usable == usable - d->used);
|
|
|
|
// insert 2nd key
|
|
status = numba_dict_insert(d, "beg", 0xbeef, "1234568", got_value);
|
|
CHECK (status == OK);
|
|
CHECK (d->used == 2);
|
|
CHECK (d->keys->usable == usable - d->used);
|
|
|
|
// insert 3rd key
|
|
status = numba_dict_insert(d, "beh", 0xcafe, "1234569", got_value);
|
|
CHECK (status == OK);
|
|
CHECK (d->used == 3);
|
|
CHECK (d->keys->usable == usable - d->used);
|
|
|
|
// replace key "bef"'s value
|
|
status = numba_dict_insert(d, "bef", 0xbeef, "7654321", got_value);
|
|
CHECK (status == OK_REPLACED);
|
|
CHECK (d->used == 3);
|
|
CHECK (d->keys->usable == usable - d->used);
|
|
|
|
// insert 4th key
|
|
status = numba_dict_insert(d, "bei", 0xcafe, "0_0_0_1", got_value);
|
|
CHECK (status == OK);
|
|
CHECK (d->used == 4);
|
|
CHECK (d->keys->usable == usable - d->used);
|
|
|
|
// insert 5th key
|
|
status = numba_dict_insert(d, "bej", 0xcafe, "0_0_0_2", got_value);
|
|
CHECK (status == OK);
|
|
CHECK (d->used == 5);
|
|
CHECK (d->keys->usable == usable - d->used);
|
|
|
|
// insert 6th key & triggers resize
|
|
status = numba_dict_insert(d, "bek", 0xcafe, "0_0_0_3", got_value);
|
|
CHECK (status == OK);
|
|
CHECK (d->used == 6);
|
|
CHECK (d->keys->usable == USABLE_FRACTION(d->keys->size) - d->used);
|
|
|
|
// Dump
|
|
numba_dict_dump(d);
|
|
|
|
// Make sure everything are still in there
|
|
ix = numba_dict_lookup(d, "bef", 0xbeef, got_value);
|
|
CHECK (ix >= 0);
|
|
CHECK (memcpy(got_value, "7654321", d->keys->val_size));
|
|
|
|
ix = numba_dict_lookup(d, "beg", 0xbeef, got_value);
|
|
CHECK (ix >= 0);
|
|
CHECK (memcpy(got_value, "1234567", d->keys->val_size));
|
|
|
|
ix = numba_dict_lookup(d, "beh", 0xcafe, got_value);
|
|
printf("ix = %d\n", (int)ix);
|
|
CHECK (ix >= 0);
|
|
CHECK (memcpy(got_value, "1234569", d->keys->val_size));
|
|
|
|
ix = numba_dict_lookup(d, "bei", 0xcafe, got_value);
|
|
CHECK (ix >= 0);
|
|
CHECK (memcpy(got_value, "0_0_0_1", d->keys->val_size));
|
|
|
|
ix = numba_dict_lookup(d, "bej", 0xcafe, got_value);
|
|
CHECK (ix >= 0);
|
|
CHECK (memcpy(got_value, "0_0_0_2", d->keys->val_size));
|
|
|
|
ix = numba_dict_lookup(d, "bek", 0xcafe, got_value);
|
|
CHECK (ix >= 0);
|
|
CHECK (memcpy(got_value, "0_0_0_3", d->keys->val_size));
|
|
|
|
// Test delete
|
|
ix = numba_dict_lookup(d, "beg", 0xbeef, got_value);
|
|
status = numba_dict_delitem(d, 0xbeef, ix);
|
|
CHECK (status == OK);
|
|
|
|
ix = numba_dict_lookup(d, "beg", 0xbeef, got_value);
|
|
CHECK (ix == DKIX_EMPTY); // not found
|
|
|
|
ix = numba_dict_lookup(d, "bef", 0xbeef, got_value);
|
|
CHECK (ix >= 0);
|
|
ix = numba_dict_lookup(d, "beh", 0xcafe, got_value);
|
|
CHECK (ix >= 0);
|
|
|
|
|
|
// Test popitem
|
|
// They are always the last item
|
|
status = numba_dict_popitem(d, got_key, got_value);
|
|
CHECK(status == OK);
|
|
CHECK(memcmp("bek", got_key, d->keys->key_size) == 0);
|
|
CHECK(memcmp("0_0_0_3", got_value, d->keys->val_size) == 0);
|
|
|
|
status = numba_dict_popitem(d, got_key, got_value);
|
|
CHECK(status == OK);
|
|
CHECK(memcmp("bej", got_key, d->keys->key_size) == 0);
|
|
CHECK(memcmp("0_0_0_2", got_value, d->keys->val_size) == 0);
|
|
|
|
// Test iterator
|
|
CHECK( d->used > 0 );
|
|
numba_dict_iter(&iter, d);
|
|
it_count = 0;
|
|
while ( (status = numba_dict_iter_next(&iter, &it_key, &it_val)) == OK) {
|
|
it_count += 1; // valid items
|
|
CHECK(it_key != NULL);
|
|
CHECK(it_val != NULL);
|
|
}
|
|
|
|
CHECK(status == ERR_ITER_EXHAUSTED);
|
|
CHECK(d->used == it_count);
|
|
|
|
numba_dict_free(d);
|
|
|
|
/* numba_dict_new_sized() */
|
|
|
|
Py_ssize_t target_size;
|
|
Py_ssize_t n_keys;
|
|
|
|
// Test if minsize dict returned with n_keys=0
|
|
target_size = D_MINSIZE;
|
|
n_keys = 0;
|
|
|
|
numba_dict_new_sized(&d, n_keys, 1, 1);
|
|
CHECK(d->keys->size == target_size);
|
|
CHECK(d->keys->usable == USABLE_FRACTION(target_size));
|
|
numba_dict_free(d);
|
|
|
|
// Test sizing at power of 2 boundary
|
|
target_size = D_MINSIZE * 2;
|
|
n_keys = USABLE_FRACTION(target_size);
|
|
|
|
numba_dict_new_sized(&d, n_keys, 1, 1);
|
|
CHECK(d->keys->size == target_size);
|
|
CHECK(d->keys->usable == n_keys);
|
|
numba_dict_free(d);
|
|
|
|
target_size *= 2;
|
|
n_keys++;
|
|
|
|
numba_dict_new_sized(&d, n_keys, 1, 1);
|
|
CHECK(d->keys->size == target_size);
|
|
CHECK(d->keys->usable > n_keys);
|
|
CHECK(d->keys->usable == USABLE_FRACTION(target_size));
|
|
numba_dict_free(d);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#undef CHECK
|