/* The following is adapted from CPython3.7.  The exact commit is:
   - https://github.com/python/cpython/blob/44467e8ea4cea390b0718702291b4cfe8ddd67ed/Objects/dictobject.c
*/

/* Dictionary object implementation using a hash table */

/* The distribution includes a separate file, Objects/dictnotes.txt,
   describing explorations into dictionary design and optimization.
   It covers typical dictionary use patterns, the parameters for
   tuning dictionaries, and several ideas for possible optimizations. */

/*
PyDictKeysObject

This implements the dictionary's hashtable.

As of Python 3.6, this is compact and ordered.  The basic idea is described here:
* https://mail.python.org/pipermail/python-dev/2012-December/123028.html
* https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html

layout:

+---------------+
| dk_refcnt     |
| dk_size       |
| dk_lookup     |
| dk_usable     |
| dk_nentries   |
+---------------+
| dk_indices    |
|               |
+---------------+
| dk_entries    |
|               |
+---------------+

dk_indices is the actual hashtable.  Each slot holds an index into dk_entries,
or DKIX_EMPTY(-1) or DKIX_DUMMY(-2).  The number of slots is dk_size, and the
integer type of each slot depends on dk_size:

* int8  for           dk_size <= 128
* int16 for 256    <= dk_size <= 2**15
* int32 for 2**16  <= dk_size <= 2**31
* int64 for 2**32  <= dk_size

dk_entries is an array of PyDictKeyEntry.  Its size is USABLE_FRACTION(dk_size).
DK_ENTRIES(dk) can be used to get a pointer to the entries.

NOTE: Since negative values are used for DKIX_EMPTY and DKIX_DUMMY, the type of
each dk_indices slot is a signed integer, and int16 is used for tables with
dk_size == 256.
*/


/*
The DictObject can be in one of two forms.

Either:
  A combined table:
    ma_values == NULL, dk_refcnt == 1.
    Values are stored in the me_value field of the PyDictKeysObject.
Or:
  (Numba dev notes: split table logic is removed)
  A split table:
    ma_values != NULL, dk_refcnt >= 1
    Values are stored in the ma_values array.
    Only string (unicode) keys are allowed.
    All dicts sharing the same keys must have the same insertion order.

There are four kinds of slots in the table (slot is index, and
DK_ENTRIES(keys)[index] if index >= 0):

1. Unused.  index == DKIX_EMPTY
   Does not hold an active (key, value) pair now and never did.  Unused can
   transition to Active upon key insertion.  This is each slot's initial state.

2. Active.  index >= 0, me_key != NULL and me_value != NULL
   Holds an active (key, value) pair.  Active can transition to Dummy or
   Pending upon key deletion (for combined and split tables respectively).
   This is the only case in which me_value != NULL.

3. Dummy.  index == DKIX_DUMMY  (combined only)
   Previously held an active (key, value) pair, but that was deleted and an
   active pair has not yet overwritten the slot.  Dummy can transition to
   Active upon key insertion.  Dummy slots cannot be made Unused again,
   else the probe sequence in case of collision would have no way to know
   they were once active.

4. Pending.  index >= 0, key != NULL, and value == NULL  (split only)
   Not yet inserted in the split table.
*/

/*
Preserving insertion order

It's simple for the combined table.  Since dk_entries is mostly append-only,
we can get the insertion order by just iterating dk_entries.

One exception is .popitem().  It removes the last item in dk_entries and
decrements dk_nentries to achieve amortized O(1).  Since DKIX_DUMMY markers
remain in dk_indices, we can't increment dk_usable even though dk_nentries is
decremented.

In a split table, inserting into a pending entry is allowed only for
dk_entries[ix] where ix == mp->ma_used.
Inserting into other index and deleting item cause converting the dict to the combined table. */ /* D_MINSIZE (adapted from PyDict_MINSIZE) * is the starting size for any new dict. * 8 allows dicts with no more than 5 active entries; experiments suggested * this suffices for the majority of dicts (consisting mostly of usually-small * dicts created to pass keyword arguments). * Making this 8, rather than 4 reduces the number of resizes for most * dictionaries, without any significant extra memory use. */ #define D_MINSIZE 8 #include "dictobject.h" #if defined(_MSC_VER) # if _MSC_VER <= 1900 /* Visual Studio 2014 */ typedef __int8 int8_t; typedef __int16 int16_t; typedef __int32 int32_t; typedef __int64 int64_t; # endif /* Use _alloca() to dynamically allocate on the stack on MSVC */ #define STACK_ALLOC(Type, Name, Size) Type * const Name = _alloca(Size); #else #define STACK_ALLOC(Type, Name, Size) Type Name[Size]; #endif /*[clinic input] class dict "PyDictObject *" "&PyDict_Type" [clinic start generated code]*/ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=f157a5a0ce9589d6]*/ /* To ensure the lookup algorithm terminates, there must be at least one Unused slot (NULL key) in the table. To avoid slowing down lookups on a near-full table, we resize the table when it's USABLE_FRACTION (currently two-thirds) full. */ #define PERTURB_SHIFT 5 /* Major subtleties ahead: Most hash schemes depend on having a "good" hash function, in the sense of simulating randomness. Python doesn't: its most important hash functions (for ints) are very regular in common cases: >>>[hash(i) for i in range(4)] [0, 1, 2, 3] This isn't necessarily bad! To the contrary, in a table of size 2**i, taking the low-order i bits as the initial table index is extremely fast, and there are no collisions at all for dicts indexed by a contiguous range of ints. So this gives better-than-random behavior in common cases, and that's very desirable. OTOH, when collisions occur, the tendency to fill contiguous slices of the hash table makes a good collision resolution strategy crucial. Taking only the last i bits of the hash code is also vulnerable: for example, consider the list [i << 16 for i in range(20000)] as a set of keys. Since ints are their own hash codes, and this fits in a dict of size 2**15, the last 15 bits of every hash code are all 0: they *all* map to the same table index. But catering to unusual cases should not slow the usual ones, so we just take the last i bits anyway. It's up to collision resolution to do the rest. If we *usually* find the key we're looking for on the first try (and, it turns out, we usually do -- the table load factor is kept under 2/3, so the odds are solidly in our favor), then it makes best sense to keep the initial index computation dirt cheap. The first half of collision resolution is to visit table indices via this recurrence: j = ((5*j) + 1) mod 2**i For any initial j in range(2**i), repeating that 2**i times generates each int in range(2**i) exactly once (see any text on random-number generation for proof). By itself, this doesn't help much: like linear probing (setting j += 1, or j -= 1, on each loop trip), it scans the table entries in a fixed order. This would be bad, except that's not the only thing we do, and it's actually *good* in the common cases where hash keys are consecutive. 
In an example that's really too small to make this entirely clear, for a table of size 2**3 the order of indices is: 0 -> 1 -> 6 -> 7 -> 4 -> 5 -> 2 -> 3 -> 0 [and here it's repeating] If two things come in at index 5, the first place we look after is index 2, not 6, so if another comes in at index 6 the collision at 5 didn't hurt it. Linear probing is deadly in this case because there the fixed probe order is the *same* as the order consecutive keys are likely to arrive. But it's extremely unlikely hash codes will follow a 5*j+1 recurrence by accident, and certain that consecutive hash codes do not. The other half of the strategy is to get the other bits of the hash code into play. This is done by initializing a (unsigned) vrbl "perturb" to the full hash code, and changing the recurrence to: perturb >>= PERTURB_SHIFT; j = (5*j) + 1 + perturb; use j % 2**i as the next table index; Now the probe sequence depends (eventually) on every bit in the hash code, and the pseudo-scrambling property of recurring on 5*j+1 is more valuable, because it quickly magnifies small differences in the bits that didn't affect the initial index. Note that because perturb is unsigned, if the recurrence is executed often enough perturb eventually becomes and remains 0. At that point (very rarely reached) the recurrence is on (just) 5*j+1 again, and that's certain to find an empty slot eventually (since it generates every int in range(2**i), and we make sure there's always at least one empty slot). Selecting a good value for PERTURB_SHIFT is a balancing act. You want it small so that the high bits of the hash code continue to affect the probe sequence across iterations; but you want it large so that in really bad cases the high-order hash bits have an effect on early iterations. 5 was "the best" in minimizing total collisions across experiments Tim Peters ran (on both normal and pathological cases), but 4 and 6 weren't significantly worse. Historical: Reimer Behrends contributed the idea of using a polynomial-based approach, using repeated multiplication by x in GF(2**n) where an irreducible polynomial for each table size was chosen such that x was a primitive root. Christian Tismer later extended that to use division by x instead, as an efficient way to get the high bits of the hash code into play. This scheme also gave excellent collision statistics, but was more expensive: two if-tests were required inside the loop; computing "the next" index took about the same number of operations but without as much potential parallelism (e.g., computing 5*j can go on at the same time as computing 1+perturb in the above, and then shifting perturb can be done while the table index is being masked); and the PyDictObject struct required a member to hold the table's polynomial. In Tim's experiments the current scheme ran faster, produced equally good collision statistics, needed less code & used less memory. 
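
A compact sketch of the probe loop described above (a sketch only, mirroring
the loops in numba_dict_lookup(), find_empty_slot() and build_indices() below;
`mask` is dk->size - 1, and dk->size is a power of two):

    size_t perturb = (size_t)hash;
    size_t i = (size_t)hash & mask;      // initial index: low bits of the hash
    for (;;) {
        // ... examine slot i: empty, dummy, or an entry to compare ...
        perturb >>= PERTURB_SHIFT;       // feed higher hash bits into the probe
        i = (i*5 + perturb + 1) & mask;  // the perturbed 5*j + 1 recurrence
    }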
*/ #define DKIX_EMPTY (-1) #define DKIX_DUMMY (-2) /* Used internally */ #define DKIX_ERROR (-3) typedef enum { OK = 0, OK_REPLACED = 1, ERR_NO_MEMORY = -1, ERR_DICT_MUTATED = -2, ERR_ITER_EXHAUSTED = -3, ERR_DICT_EMPTY = -4, ERR_CMP_FAILED = -5, } Status; #ifndef NDEBUG static int mem_cmp_zeros(void *obj, size_t n){ int diff = 0; char *mem = obj; char *it; for (it = mem; it < mem + n; ++it) { if (*it != 0) diff += 1; } return diff; } #endif #define D_MASK(dk) ((dk)->size-1) #define D_GROWTH_RATE(d) ((d)->used*3) static int ix_size(Py_ssize_t size) { if ( size < 0xff ) return 1; if ( size < 0xffff ) return 2; if ( size < 0xffffffff ) return 4; return sizeof(int64_t); } #ifndef NDEBUG /* NOTE: This function is only used in assert()s */ /* Align pointer *ptr* to pointer size */ static void* aligned_pointer(void *ptr) { return (void*)aligned_size((size_t)ptr); } #endif /* lookup indices. returns DKIX_EMPTY, DKIX_DUMMY, or ix >=0 */ static Py_ssize_t get_index(NB_DictKeys *dk, Py_ssize_t i) { Py_ssize_t s = dk->size; Py_ssize_t ix; if (s <= 0xff) { int8_t *indices = (int8_t*)(dk->indices); assert (i < dk->size); ix = indices[i]; } else if (s <= 0xffff) { int16_t *indices = (int16_t*)(dk->indices); ix = indices[i]; } #if SIZEOF_VOID_P > 4 else if (s > 0xffffffff) { int64_t *indices = (int64_t*)(dk->indices); ix = indices[i]; } #endif else { int32_t *indices = (int32_t*)(dk->indices); ix = indices[i]; } assert(ix >= DKIX_DUMMY); return ix; } /* write to indices. */ static void set_index(NB_DictKeys *dk, Py_ssize_t i, Py_ssize_t ix) { Py_ssize_t s = dk->size; assert(ix >= DKIX_DUMMY); if (s <= 0xff) { int8_t *indices = (int8_t*)(dk->indices); assert(ix <= 0x7f); indices[i] = (char)ix; } else if (s <= 0xffff) { int16_t *indices = (int16_t*)(dk->indices); assert(ix <= 0x7fff); indices[i] = (int16_t)ix; } #if SIZEOF_VOID_P > 4 else if (s > 0xffffffff) { int64_t *indices = (int64_t*)(dk->indices); indices[i] = ix; } #endif else { int32_t *indices = (int32_t*)(dk->indices); assert(ix <= 0x7fffffff); indices[i] = (int32_t)ix; } } /* USABLE_FRACTION is the maximum dictionary load. * Increasing this ratio makes dictionaries more dense resulting in more * collisions. Decreasing it improves sparseness at the expense of spreading * indices over more cache lines and at the cost of total memory consumed. * * USABLE_FRACTION must obey the following: * (0 < USABLE_FRACTION(n) < n) for all n >= 2 * * USABLE_FRACTION should be quick to calculate. * Fractions around 1/2 to 2/3 seem to work well in practice. */ #define USABLE_FRACTION(n) (((n) << 1)/3) // ratio: 2/3 /* Alternative fraction that is otherwise close enough to 2n/3 to make * little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10. * 32 * 2/3 = 21, 32 * 5/8 = 20. * Its advantage is that it is faster to compute on machines with slow division. * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3)) // ratio: 5/8 */ /* INV_USABLE_FRACTION gives the inverse of USABLE_FRACTION. * Used for sizing a new dictionary to a specified number of keys. * * NOTE: If the denominator of the USABLE_FRACTION ratio is not a power * of 2, must add 1 to the result of the inverse for correct sizing. * * For example, when USABLE_FRACTION ratio = 5/8 (8 is a power of 2): * #define INV_USABLE_FRACTION(n) (((n) << 3)/5) // inv_ratio: 8/5 * * When USABLE_FRACTION ratio = 5/7 (7 is not a power of 2): * #define INV_USABLE_FRACTION(n) ((7*(n))/5 + 1) // inv_ratio: 7/5 */ #define INV_USABLE_FRACTION(n) ((n) + ((n) >> 1) + 1) // inv_ratio: 3/2 /* GROWTH_RATE. 
Growth rate upon hitting maximum load. * Currently set to used*3. * This means that dicts double in size when growing without deletions, * but have more head room when the number of deletions is on a par with the * number of insertions. See also bpo-17563 and bpo-33205. * * GROWTH_RATE was set to used*4 up to version 3.2. * GROWTH_RATE was set to used*2 in version 3.3.0 * GROWTH_RATE was set to used*2 + capacity/2 in 3.4.0-3.6.0. */ #define GROWTH_RATE(d) ((d)->ma_used*3) static NB_DictEntry* get_entry(NB_DictKeys *dk, Py_ssize_t idx) { Py_ssize_t offset; char *ptr; assert (idx < dk->size); offset = idx * dk->entry_size; ptr = dk->indices + dk->entry_offset + offset; return (NB_DictEntry*)ptr; } static void zero_key(NB_DictKeys *dk, char *data){ memset(data, 0, dk->key_size); } static void zero_val(NB_DictKeys *dk, char *data){ memset(data, 0, dk->val_size); } static void copy_key(NB_DictKeys *dk, char *dst, const char *src){ memcpy(dst, src, dk->key_size); } static void copy_val(NB_DictKeys *dk, char *dst, const char *src){ memcpy(dst, src, dk->val_size); } /* Returns -1 for error; 0 for not equal; 1 for equal */ static int key_equal(NB_DictKeys *dk, const char *lhs, const char *rhs) { if ( dk->methods.key_equal ) { return dk->methods.key_equal(lhs, rhs); } else { return memcmp(lhs, rhs, dk->key_size) == 0; } } static char * entry_get_key(NB_DictKeys *dk, NB_DictEntry* entry) { char * out = entry->keyvalue; assert (out == aligned_pointer(out)); return out; } static char * entry_get_val(NB_DictKeys *dk, NB_DictEntry* entry) { char * out = entry_get_key(dk, entry) + aligned_size(dk->key_size); assert (out == aligned_pointer(out)); return out; } static void dk_incref_key(NB_DictKeys *dk, const char *key) { if ( dk->methods.key_incref ) { dk->methods.key_incref(key); } } static void dk_decref_key(NB_DictKeys *dk, const char *key) { if ( dk->methods.key_decref ) { dk->methods.key_decref(key); } } static void dk_incref_val(NB_DictKeys *dk, const char *val) { if ( dk->methods.value_incref ) { dk->methods.value_incref(val); } } static void dk_decref_val(NB_DictKeys *dk, const char *val) { if ( dk->methods.value_decref ) { dk->methods.value_decref(val); } } void numba_dictkeys_free(NB_DictKeys *dk) { /* Clear all references from the entries */ Py_ssize_t i; NB_DictEntry *ep; for (i = 0; i < dk->nentries; i++) { ep = get_entry(dk, i); if (ep->hash != DKIX_EMPTY) { dk_decref_key(dk, entry_get_key(dk, ep)); dk_decref_val(dk, entry_get_val(dk, ep)); } } /* Deallocate */ free(dk); } void numba_dict_free(NB_Dict *d) { numba_dictkeys_free(d->keys); free(d); } Py_ssize_t numba_dict_length(NB_Dict *d) { return d->used; } /* Allocate new dictionary keys Adapted from CPython's new_keys_object(). 
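
The keys object is a single allocation laid out as a header followed by the
index array and then the entry array (a sketch of the size arithmetic used in
the function body; aligned_size() comes from dictobject.h):

    entry_size   = aligned_size(sizeof(NB_DictEntry)
                                + aligned_size(key_size)
                                + aligned_size(val_size));
    entry_offset = aligned_size(ix_size(size) * size);   // bytes used by the index array
    alloc_size   = sizeof(NB_DictKeys) + entry_offset
                   + entry_size * USABLE_FRACTION(size);

For example, with size == 8 each index slot is one byte (ix_size(8) == 1) and
there is room for USABLE_FRACTION(8) == 5 entries before a resize is needed.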
*/ int numba_dictkeys_new(NB_DictKeys **out, Py_ssize_t size, Py_ssize_t key_size, Py_ssize_t val_size) { Py_ssize_t usable = USABLE_FRACTION(size); Py_ssize_t index_size = ix_size(size); Py_ssize_t entry_size = aligned_size(sizeof(NB_DictEntry) + aligned_size(key_size) + aligned_size(val_size)); Py_ssize_t entry_offset = aligned_size(index_size * size); Py_ssize_t alloc_size = sizeof(NB_DictKeys) + entry_offset + entry_size * usable; NB_DictKeys *dk = malloc(aligned_size(alloc_size)); if (!dk) return ERR_NO_MEMORY; assert ( size >= D_MINSIZE ); dk->size = size; dk->usable = usable; dk->nentries = 0; dk->key_size = key_size; dk->val_size = val_size; dk->entry_offset = entry_offset; dk->entry_size = entry_size; assert (aligned_pointer(dk->indices) == dk->indices ); /* Ensure that the method table is all nulls */ memset(&dk->methods, 0x00, sizeof(type_based_methods_table)); /* Ensure hash is (-1) for empty entry */ memset(dk->indices, 0xff, entry_offset + entry_size * usable); *out = dk; return OK; } /* Allocate new dictionary */ int numba_dict_new(NB_Dict **out, Py_ssize_t size, Py_ssize_t key_size, Py_ssize_t val_size) { NB_DictKeys *dk; NB_Dict *d; int status = numba_dictkeys_new(&dk, size, key_size, val_size); if (status != OK) return status; d = malloc(sizeof(NB_Dict)); if (!d) { numba_dictkeys_free(dk); return ERR_NO_MEMORY; } d->used = 0; d->keys = dk; *out = d; return OK; } /* Adapted from CPython lookdict_index(). Search index of hash table from offset of entry table */ static Py_ssize_t lookdict_index(NB_DictKeys *dk, Py_hash_t hash, Py_ssize_t index) { size_t mask = D_MASK(dk); size_t perturb = (size_t)hash; size_t i = (size_t)hash & mask; for (;;) { Py_ssize_t ix = get_index(dk, i); if (ix == index) { return i; } if (ix == DKIX_EMPTY) { return DKIX_EMPTY; } perturb >>= PERTURB_SHIFT; i = mask & (i*5 + perturb + 1); } assert(0 && "unreachable"); } /* Adapted from the CPython3.7 lookdict(). The basic lookup function used by all operations. This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. Open addressing is preferred over chaining since the link overhead for chaining would be substantial (100% with typical malloc overhead). The initial probe index is computed as hash mod the table size. Subsequent probe indices are computed as explained earlier. All arithmetic on hash should ignore overflow. The details in this version are due to Tim Peters, building on many past contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and Christian Tismer. lookdict() is general-purpose, and may return DKIX_ERROR if (and only if) a comparison raises an exception. lookdict_unicode() below is specialized to string keys, comparison of which can never raise an exception; that function can never return DKIX_ERROR when key is string. Otherwise, it falls back to lookdict(). lookdict_unicode_nodummy is further specialized for string keys that cannot be the value. For both, when the key isn't found a DKIX_EMPTY is returned. 
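
Calling convention for the adapted lookup below (a sketch; `oldval_bytes` is a
caller-owned buffer of at least val_size bytes, and VAL_SIZE is just a
placeholder for d->keys->val_size):

    char oldval[VAL_SIZE];
    Py_ssize_t ix = numba_dict_lookup(d, key_bytes, hash, oldval);
    if (ix == DKIX_ERROR) {
        // the key-comparison callback failed; oldval was zeroed
    } else if (ix == DKIX_EMPTY) {
        // key not present; oldval was zeroed
    } else {
        // ix is the entry index; oldval holds a copy of the stored value
    }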
*/ Py_ssize_t numba_dict_lookup(NB_Dict *d, const char *key_bytes, Py_hash_t hash, char *oldval_bytes) { NB_DictKeys *dk = d->keys; size_t mask = D_MASK(dk); size_t perturb = hash; size_t i = (size_t)hash & mask; for (;;) { Py_ssize_t ix = get_index(dk, i); if (ix == DKIX_EMPTY) { zero_val(dk, oldval_bytes); return ix; } if (ix >= 0) { NB_DictEntry *ep = get_entry(dk, ix); const char *startkey = NULL; if (ep->hash == hash) { int cmp; startkey = entry_get_key(dk, ep); cmp = key_equal(dk, startkey, key_bytes); if (cmp < 0) { // error'ed in comparison memset(oldval_bytes, 0, dk->val_size); return DKIX_ERROR; } if (cmp > 0) { // key is equal; retrieve the value. copy_val(dk, oldval_bytes, entry_get_val(dk, ep)); return ix; } } } perturb >>= PERTURB_SHIFT; i = (i*5 + perturb + 1) & mask; } assert(0 && "unreachable"); } /* Internal function to find slot for an item from its hash when it is known that the key is not present in the dict. The dict must be combined. */ static Py_ssize_t find_empty_slot(NB_DictKeys *dk, Py_hash_t hash){ size_t mask; size_t i; Py_ssize_t ix; size_t perturb; assert(dk != NULL); mask = D_MASK(dk); i = hash & mask; ix = get_index(dk, i); for (perturb = hash; ix >= 0;) { perturb >>= PERTURB_SHIFT; i = (i*5 + perturb + 1) & mask; ix = get_index(dk, i); } return i; } static int insertion_resize(NB_Dict *d) { return numba_dict_resize(d, D_GROWTH_RATE(d)); } int numba_dict_insert( NB_Dict *d, const char *key_bytes, Py_hash_t hash, const char *val_bytes, char *oldval_bytes ) { NB_DictKeys *dk = d->keys; Py_ssize_t ix = numba_dict_lookup(d, key_bytes, hash, oldval_bytes); if (ix == DKIX_ERROR) { // exception in key comparison in lookup. return ERR_CMP_FAILED; } if (ix == DKIX_EMPTY) { /* Insert into new slot */ Py_ssize_t hashpos; NB_DictEntry *ep; if (dk->usable <= 0) { /* Need to resize */ if (insertion_resize(d) != OK) return ERR_NO_MEMORY; else dk = d->keys; // reload } hashpos = find_empty_slot(dk, hash); ep = get_entry(dk, dk->nentries); set_index(dk, hashpos, dk->nentries); copy_key(dk, entry_get_key(dk, ep), key_bytes); assert ( hash != -1 ); ep->hash = hash; copy_val(dk, entry_get_val(dk, ep), val_bytes); /* incref */ dk_incref_key(dk, key_bytes); dk_incref_val(dk, val_bytes); d->used += 1; dk->usable -= 1; dk->nentries += 1; assert (dk->usable >= 0); return OK; } else { /* Replace existing value in the slot at ix */ /* decref old value */ dk_decref_val(dk, oldval_bytes); // Replace the previous value copy_val(dk, entry_get_val(dk, get_entry(dk, ix)), val_bytes); /* incref */ dk_incref_val(dk, val_bytes); return OK_REPLACED; } } /* Adapted from build_indices(). Internal routine used by dictresize() to build a hashtable of entries. */ void build_indices(NB_DictKeys *keys, Py_ssize_t n) { size_t mask = (size_t)D_MASK(keys); Py_ssize_t ix; for (ix = 0; ix != n; ix++) { size_t perturb; Py_hash_t hash = get_entry(keys, ix)->hash; size_t i = hash & mask; for (perturb = hash; get_index(keys, i) != DKIX_EMPTY;) { perturb >>= PERTURB_SHIFT; i = mask & (i*5 + perturb + 1); } set_index(keys, i, ix); } } /* Adapted from CPython dictresize(). Restructure the table by allocating a new table and reinserting all items again. When entries have been deleted, the new table may actually be smaller than the old one. If a table is split (its keys and hashes are shared, its values are not), then the values are temporarily copied into the table, it is resized as a combined table, then the me_value slots in the old table are NULLed out. 
After resizing, a table is always combined, but can be resplit by
make_keys_shared().
*/
int
numba_dict_resize(NB_Dict *d, Py_ssize_t minsize)
{
    Py_ssize_t newsize, numentries;
    NB_DictKeys *oldkeys;
    int status;

    /* Find the smallest table size > minsize. */
    for (newsize = D_MINSIZE;
         newsize < minsize && newsize > 0;
         newsize <<= 1)
        ;
    if (newsize <= 0) {
        return ERR_NO_MEMORY;
    }

    oldkeys = d->keys;

    /* NOTE: Current odict checks mp->ma_keys to detect that a resize happened.
     * So we can't reuse oldkeys even if oldkeys->dk_size == newsize.
     * TODO: Try reusing oldkeys when reimplementing odict.
     */

    /* Allocate a new table. */
    status = numba_dictkeys_new(
        &d->keys, newsize, oldkeys->key_size, oldkeys->val_size
    );
    if (status != OK) {
        d->keys = oldkeys;
        return status;
    }

    // New table must be large enough.
    assert(d->keys->usable >= d->used);

    // Copy method table
    memcpy(&d->keys->methods, &oldkeys->methods, sizeof(type_based_methods_table));

    numentries = d->used;
    if (oldkeys->nentries == numentries) {
        NB_DictEntry *oldentries, *newentries;
        oldentries = get_entry(oldkeys, 0);
        newentries = get_entry(d->keys, 0);
        memcpy(newentries, oldentries, numentries * oldkeys->entry_size);
        // mark the old entries as empty to avoid decref'ing them below
        memset(oldentries, 0xff, numentries * oldkeys->entry_size);
    }
    else {
        Py_ssize_t i;
        size_t epi = 0;
        for (i = 0; i < numentries; i++) {
            /*
               ep->hash == (-1) means the entry is empty.

               Here, we skip until a non-empty entry is encountered.
            */
            while ( get_entry(oldkeys, epi)->hash == DKIX_EMPTY ) {
                assert(
                    mem_cmp_zeros(entry_get_val(oldkeys, get_entry(oldkeys, epi)),
                                  oldkeys->val_size) == 0
                );
                epi += 1;
            }
            memcpy(
                get_entry(d->keys, i),
                get_entry(oldkeys, epi),
                oldkeys->entry_size
            );
            get_entry(oldkeys, epi)->hash = DKIX_EMPTY;  // to avoid decref
            epi += 1;
        }
    }

    numba_dictkeys_free(oldkeys);

    build_indices(d->keys, numentries);
    d->keys->usable -= numentries;
    d->keys->nentries = numentries;
    return OK;
}

/* Adapted from CPython delitem_common */
int
numba_dict_delitem(NB_Dict *d, Py_hash_t hash, Py_ssize_t ix)
{
    Py_ssize_t hashpos;
    NB_DictEntry *ep;
    NB_DictKeys *dk = d->keys;

    hashpos = lookdict_index(dk, hash, ix);
    assert(hashpos >= 0);

    d->used -= 1;
    ep = get_entry(dk, ix);
    set_index(dk, hashpos, DKIX_DUMMY);

    /* decref */
    dk_decref_key(dk, entry_get_key(dk, ep));
    dk_decref_val(dk, entry_get_val(dk, ep));

    /* zero the entries */
    zero_key(dk, entry_get_key(dk, ep));
    zero_val(dk, entry_get_val(dk, ep));
    ep->hash = DKIX_EMPTY;  // mark it as empty

    return OK;
}

/**
 * Adapted from dict_popitem
 */
int
numba_dict_popitem(NB_Dict *d, char *key_bytes, char *val_bytes)
{
    Py_ssize_t i, j;
    char *key_ptr, *val_ptr;
    NB_DictEntry *ep = NULL;

    if (d->used == 0) {
        return ERR_DICT_EMPTY;
    }

    /* Pop last item */
    i = d->keys->nentries - 1;
    while (i >= 0 && (ep = get_entry(d->keys, i))->hash == DKIX_EMPTY ) {
        i--;
    }
    assert(i >= 0);

    j = lookdict_index(d->keys, ep->hash, i);
    assert(j >= 0);
    assert(get_index(d->keys, j) == i);
    set_index(d->keys, j, DKIX_DUMMY);

    key_ptr = entry_get_key(d->keys, ep);
    val_ptr = entry_get_val(d->keys, ep);
    copy_key(d->keys, key_bytes, key_ptr);
    copy_val(d->keys, val_bytes, val_ptr);
    zero_key(d->keys, key_ptr);
    zero_val(d->keys, val_ptr);

    /* We can't dk_usable++ since there is DKIX_DUMMY in indices */
    d->keys->nentries = i;
    d->used--;
    return OK;
}

void
numba_dict_dump(NB_Dict *d)
{
    long long i, j, k;
    long long size, n;
    char *cp;
    NB_DictEntry *ep;
    NB_DictKeys *dk = d->keys;

    n = d->used;
    size = dk->nentries;
    printf("Dict dump\n");
    printf(" key_size = %lld\n", (long long)d->keys->key_size);
    printf(" val_size = %lld\n", (long long)d->keys->val_size);
    for (i = 0, j = 0; i <
size; i++) { ep = get_entry(dk, i); if (ep->hash != DKIX_EMPTY) { long long hash = ep->hash; printf(" key="); for (cp=entry_get_key(dk, ep), k=0; k < d->keys->key_size; ++k, ++cp){ printf("%02x ", ((int)*cp) & 0xff); } printf(" hash=%llu value=", hash); for (cp=entry_get_val(dk, ep), k=0; k < d->keys->val_size; ++k, ++cp){ printf("%02x ", ((int)*cp) & 0xff); } printf("\n"); j++; } } printf("j = %lld; n = %lld\n", j, n); assert(j == n); } size_t numba_dict_iter_sizeof() { return sizeof(NB_DictIter); } void numba_dict_iter(NB_DictIter *it, NB_Dict *d) { it->parent = d; it->parent_keys = d->keys; it->size = d->used; it->pos = 0; } int numba_dict_iter_next(NB_DictIter *it, const char **key_ptr, const char **val_ptr) { /* Detect dictionary mutation during iteration */ NB_DictKeys *dk; if (it->parent->keys != it->parent_keys || it->parent->used != it->size) { return ERR_DICT_MUTATED; } dk = it->parent_keys; while ( it->pos < dk->nentries ) { NB_DictEntry *ep = get_entry(dk, it->pos++); if ( ep->hash != DKIX_EMPTY ) { *key_ptr = entry_get_key(dk, ep); *val_ptr = entry_get_val(dk, ep); return OK; } } return ERR_ITER_EXHAUSTED; } int numba_dict_insert_ez( NB_Dict *d, const char *key_bytes, Py_hash_t hash, const char *val_bytes ) { STACK_ALLOC(char, old, d->keys->val_size); return numba_dict_insert(d, key_bytes, hash, val_bytes, old); } /* Allocate a new dictionary with enough space to hold n_keys without resizes */ int numba_dict_new_sized(NB_Dict **out, Py_ssize_t n_keys, Py_ssize_t key_size, Py_ssize_t val_size) { /* Respect D_MINSIZE */ if (n_keys <= USABLE_FRACTION(D_MINSIZE)) { return numba_dict_new(out, D_MINSIZE, key_size, val_size); } /* Adjust for load factor */ Py_ssize_t size = INV_USABLE_FRACTION(n_keys) - 1; /* Round up size to the nearest power of 2. 
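
   The loop below ORs `size` with ever-larger right-shifts of itself, which
   smears the highest set bit into every lower bit position; the final
   increment then produces the next power of two.  The preceding `- 1` keeps
   a value that is already a power of two unchanged.  For example:

       size = 42  ->  after the shifts: 63  ->  size + 1 = 64
       size = 63  ->  after the shifts: 63  ->  size + 1 = 64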
*/
    for (unsigned int shift = 1; shift < sizeof(Py_ssize_t) * CHAR_BIT; shift <<= 1) {
        size |= (size >> shift);
    }
    size++;

    /* Handle overflows */
    if (size <= 0) {
        return ERR_NO_MEMORY;
    }

    return numba_dict_new(out, size, key_size, val_size);
}

void
numba_dict_set_method_table(NB_Dict *d, type_based_methods_table *methods)
{
    memcpy(&d->keys->methods, methods, sizeof(type_based_methods_table));
}

#define CHECK(CASE) {                                                       \
    if ( !(CASE) ) {                                                        \
        printf("'%s' failed file %s:%d\n", #CASE, __FILE__, __LINE__);      \
        return 1;                                                           \
    }                                                                       \
}

int
numba_test_dict(void)
{
    NB_Dict *d;
    int status;
    Py_ssize_t ix;
    Py_ssize_t usable;
    Py_ssize_t it_count;
    const char *it_key, *it_val;
    NB_DictIter iter;

#if defined(_MSC_VER)
    /* So that the VS2008 compiler is happy */
    char *got_key, *got_value;
    got_key = _alloca(4);
    got_value = _alloca(8);
#else
    char got_key[4];
    char got_value[8];
#endif

    puts("test_dict");

    status = numba_dict_new(&d, D_MINSIZE, 4, 8);
    CHECK(status == OK);
    CHECK(d->keys->size == D_MINSIZE);
    CHECK(d->keys->key_size == 4);
    CHECK(d->keys->val_size == 8);
    CHECK(ix_size(d->keys->size) == 1);

    printf("aligned_size(index_size * size) = %d\n",
           (int)(aligned_size(ix_size(d->keys->size) * d->keys->size)));
    printf("d %p\n", d);
    printf("d->usable = %d\n", (int)d->keys->usable);
    usable = d->keys->usable;

    printf("d[0] %d\n", (int)((char*)get_entry(d->keys, 0) - (char*)d->keys));
    CHECK ((char*)get_entry(d->keys, 0) - (char*)d->keys->indices == d->keys->entry_offset);
    printf("d[1] %d\n", (int)((char*)get_entry(d->keys, 1) - (char*)d->keys));
    CHECK ((char*)get_entry(d->keys, 1) - (char*)d->keys->indices == d->keys->entry_offset + d->keys->entry_size);

    ix = numba_dict_lookup(d, "bef", 0xbeef, got_value);
    printf("ix = %d\n", (int)ix);
    CHECK (ix == DKIX_EMPTY);

    // insert 1st key
    status = numba_dict_insert(d, "bef", 0xbeef, "1234567", got_value);
    CHECK (status == OK);
    CHECK (d->used == 1);
    CHECK (d->keys->usable == usable - d->used);

    // insert same key
    status = numba_dict_insert(d, "bef", 0xbeef, "1234567", got_value);
    CHECK (status == OK_REPLACED);
    printf("got_value %s\n", got_value);
    CHECK (d->used == 1);
    CHECK (d->keys->usable == usable - d->used);

    // insert 2nd key
    status = numba_dict_insert(d, "beg", 0xbeef, "1234568", got_value);
    CHECK (status == OK);
    CHECK (d->used == 2);
    CHECK (d->keys->usable == usable - d->used);

    // insert 3rd key
    status = numba_dict_insert(d, "beh", 0xcafe, "1234569", got_value);
    CHECK (status == OK);
    CHECK (d->used == 3);
    CHECK (d->keys->usable == usable - d->used);

    // replace key "bef"'s value
    status = numba_dict_insert(d, "bef", 0xbeef, "7654321", got_value);
    CHECK (status == OK_REPLACED);
    CHECK (d->used == 3);
    CHECK (d->keys->usable == usable - d->used);

    // insert 4th key
    status = numba_dict_insert(d, "bei", 0xcafe, "0_0_0_1", got_value);
    CHECK (status == OK);
    CHECK (d->used == 4);
    CHECK (d->keys->usable == usable - d->used);

    // insert 5th key
    status = numba_dict_insert(d, "bej", 0xcafe, "0_0_0_2", got_value);
    CHECK (status == OK);
    CHECK (d->used == 5);
    CHECK (d->keys->usable == usable - d->used);

    // insert 6th key; triggers a resize
    status = numba_dict_insert(d, "bek", 0xcafe, "0_0_0_3", got_value);
    CHECK (status == OK);
    CHECK (d->used == 6);
    CHECK (d->keys->usable == USABLE_FRACTION(d->keys->size) - d->used);

    // Dump
    numba_dict_dump(d);

    // Make sure everything is still in there
    ix = numba_dict_lookup(d, "bef", 0xbeef, got_value);
    CHECK (ix >= 0);
    CHECK (memcmp(got_value, "7654321", d->keys->val_size) == 0);

    ix = numba_dict_lookup(d, "beg", 0xbeef, got_value);
    CHECK (ix >= 0);
    CHECK (memcmp(got_value, "1234568", d->keys->val_size) == 0);

    ix = numba_dict_lookup(d, "beh", 0xcafe, got_value);
    printf("ix = %d\n", (int)ix);
    CHECK (ix >= 0);
    CHECK (memcmp(got_value, "1234569", d->keys->val_size) == 0);

    ix = numba_dict_lookup(d, "bei", 0xcafe, got_value);
    CHECK (ix >= 0);
    CHECK (memcmp(got_value, "0_0_0_1", d->keys->val_size) == 0);

    ix = numba_dict_lookup(d, "bej", 0xcafe, got_value);
    CHECK (ix >= 0);
    CHECK (memcmp(got_value, "0_0_0_2", d->keys->val_size) == 0);

    ix = numba_dict_lookup(d, "bek", 0xcafe, got_value);
    CHECK (ix >= 0);
    CHECK (memcmp(got_value, "0_0_0_3", d->keys->val_size) == 0);

    // Test delete
    ix = numba_dict_lookup(d, "beg", 0xbeef, got_value);
    status = numba_dict_delitem(d, 0xbeef, ix);
    CHECK (status == OK);

    ix = numba_dict_lookup(d, "beg", 0xbeef, got_value);
    CHECK (ix == DKIX_EMPTY); // not found

    ix = numba_dict_lookup(d, "bef", 0xbeef, got_value);
    CHECK (ix >= 0);
    ix = numba_dict_lookup(d, "beh", 0xcafe, got_value);
    CHECK (ix >= 0);

    // Test popitem; it always returns the last item
    status = numba_dict_popitem(d, got_key, got_value);
    CHECK(status == OK);
    CHECK(memcmp("bek", got_key, d->keys->key_size) == 0);
    CHECK(memcmp("0_0_0_3", got_value, d->keys->val_size) == 0);

    status = numba_dict_popitem(d, got_key, got_value);
    CHECK(status == OK);
    CHECK(memcmp("bej", got_key, d->keys->key_size) == 0);
    CHECK(memcmp("0_0_0_2", got_value, d->keys->val_size) == 0);

    // Test iterator
    CHECK( d->used > 0 );
    numba_dict_iter(&iter, d);
    it_count = 0;
    while ( (status = numba_dict_iter_next(&iter, &it_key, &it_val)) == OK) {
        it_count += 1;
        // valid items
        CHECK(it_key != NULL);
        CHECK(it_val != NULL);
    }
    CHECK(status == ERR_ITER_EXHAUSTED);
    CHECK(d->used == it_count);

    numba_dict_free(d);

    /* numba_dict_new_sized() */
    Py_ssize_t target_size;
    Py_ssize_t n_keys;

    // Test that the minimum-size dict is returned when n_keys == 0
    target_size = D_MINSIZE;
    n_keys = 0;
    numba_dict_new_sized(&d, n_keys, 1, 1);
    CHECK(d->keys->size == target_size);
    CHECK(d->keys->usable == USABLE_FRACTION(target_size));
    numba_dict_free(d);

    // Test sizing at a power-of-2 boundary
    target_size = D_MINSIZE * 2;
    n_keys = USABLE_FRACTION(target_size);
    numba_dict_new_sized(&d, n_keys, 1, 1);
    CHECK(d->keys->size == target_size);
    CHECK(d->keys->usable == n_keys);
    numba_dict_free(d);

    target_size *= 2;
    n_keys++;
    numba_dict_new_sized(&d, n_keys, 1, 1);
    CHECK(d->keys->size == target_size);
    CHECK(d->keys->usable > n_keys);
    CHECK(d->keys->usable == USABLE_FRACTION(target_size));
    numba_dict_free(d);

    return 0;
}

#undef CHECK