# cython: infer_types=True # cython: cdivision=True # cimport cython DEF EMPTY_KEY = 0 DEF DELETED_KEY = 1 cdef class PreshMap: """Hash map that assumes keys come pre-hashed. Maps uint64_t --> uint64_t. Uses open addressing with linear probing. Usage map = PreshMap() # Create a table map = PreshMap(initial_size=1024) # Create with initial size (efficiency) map[key] = value # Set a value to a key value = map[key] # Get a value given a key for key, value in map.items(): # Iterate over items len(map) # Get number of inserted keys """ def __init__(self, size_t initial_size=8): # Size must be power of two if initial_size == 0: initial_size = 8 if initial_size & (initial_size - 1) != 0: power = 1 while power < initial_size: power *= 2 initial_size = power self.mem = Pool() self.c_map = self.mem.alloc(1, sizeof(MapStruct)) map_init(self.mem, self.c_map, initial_size) property capacity: def __get__(self): return self.c_map.length def items(self): cdef key_t key cdef void* value cdef int i = 0 while map_iter(self.c_map, &i, &key, &value): yield key, value def keys(self): for key, _ in self.items(): yield key def values(self): for _, value in self.items(): yield value def pop(self, key_t key, default=None): cdef Result result = map_get_unless_missing(self.c_map, key) map_clear(self.c_map, key) if result.found: return result.value else: return default def __getitem__(self, key_t key): cdef Result result = map_get_unless_missing(self.c_map, key) if result.found: return result.value else: return None def __setitem__(self, key_t key, size_t value): map_set(self.mem, self.c_map, key, value) def __delitem__(self, key_t key): map_clear(self.c_map, key) def __len__(self): return self.c_map.filled def __contains__(self, key_t key): cdef Result result = map_get_unless_missing(self.c_map, key) return True if result.found else False def __iter__(self): for key in self.keys(): yield key cdef inline void* get(self, key_t key) nogil: return map_get(self.c_map, key) cdef void set(self, key_t key, void* value) except *: map_set(self.mem, self.c_map, key, value) cdef class PreshMapArray: """An array of hash tables that assume keys come pre-hashed. Each table uses open addressing with linear probing. """ def __init__(self, size_t length, size_t initial_size=8): self.mem = Pool() self.length = length self.maps = self.mem.alloc(length, sizeof(MapStruct)) for i in range(length): map_init(self.mem, &self.maps[i], initial_size) cdef inline void* get(self, size_t i, key_t key) nogil: return map_get(&self.maps[i], key) cdef void set(self, size_t i, key_t key, void* value) except *: map_set(self.mem, &self.maps[i], key, value) cdef void map_init(Pool mem, MapStruct* map_, size_t length) except *: map_.length = length map_.filled = 0 map_.cells = mem.alloc(length, sizeof(Cell)) cdef void map_set(Pool mem, MapStruct* map_, key_t key, void* value) except *: cdef Cell* cell if key == EMPTY_KEY: map_.value_for_empty_key = value map_.is_empty_key_set = True elif key == DELETED_KEY: map_.value_for_del_key = value map_.is_del_key_set = True else: cell = _find_cell_for_insertion(map_.cells, map_.length, key) if cell.key == EMPTY_KEY: map_.filled += 1 cell.key = key cell.value = value if (map_.filled + 1) * 5 >= (map_.length * 3): _resize(mem, map_) cdef void* map_get(const MapStruct* map_, const key_t key) nogil: if key == EMPTY_KEY: return map_.value_for_empty_key elif key == DELETED_KEY: return map_.value_for_del_key cdef Cell* cell = _find_cell(map_.cells, map_.length, key) return cell.value cdef Result map_get_unless_missing(const MapStruct* map_, const key_t key) nogil: cdef Result result cdef Cell* cell result.found = 0 result.value = NULL if key == EMPTY_KEY: if map_.is_empty_key_set: result.found = 1 result.value = map_.value_for_empty_key elif key == DELETED_KEY: if map_.is_del_key_set: result.found = 1 result.value = map_.value_for_del_key else: cell = _find_cell(map_.cells, map_.length, key) if cell.key == key: result.found = 1 result.value = cell.value return result cdef void* map_clear(MapStruct* map_, const key_t key) nogil: if key == EMPTY_KEY: value = map_.value_for_empty_key if map_.is_empty_key_set else NULL map_.is_empty_key_set = False return value elif key == DELETED_KEY: value = map_.value_for_del_key if map_.is_del_key_set else NULL map_.is_del_key_set = False return value else: cell = _find_cell(map_.cells, map_.length, key) cell.key = DELETED_KEY # We shouldn't decrement the "filled" value here, as we're not actually # making "empty" values -- deleted values aren't quite the same. # Instead if we manage to insert into a deleted slot, we don't increment # the fill rate. return cell.value cdef void* map_bulk_get(const MapStruct* map_, const key_t* keys, void** values, int n) nogil: cdef int i for i in range(n): values[i] = map_get(map_, keys[i]) cdef bint map_iter(const MapStruct* map_, int* i, key_t* key, void** value) nogil: '''Iterate over the filled items, setting the current place in i, and the key and value. Return False when iteration finishes. ''' cdef const Cell* cell while i[0] < map_.length: cell = &map_.cells[i[0]] i[0] += 1 if cell[0].key != EMPTY_KEY and cell[0].key != DELETED_KEY: key[0] = cell[0].key value[0] = cell[0].value return True # Remember to check for cells keyed by the special empty and deleted keys if i[0] == map_.length: i[0] += 1 if map_.is_empty_key_set: key[0] = EMPTY_KEY value[0] = map_.value_for_empty_key return True if i[0] == map_.length + 1: i[0] += 1 if map_.is_del_key_set: key[0] = DELETED_KEY value[0] = map_.value_for_del_key return True return False @cython.cdivision cdef inline Cell* _find_cell(Cell* cells, const key_t size, const key_t key) nogil: # Modulo for powers-of-two via bitwise & cdef key_t i = (key & (size - 1)) while cells[i].key != EMPTY_KEY and cells[i].key != key: i = (i + 1) & (size - 1) return &cells[i] @cython.cdivision cdef inline Cell* _find_cell_for_insertion(Cell* cells, const key_t size, const key_t key) nogil: """Find the correct cell to insert a value, which could be a previously deleted cell. If we cross a deleted cell and the key is in the table, we mark the later cell as deleted, and return the earlier one.""" cdef Cell* deleted = NULL # Modulo for powers-of-two via bitwise & cdef key_t i = (key & (size - 1)) while cells[i].key != EMPTY_KEY and cells[i].key != key: if cells[i].key == DELETED_KEY: deleted = &cells[i] i = (i + 1) & (size - 1) if deleted is not NULL: if cells[i].key == key: # We need to ensure we don't end up with the key in the table twice. # If we're using a deleted cell and we also have the key, we mark # the later cell as deleted. cells[i].key = DELETED_KEY return deleted return &cells[i] cdef void _resize(Pool mem, MapStruct* map_) except *: cdef size_t new_size = map_.length * 2 cdef Cell* old_cells = map_.cells cdef size_t old_size = map_.length map_.length = new_size map_.filled = 0 map_.cells = mem.alloc(new_size, sizeof(Cell)) cdef size_t i cdef size_t slot for i in range(old_size): if old_cells[i].key != EMPTY_KEY and old_cells[i].key != DELETED_KEY: map_set(mem, map_, old_cells[i].key, old_cells[i].value) mem.free(old_cells)