1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
|
import array import collections
FREE = -1 DUMMY = -2
class (collections.MutableMapping): 'Space efficient dictionary with fast iteration and cheap resizes.'
def _gen_probes(hashvalue, mask): 'Same sequence of probes used in the current dictionary design' PERTURB_SHIFT = 5 if hashvalue < 0: hashvalue = -hashvalue i = hashvalue & mask yield i perturb = hashvalue while True: i = (5 * i + perturb + 1) & 0xFFFFFFFFFFFFFFFF yield i & mask perturb >>= PERTURB_SHIFT
def _lookup(self, key, hashvalue): 'Same lookup logic as currently used in real dicts' assert self.filled < len(self.indices) freeslot = None for i in self._gen_probes(hashvalue, len(self.indices) - 1): index = self.indices[i] if index == FREE: return (FREE, i) if freeslot is None else (DUMMY, freeslot) elif index == DUMMY: if freeslot is None: freeslot = i elif (self.keylist[index] is key or self.hashlist[index] == hashvalue and self.keylist[index] == key): return (index, i)
def _make_index(n): 'New sequence of indices using the smallest possible datatype' if n <= 2**7: return array.array('b', [FREE]) * n if n <= 2**15: return array.array('h', [FREE]) * n if n <= 2**31: return array.array('l', [FREE]) * n return [FREE] * n
def _resize(self, n): '''Reindex the existing hash/key/value entries. Entries do not get moved, they only get new indices. No calls are made to hash() or __eq__().
''' n = 2 ** n.bit_length() self.indices = self._make_index(n) for index, hashvalue in enumerate(self.hashlist): for i in Dict._gen_probes(hashvalue, n - 1): if self.indices[i] == FREE: break self.indices[i] = index self.filled = self.used
def clear(self): self.indices = self._make_index(8) self.hashlist = [] self.keylist = [] self.valuelist = [] self.used = 0 self.filled = 0
def __getitem__(self, key): hashvalue = hash(key) index, i = self._lookup(key, hashvalue) if index < 0: raise KeyError(key) return self.valuelist[index]
def __setitem__(self, key, value): hashvalue = hash(key) index, i = self._lookup(key, hashvalue) if index < 0: self.indices[i] = self.used self.hashlist.append(hashvalue) self.keylist.append(key) self.valuelist.append(value) self.used += 1 if index == FREE: self.filled += 1 if self.filled * 3 > len(self.indices) * 2: self._resize(4 * len(self)) else: self.valuelist[index] = value
def __delitem__(self, key): hashvalue = hash(key) index, i = self._lookup(key, hashvalue) if index < 0: raise KeyError(key) self.indices[i] = DUMMY self.used -= 1 if index != self.used: lasthash = self.hashlist[-1] lastkey = self.keylist[-1] lastvalue = self.valuelist[-1] lastindex, j = self._lookup(lastkey, lasthash) assert lastindex >= 0 and i != j self.indices[j] = index self.hashlist[index] = lasthash self.keylist[index] = lastkey self.valuelist[index] = lastvalue self.hashlist.pop() self.keylist.pop() self.valuelist.pop()
def __init__(self, *args, **kwds): if not hasattr(self, 'keylist'): self.clear() self.update(*args, **kwds)
def __len__(self): return self.used
def __iter__(self): return iter(self.keylist)
def iterkeys(self): return iter(self.keylist)
def keys(self): return list(self.keylist)
def itervalues(self): return iter(self.valuelist)
def values(self): return list(self.valuelist)
def items(self): return zip(self.keylist, self.valuelist)
def __contains__(self, key): index, i = self._lookup(key, hash(key)) return index >= 0
def get(self, key, default=None): index, i = self._lookup(key, hash(key)) return self.valuelist[index] if index >= 0 else default
def popitem(self): if not self.keylist: raise KeyError('popitem(): dictionary is empty') key = self.keylist[-1] value = self.valuelist[-1] del self[key] return key, value
def __repr__(self): return f'Dict({list(self.items())})'
def show_structure(self): 'Diagnostic method. Not part of the API.' print('=' * 50) print(self) print('Indices:', self.indices) for i, row in enumerate( zip(self.hashlist, self.keylist, self.valuelist)): print(i, row) print('-' * 50)
if __name__ == '__main__': d = Dict([(a,a) for a in range(1)]) d.show_structure()
|
近期评论