import array
import collections
import collections.abc
import itertools
# Placeholder constants stored in the index table; real entries are >= 0.
FREE = -1   # slot has never held an entry
DUMMY = -2  # slot held an entry that has since been deleted


class Dict(collections.abc.MutableMapping):
    """Space efficient dictionary with fast iteration and cheap resizes.

    Hashes, keys and values are kept in three parallel, densely packed
    lists (``hashlist``, ``keylist``, ``valuelist``).  A separate table,
    ``indices``, maps each probe slot either to a position in those lists
    or to one of the ``FREE`` / ``DUMMY`` placeholders.

    The Python 2 style accessors (``iterkeys``, ``itervalues``,
    ``iteritems``, list-returning ``keys``/``values``/``items``) are kept
    for backward compatibility.
    """

    @staticmethod
    def _gen_probes(hashvalue, mask):
        """Yield an endless sequence of slot numbers to probe.

        Same sequence of probes used in the current dictionary design.
        *mask* is the table size minus one; every yielded slot is
        ``<= mask``.
        """
        PERTURB_SHIFT = 5
        if hashvalue < 0:
            hashvalue = -hashvalue
        i = hashvalue & mask
        yield i
        perturb = hashvalue
        while True:
            # Keep the running value within 64 bits before masking.
            i = (5 * i + perturb + 1) & 0xFFFFFFFFFFFFFFFF
            yield i & mask
            perturb >>= PERTURB_SHIFT

    def _lookup(self, key, hashvalue):
        """Find *key* in the table.

        Same lookup logic as currently used in real dicts.

        Returns:
            ``(index, slot)`` when the key is present, where *index* is
            its position in the entry lists.  Otherwise ``(FREE, slot)``
            or ``(DUMMY, slot)``, where *slot* is the position in
            ``indices`` at which a new entry should be stored.
        """
        assert self.filled < len(self.indices)  # At least one open slot
        freeslot = None
        for i in self._gen_probes(hashvalue, len(self.indices) - 1):
            index = self.indices[i]
            if index == FREE:
                # End of the probe chain: prefer the first dummy seen.
                return (FREE, i) if freeslot is None else (DUMMY, freeslot)
            elif index == DUMMY:
                if freeslot is None:
                    freeslot = i
            elif (
                self.keylist[index] is key
                or self.hashlist[index] == hashvalue
                and self.keylist[index] == key
            ):
                return (index, i)

    @staticmethod
    def _make_index(n):
        """Return *n* ``FREE`` slots using the smallest possible datatype."""
        if n <= 2 ** 7:
            return array.array("b", [FREE]) * n  # signed char
        if n <= 2 ** 15:
            return array.array("h", [FREE]) * n  # signed short
        if n <= 2 ** 31:
            return array.array("l", [FREE]) * n  # signed long
        return [FREE] * n  # python integers

    def _resize(self, n):
        """Reindex the existing hash/key/value entries.

        Entries do not get moved, they only get new indices.
        No calls are made to hash() or __eq__().
        """
        n = 2 ** n.bit_length()  # round-up to power-of-two
        self.indices = self._make_index(n)
        for index, hashvalue in enumerate(self.hashlist):
            for i in Dict._gen_probes(hashvalue, n - 1):
                if self.indices[i] == FREE:
                    break
            self.indices[i] = index
        # The fresh table contains no dummies.
        self.filled = self.used

    def clear(self):
        """Remove every entry and reset the table to its initial size."""
        self.indices = self._make_index(8)
        self.hashlist = []
        self.keylist = []
        self.valuelist = []
        self.used = 0
        self.filled = 0  # used + dummies

    def __getitem__(self, key):
        """Return the value stored for *key*; raise ``KeyError`` if absent."""
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index < 0:
            raise KeyError(key)
        return self.valuelist[index]

    def __setitem__(self, key, value):
        """Insert *key* or overwrite its value, growing the table if needed."""
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index < 0:
            self.indices[i] = self.used
            self.hashlist.append(hashvalue)
            self.keylist.append(key)
            self.valuelist.append(value)
            self.used += 1
            # A reused DUMMY slot is already counted in ``filled``.
            if index == FREE:
                self.filled += 1
                if self.filled * 3 > len(self.indices) * 2:
                    self._resize(4 * len(self))
        else:
            self.valuelist[index] = value

    def __delitem__(self, key):
        """Delete *key*; raise ``KeyError`` if it is not present."""
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index < 0:
            raise KeyError(key)
        self.indices[i] = DUMMY
        self.used -= 1
        # If needed, swap with the lastmost entry to avoid leaving a "hole"
        if index != self.used:
            lasthash = self.hashlist[-1]
            lastkey = self.keylist[-1]
            lastvalue = self.valuelist[-1]
            lastindex, j = self._lookup(lastkey, lasthash)
            assert lastindex >= 0 and i != j
            self.indices[j] = index
            self.hashlist[index] = lasthash
            self.keylist[index] = lastkey
            self.valuelist[index] = lastvalue
        # Remove the lastmost entry
        self.hashlist.pop()
        self.keylist.pop()
        self.valuelist.pop()

    def __init__(self, *args, **kwds):
        """Initialise from the same arguments that ``update()`` accepts.

        Calling ``__init__`` again on an already initialised instance
        keeps the existing entries and only applies the update.
        """
        if not hasattr(self, "keylist"):
            self.clear()
        self.update(*args, **kwds)

    def __len__(self):
        """Return the number of live entries."""
        return self.used

    def __iter__(self):
        """Iterate over the keys in entry-list order."""
        return iter(self.keylist)

    def iterkeys(self):
        """Return an iterator over the keys."""
        return iter(self.keylist)

    def keys(self):
        """Return a new list of the keys."""
        return list(self.keylist)

    def itervalues(self):
        """Return an iterator over the values."""
        return iter(self.valuelist)

    def values(self):
        """Return a new list of the values."""
        return list(self.valuelist)

    def iteritems(self):
        """Return a lazy iterator over ``(key, value)`` pairs."""
        # Python 3's built-in zip is lazy; itertools.izip no longer exists.
        return zip(self.keylist, self.valuelist)

    def items(self):
        """Return a new list of ``(key, value)`` pairs."""
        # Materialise the pairs so the result is reusable and printable.
        return list(zip(self.keylist, self.valuelist))

    def __contains__(self, key):
        """Return True when *key* is present."""
        index, i = self._lookup(key, hash(key))
        return index >= 0

    def get(self, key, default=None):
        """Return the value for *key*, or *default* when it is absent."""
        index, i = self._lookup(key, hash(key))
        return self.valuelist[index] if index >= 0 else default

    def popitem(self):
        """Remove and return the last ``(key, value)`` pair in the entry lists.

        Raises:
            KeyError: if the dictionary is empty.
        """
        if not self.keylist:
            raise KeyError("popitem(): dictionary is empty")
        key = self.keylist[-1]
        value = self.valuelist[-1]
        del self[key]
        return key, value

    def __repr__(self):
        """Return ``Dict([...])`` showing the list of item pairs."""
        return "Dict(%r)" % (self.items(),)

    def show_structure(self):
        """Diagnostic method.  Not part of the API."""
        print("=" * 50)
        print(self)
        print("Indices:", self.indices)
        for i, row in enumerate(zip(self.hashlist, self.keylist, self.valuelist)):
            print(i, row)
        print("-" * 50)
if __name__ == "__main__":
    # Build a small sample mapping and dump its internal layout.
    pairs = [("timmy", "red"), ("barry", "green"), ("guido", "blue")]
    d = Dict(pairs)
    d.show_structure()
In [6]: x = '[{"tzid": "000000000", "phone": "000000", "service": "test", "status": "TZ_NUM_PREPARE"}]'
In [7]: x
Out[7]: '[{"tzid": "000000000", "phone": "000000", "service": "test", "status": "TZ_NUM_PREPARE"}]'
In [8]: json.loads(x)
Out[8]:
[{'tzid': '000000000',
'phone': '000000',
'service': 'test',
'status': 'TZ_NUM_PREPARE'}]
from concurrent.futures import ProcessPoolExecutor
# Sample input: each key maps to a list of integers.
D = {
    'x': [1, 2, 3],
    'y': [4, 5, 6],
    'z': [7, 8, 9],
}
def process(arg, factor=10):
    """Scale every value of a single ``(key, values)`` item.

    Takes one packed argument so it can be passed directly to
    ``executor.map`` together with ``dict.items()``.

    Args:
        arg: A ``(key, values)`` pair; ``values`` is an iterable of numbers.
        factor: Multiplier applied to each value (defaults to 10).

    Returns:
        A ``(key, scaled_values)`` tuple where ``scaled_values`` is a list.
    """
    key, values = arg
    return key, [v * factor for v in values]
if __name__ == "__main__":
    # Fan the items out to worker processes and gather them into a dict.
    with ProcessPoolExecutor() as pool:
        scaled = dict(pool.map(process, D.items()))
        print(scaled)
from concurrent.futures import ProcessPoolExecutor
from time import sleep
# Sample input: each key maps to a list of integers.
D = {
    'x': [1, 2, 3],
    'y': [4, 5, 6],
    'z': [7, 8, 9],
}
def process(arg, factor=10, delay=1):
    """Scale every value of a ``(key, values)`` item after a pause.

    Prints the item being handled, sleeps for *delay* seconds to simulate
    slow work, then returns the scaled values.

    Args:
        arg: A ``(key, values)`` pair; ``values`` is an iterable of numbers.
        factor: Multiplier applied to each value (defaults to 10).
        delay: Seconds to sleep before computing (defaults to 1).

    Returns:
        A ``(key, scaled_values)`` tuple where ``scaled_values`` is a list.
    """
    key, values = arg
    print("executing", key, values)
    sleep(delay)
    return key, [v * factor for v in values]
if __name__ == "__main__":
    # Two workers handle the three items.
    with ProcessPoolExecutor(max_workers=2) as pool:
        scaled = dict(pool.map(process, D.items()))
        print(scaled)
import sys
def fs(s):
    """Format a size in bytes as a short human-readable string.

    Sizes of at least 2**20 are shown in units of 2**20 and sizes of at
    least 2**10 in units of 2**10, both with two decimals; anything
    smaller is shown as a plain count.  Unit suffixes are in Russian.
    """
    # Largest unit first so the first matching threshold wins.
    for threshold, suffix in ((1 << 20, "МБ"), (1 << 10, "КБ")):
        if s >= threshold:
            return "{:.2f}{}".format(s / threshold, suffix)
    return "{}{}".format(s, "Б")
def g():
    """Print a dict's reported size when full, emptied, and after one insert.

    Fills a dict with ``int(2**16 * (2/3))`` integer keys, deletes them
    one by one, then inserts a single new key, printing
    ``sys.getsizeof`` of the dict (formatted by ``fs``) at each stage.
    """
    count = int(2 ** 16 * (2 / 3))
    table = dict.fromkeys(range(count))
    full_size = fs(sys.getsizeof(table))
    print("Размер словаря:", full_size, "Элементов:", len(table))

    for key in range(count):
        del table[key]
    emptied_size = fs(sys.getsizeof(table))
    print("Размер словаря всё ещё", emptied_size, "хотя он пуст. Элементов:", len(table))

    # Insert just one element into the now-empty (but still large) dict.
    table["hello"] = "world"
    regrown_size = fs(sys.getsizeof(table))
    print("Хэш-таблица превысила load factor (2/3), реаллокация. Новый размер:", regrown_size, "Элементов:", len(table))


g()
class Book:
    """A book identified by its name.

    Two books compare equal when their names are equal.  Because
    equality is based on ``name``, the hash is too, so books can be used
    in sets and as dict keys; avoid changing ``name`` while a book is
    stored in such a container.
    """

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return f"<Book: {self.name}>"

    def __eq__(self, other):
        # Returning NotImplemented lets Python try the reflected
        # comparison; for unrelated types ``==`` still evaluates to False.
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.name == other.name

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable.
        return hash(self.name)
class Lib:
    """A simple collection of ``Book`` objects.

    Supports ``lib += book`` for adding a book and iteration over the
    stored books.
    """

    def __init__(self, books=None):
        """Create a library, optionally pre-filled from *books*.

        Args:
            books: Any iterable of books, or ``None`` for an empty
                library.  The items are copied into a new list, so the
                caller's container is never shared or mutated.
        """
        # list() rather than books[:] so tuples and generators work too
        # and ``self.books`` always supports ``append``.
        self.books = [] if books is None else list(books)

    def __iadd__(self, other):
        """Append *other* to the library.

        Raises:
            TypeError: if *other* is not a ``Book``.
        """
        if not isinstance(other, Book):
            raise TypeError("Not a book")
        self.books.append(other)
        return self

    def __iter__(self):
        return iter(self.books)
# Demo: create a library, add two books with ``+=``, then list them.
lib = Lib()
book1, book2 = Book("Fluent Python"), Book("Mein Kampf")
lib += book1
lib += book2

for book in lib:
    print(book)
In [1]: x = ['a', 'aaa', 'A', 'AA', 'AAA', 'AaA']
In [2]: sorted(x)
Out[2]: ['A', 'AA', 'AAA', 'AaA', 'a', 'aaa']
In [3]: [[ord(i) for i in el] for el in sorted(x)]
Out[3]: [[65], [65, 65], [65, 65, 65], [65, 97, 65], [97], [97, 97, 97]]
def measure(fn, n=10, trials=10 ** 6):
    """Tally how often each value appears at each position of ``fn(n)``.

    Calls ``fn(n)`` *trials* times and counts, for every element ``el``
    found at position ``i`` of the returned sequence, one hit in
    ``p[el][i]``.

    Args:
        fn: Callable taking ``n`` and returning an iterable of integers.
            Assumes the values are valid row indices and there are at
            most ``n`` of them (e.g. a permutation of ``range(n)``) --
            confirm against the ``get_seq*`` functions being measured.
        n: Size passed to ``fn``; also the number of rows and columns of
            the result (defaults to 10).
        trials: Number of times ``fn`` is called (defaults to 10**6).

    Returns:
        An ``n`` x ``n`` list of lists where ``p[el][i]`` is the number
        of trials in which ``el`` appeared at position ``i``.
    """
    # Independent rows; [[0] * n] * n would alias a single row n times.
    p = [[0] * n for _ in range(n)]
    for _ in range(trials):
        for i, el in enumerate(fn(n)):
            p[el][i] += 1
    return p
>>> measure(get_seq1)
[[99919, 100094, 99918, 100106, 100275, 100068, 100154, 99995, 100254, 99217],
[99766, 100119, 100263, 99692, 99904, 99946, 100378, 99573, 100052, 100307],
[100470, 100170, 99583, 100699, 99723, 99924, 99743, 100296, 99856, 99536],
[100373, 100060, 99779, 99566, 99761, 99850, 100135, 100109, 100081, 100286],
[100187, 99933, 99528, 100120, 99986, 99897, 99798, 100082, 100220, 100249],
[100357, 99866, 99828, 99928, 100218, 100322, 100546, 99774, 99675, 99486],
[99533, 99710, 100332, 99507, 100526, 100117, 99435, 100356, 100378, 100106],
[99571, 100246, 99968, 100280, 100162, 99406, 99907, 100185, 99752, 100523],
[99913, 99821, 100573, 99876, 99931, 100207, 99895, 99962, 100054, 99768],
[99911, 99981, 100228, 100226, 99514, 100263, 100009, 99668, 99678, 100522]]
>>> measure(get_seq2)
[[99740, 100055, 99943, 99346, 99970, 100129, 100306, 99887, 100170, 100454],
[100324, 100029, 99635, 100189, 99822, 100019, 99970, 100613, 99778, 99621],
[99487, 100227, 100431, 99973, 99767, 99982, 100256, 100325, 99777, 99775],
[99974, 99739, 100295, 100221, 99926, 100097, 99134, 100275, 99841, 100498],
[100091, 99770, 99578, 99967, 100364, 100097, 99820, 99565, 100747, 100001],
[99874, 99914, 100011, 99799, 99947, 99854, 100629, 99938, 100135, 99899],
[100143, 100200, 99946, 100157, 99754, 99598, 100223, 99860, 99747, 100372],
[100095, 100058, 100037, 100209, 100549, 100335, 99759, 99231, 100087, 99640],
[100117, 99971, 99967, 100017, 99682, 99696, 100147, 100634, 99514, 100255],
[100155, 100037, 100157, 100122, 100219, 100193, 99756, 99672, 100204, 99485]]
pip3 install fuzzywuzzy[speedup]
from fuzzywuzzy import fuzz
# Long free-text record that will be searched.
x = "0 .. трубок использовали для прожигания стальковша.Замена воронки 18м 8сл. Разлита полностью."
# Short query phrase; note that its second word is spelled differently
# from the corresponding word in x ("использооали" vs "использовали").
y = "трубок использооали для прожигания"
# Compare the two strings with fuzzywuzzy's partial_ratio. The result is
# not stored or printed here, so it is only visible in an interactive session.
fuzz.partial_ratio(x, y)