from ast import literal_eval  # ast.literal_eval() is safe, plain eval() is not

def deduplicate(fi, fo):
    """Copy lines from *fi* to *fo*, keeping only the first line per 'title'.

    Each input line is a Python dict literal; a line whose 'title' value
    (or its absence -> None) was already seen is dropped.
    """
    seen = set()
    for line in fi:
        title = literal_eval(line).get('title')
        if title not in seen:
            seen.add(title)
            fo.write(line)  # emit the original line untouched

if __name__ == '__main__':
    with open('input.txt', 'r') as fi, open('output.txt', 'w') as fo:
        deduplicate(fi, fo)
from ast import literal_eval
from functools import lru_cache

def deduplicate_cached(fi, fo):
    """Dedup-by-title again, but let lru_cache remember the seen titles.

    process() only runs its body on a cache miss — i.e. the first time a
    given title comes in; every repeat is a cache hit and prints nothing.
    Returns the final CacheInfo (hits = duplicates, misses = unique titles).
    """
    @lru_cache(maxsize=None)
    def process(title):
        # Executed once per distinct title: emit the record current at
        # that moment (the first record carrying this title).
        print(record, file=fo)

    for record in map(literal_eval, fi):
        process(record['title'])
    # e.g. CacheInfo(hits=994960, misses=5040, maxsize=None, currsize=5040)
    return process.cache_info()

if __name__ == '__main__':
    with open('input.txt', 'r') as fi, open('output.txt', 'w') as fo:
        deduplicate_cached(fi, fo)
>>> data = urlopen('http://devcave.ru/json.php').read()
>>> data.decode('utf-8')
u'{"key":"\\u0440\\u0443\\u0441\\u0441\\u043a\\u0438\\u0439 \\u044f\\u0437\\u044b\\u043a"}'
>>> data.decode('cp1251')
u'{"key":"\\u0440\\u0443\\u0441\\u0441\\u043a\\u0438\\u0439 \\u044f\\u0437\\u044b\\u043a"}'
def tokenize(data):
    """Yield (name, value) pairs from a ';'-separated 'name: value' string.

    Splitting on the LAST colon only (rsplit) lets a name itself contain
    colons; any leftover colons are then stripped out of the name
    ('prefix: name4: value4' -> ('prefix name4', 'value4')).
    An entry with no colon at all yields (name, True) — a bare flag.
    """
    def cleanup(part):
        # Drop leftover colons and surrounding whitespace.
        return part.replace(':', '').strip()

    for entry in data.strip(';').split(';'):
        # Original used bare map(); on Python 3 that returns an iterator,
        # so len()/append below would fail — materialize it into a list.
        parts = [cleanup(p) for p in entry.rsplit(':', 1)]
        if len(parts) == 1:
            parts.append(True)
        yield tuple(parts)
# Demo: the same input parsed by tokenize() and by a single regex.
# (Renamed from 'input' to avoid shadowing the builtin.)
sample = 'name1: value1; name2: value2; name3; prefix: name4: value4;'
print(dict(tokenize(sample)))
# -> {'prefix name4': 'value4', 'name2': 'value2', 'name3': True, 'name1': 'value1'}

# Regex alternative from the original REPL session. Differences from
# tokenize(): the inner colon is kept ('prefix: name4'), and a bare flag
# maps to '' instead of True.
# NOTE(review): '$' inside [;$] is a literal dollar sign, not end-of-string,
# so this only matches entries followed by ';' — it works here only because
# the sample ends with ';'.
# >>> dict(re.findall(r'\s*([\w\s:]+?)\s*(?::\s*([\w\s]*)\s*)?(?=[;$])', sample))
# {'prefix: name4': 'value4', 'name2': 'value2', 'name3': '', 'name1': 'value1'}