from pathlib import Path
# Create the target directory together with any missing parent directories;
# exist_ok=True keeps the call from failing when the directory already exists.
# 0o777 is the octal spelling of the permission bits 511.
path = Path('/path/to/folder')
path.mkdir(parents=True, exist_ok=True, mode=0o777)
# Five equivalent ways to turn 'CWYCWKAY' into 'CHYCHKAN'. Every line is a
# bare expression, so the resulting string is computed and then discarded.
# 1. Replace every 'W' with 'H', then the trailing 'AY' with 'AN'.
'CWYCWKAY'.replace('W', 'H').replace('AY', 'AN')
# 2. Replace every 'W' with 'H' and every 'Y' with 'N', then turn the first
#    'N' back into 'Y'.
'CWYCWKAY'.replace('W', 'H').replace('Y', 'N').replace('N', 'Y', 1)
# 3. Same as 2, but both single-character swaps happen in one translate() pass.
'CWYCWKAY'.translate(str.maketrans({'W': 'H', 'Y': 'N'})).replace('N', 'Y', 1)
# 4. Same as 3, with the table keyed by code points (87 is 'W', 89 is 'Y').
'CWYCWKAY'.translate({87: 'H', 89: 'N'}).replace('N', 'Y', 1)
# 5. Replace the whole string with the target in a single call.
'CWYCWKAY'.replace('CWYCWKAY', 'CHYCHKAN')
def replace(word='CWYCWKAY'):
    """Return the string ``'CHYCHKAN'``.

    The *word* argument is accepted but never inspected: the same constant
    is returned for every input.

    Args:
        word: Ignored.

    Returns:
        The literal ``'CHYCHKAN'``.
    """
    return 'CHYCHKAN'
from random import choices, randint, sample
from string import ascii_uppercase

import pandas as pd
# Catalogue of 28000 synthetic item names. Each name is a random run of
# 4 to 11 uppercase ASCII letters; names are not guaranteed to be unique.
# The inner generator is consumed lazily, so every length is drawn right
# before the letters of the corresponding name.
items = [
    ''.join(choices(ascii_uppercase, k=name_length))
    for name_length in (randint(4, 11) for _ in range(28000))
]
def generate(receipt_count=3_700_000, report_every=100_000, pool=None):
    """Yield synthetic ``(receipt_id, item_name)`` rows.

    Every receipt id in ``range(receipt_count)`` receives between 2 and 15
    distinct entries drawn at random from *pool*.

    Args:
        receipt_count: Number of receipts to emit.
        report_every: Print the current receipt id whenever it is a multiple
            of this value; a falsy value disables the progress output.
        pool: Sequence of item names to sample from. When omitted, the
            module-level ``items`` list is used.

    Yields:
        ``(receipt_id, item_name)`` tuples, grouped by ascending receipt id.

    Raises:
        ValueError: If *pool* holds fewer entries than the randomly chosen
            number of items for a receipt.
    """
    if pool is None:
        pool = items
    for receipt_id in range(receipt_count):
        # Progress indicator for long runs.
        if report_every and receipt_id % report_every == 0:
            print(receipt_id)
        # sample() draws without replacement, so a receipt never repeats
        # the same pool entry.
        for item in sample(pool, randint(2, 15)):
            yield receipt_id, item
# Materialise the generated rows into a two-column table:
# one row per (receipt, item) pair.
data = pd.DataFrame(
    data=generate(),
    columns=['receipt_id', 'item_name'],
)
from collections import Counter
from itertools import combinations
# Optional step, left disabled: intern the item names.
# from sys import intern
# In this emulation all strings in the DataFrame are already interned by default.
# data['item_name'] = data.item_name.map(intern)

# Count the occurrences of every item pair across all receipts.
statistics = Counter()
# Each receipt's items are sorted first, and combinations() preserves that
# order, so a given pair is always produced as the same tuple.
for _receipt_items in data.groupby('receipt_id', sort=False)['item_name'].agg(sorted):
    # To save memory, hash(pair) could be counted instead of the pair itself.
    statistics.update(combinations(_receipt_items, 2))