Я просто хочу, чтобы добавился столбец id, который бы шёл по порядку, с той таблицы, которая у меня уже есть.
import re
from pprint import pprint
def replace_old_struct(struct):
    """Rewrite a ``struct<...>``/``array<...>`` type string into call-like text.

    ``struct<`` becomes ``dict(``, ``array<`` becomes ``list(``, every ``>``
    becomes ``)``, and a space is inserted after each ``:`` and ``,``.
    The substitutions are applied in the order listed below.
    """
    substitutions = (
        ('struct<', 'dict('),
        ('array<', 'list('),
        ('>', ')'),
        (':', ': '),
        (',', ', '),
    )
    for old, new in substitutions:
        struct = struct.replace(old, new)
    return struct
def set_mark(struct, patterns):
    """Wrap every word captured by *patterns* in single quotes.

    For each regular expression in *patterns* (tried in order), every distinct
    match found in the current text is replaced by the same word surrounded
    with ``'``.

    Only whole, not-yet-quoted occurrences are rewritten: a word is skipped
    when it is directly adjacent to another word character or to a quote.
    Plain ``str.replace`` also rewrote the word inside longer identifiers
    (e.g. key ``id`` corrupted key ``grid``).

    :param struct: text to annotate.
    :param patterns: iterable of regex patterns, each yielding the words to quote.
    :return: the annotated text.
    """
    for pattern in patterns:
        for word in set(re.findall(pattern, struct)):
            whole_word = re.compile(rf"(?<![\w']){re.escape(word)}(?![\w'])")
            # A callable replacement keeps the quoted word literal (no
            # backslash/group-reference processing by re.sub).
            struct = whole_word.sub(lambda _match, repl=f"'{word}'": repl, struct)
    return struct
def pair_rbracket(struct: str, rbracket):
    """Replace the first unmatched ``)`` in *struct* with *rbracket*.

    The text is scanned left to right while tracking the nesting depth of
    ``(``/``)``; the first ``)`` that closes more than was opened is the one
    that gets replaced.

    :param struct: text to scan.
    :param rbracket: replacement for the unmatched closing parenthesis.
    :return: the rewritten text, or *struct* unchanged when every ``)`` is
        matched (previously the function fell off the end and returned
        ``None`` in that case).
    """
    depth = 0
    for index, symbol in enumerate(struct):
        if symbol == '(':
            depth += 1
        elif symbol == ')':
            depth -= 1
            if depth < 0:
                return struct[:index] + rbracket + struct[index + 1:]
    return struct
def pair_lbracket(struct, lbracket):
    """Return *struct* with its first character replaced by *lbracket*."""
    remainder = struct[1:]
    return '{}{}'.format(lbracket, remainder)
def pair_brackets(struct, struct_type='dict'):
    """Turn the leading ``(`` of *struct* and its partner ``)`` into real brackets.

    ``'dict'`` uses ``{``/``}`` and ``'list'`` uses ``[``/``]``. Text that
    does not start with ``(`` is returned unchanged.

    :param struct: text that follows a ``dict``/``list`` marker.
    :param struct_type: ``'dict'`` or ``'list'``.
    :return: the text with one bracket pair converted.
    :raises KeyError: if *struct_type* is neither ``'dict'`` nor ``'list'``.
    """
    changes = {'dict': {'(': '{', ')': '}'},
               'list': {'(': '[', ')': ']'}}
    lbracket = changes[struct_type]['(']
    rbracket = changes[struct_type][')']
    # startswith() rather than struct[0]: an empty tail must not raise IndexError.
    if struct.startswith('('):
        struct = pair_lbracket(struct, lbracket)
        # With the opening parenthesis gone, its partner is now the first
        # unmatched ')' in the text.
        struct = pair_rbracket(struct, rbracket)
    return struct
def set_struct(struct, struct_type):
    """Convert the first ``struct_type(...)`` group in *struct* to bracket form.

    The first occurrence of the marker word (``'dict'`` or ``'list'``) is
    removed and the parenthesised group that follows it is rewritten by
    ``pair_brackets``.

    :param struct: text to rewrite.
    :param struct_type: marker word to look for.
    :return: the rewritten text, or *struct* unchanged when the marker is
        absent (previously this raised ``IndexError``).
    """
    head, marker, tail = struct.partition(struct_type)
    if not marker:
        return struct
    return head + pair_brackets(tail, struct_type)
def replace_struct(struct, struct_type):
    """Apply ``set_struct`` once per occurrence of *struct_type* in *struct*.

    The number of passes is fixed up front from the initial text.
    """
    remaining = struct.count(struct_type)
    while remaining > 0:
        struct = set_struct(struct, struct_type)
        remaining -= 1
    return struct
if __name__ == '__main__':
    import ast

    # (column name, type description) pair; only the type description is parsed.
    data = ('food',
            'struct<Milk:array<struct<id:string,type:string>>,Oil:string,batter:array<struct<id:string,type:string>>>')
    my_struct = data[1]
    # First pattern captures the words before ':' (keys); second captures a
    # word that sits between ': ' and ')' (a value closing a group).
    patterns = [r"([\w]+)\:", r"\: ([\w]+)\)"]
    struct = set_mark(replace_old_struct(my_struct), patterns)
    struct_types = ['dict', 'list']
    for struct_type in struct_types:
        struct = replace_struct(struct, struct_type)
    # The text is now a plain dict/list/str literal, so literal_eval is enough
    # and, unlike eval, it cannot execute arbitrary expressions.
    struct = ast.literal_eval(struct)
    pprint(struct)
def tokenize(line: str):
    """Yield the tokens of *line* one by one.

    A token is either a maximal run of alphanumeric/underscore characters or
    a single character of any other kind (punctuation, whitespace, ...).
    """
    word_start = None
    for pos, ch in enumerate(line):
        if ch.isalnum() or ch == '_':
            # Remember where the current word began; emit it once it ends.
            if word_start is None:
                word_start = pos
            continue
        if word_start is not None:
            yield line[word_start:pos]
            word_start = None
        yield ch
    if word_start is not None:
        yield line[word_start:]
# Demo: split a sample type description into tokens and show them.
data = 'struct<Milk:array<struct<id:string,type:string>>,Oil:string,batter:array<struct<id:string,type:string>>>'
tokens = [*tokenize(data)]
print(tokens)
import typing as t
import enum
class Parser:
    """Recursive-descent parser for ``struct<...>`` / ``array<...>`` type strings.

    A ``struct`` becomes a dict mapping field names to parsed types, an
    ``array`` becomes a one-entry dict ``{None: element_type}``, and any other
    token is returned as-is (a plain type name).

    All syntax problems, including input that ends too early, are reported
    as ``ValueError``.
    """

    @staticmethod
    def tokenize(line: str) -> t.Iterable[str]:
        """Yield word tokens (alphanumerics/underscore runs) and single other characters."""
        token = []
        for ch in line:
            if ch.isalnum() or ch == '_':
                token.append(ch)
            else:
                if token:
                    yield ''.join(token)
                    token.clear()
                yield ch
        if token:
            yield ''.join(token)

    def process(self, line: str):
        """Parse *line* and return the resulting structure.

        :raises ValueError: on malformed or truncated input, or when tokens
            remain after a complete type has been parsed.
        """
        tokens = list(self.tokenize(line))
        result = self.do_type(tokens)
        if tokens:
            raise ValueError('Extra tokens at the end of the line: {0!r}'.format(''.join(tokens)))
        return result

    def _consume(self, tokens: t.List[str], expected: t.Optional[str] = None):
        """Pop and return the next token, optionally checking it equals *expected*.

        :raises ValueError: if no tokens are left or the token differs from
            *expected*.
        """
        if not tokens:
            # Previously list.pop(0) leaked an IndexError for truncated input.
            if expected is None:
                raise ValueError('Unexpected end of input')
            raise ValueError(f'Expected {expected!r}, got end of input')
        token = tokens.pop(0)
        if expected is not None and token != expected:
            raise ValueError(f'Expected {expected!r}, got {token!r}')
        return token

    def do_type(self, tokens: t.List[str]) -> t.Union[str, t.Dict]:
        """Parse one type: a struct, an array, or a single plain token."""
        if not tokens:
            raise ValueError('Unexpected end of input')
        if tokens[0] == 'struct':
            return self.do_struct(tokens)
        elif tokens[0] == 'array':
            return self.do_array(tokens)
        else:
            return self._consume(tokens)

    def do_struct(self, tokens: t.List[str]) -> t.Dict:
        """Parse ``struct<name:type,...>`` into a ``{name: type}`` dict."""
        self._consume(tokens, 'struct')
        self._consume(tokens, '<')
        result = {}
        while True:
            name, value = self.do_struct_part(tokens)
            result[name] = value
            # An exhausted token list falls through to the '>' check below,
            # which reports the missing bracket.
            if tokens and tokens[0] == ',':
                self._consume(tokens, ',')
            else:
                break
        self._consume(tokens, '>')
        return result

    def do_struct_part(self, tokens: t.List[str]) -> t.Tuple[str, t.Union[str, t.Dict]]:
        """Parse a single ``name:type`` field and return it as a pair."""
        name = self._consume(tokens)
        self._consume(tokens, ':')
        value = self.do_type(tokens)
        return name, value

    def do_array(self, tokens: t.List[str]) -> t.Dict:
        """Parse ``array<type>`` into ``{None: type}``."""
        self._consume(tokens, 'array')
        self._consume(tokens, '<')
        result = {None: self.do_type(tokens)}
        self._consume(tokens, '>')
        return result
# Demo: parse a sample type description with Parser and print the result.
data = 'struct<Milk:array<struct<id:string,type:string>>,Oil:string,batter:array<struct<id:string,type:string>>>'
p = Parser()
print(p.process(line=data))
import ast
# Illustrative snippet: the ``...`` entries stand in for data that is not
# shown here, so this literal is not runnable as written.
data = {
'Action': [...],
...
}
# Parse every entry of data['Action'] with ast.literal_eval and order the
# results in descending order of (x[1], x[0]). Judging by the sample output
# below the parsed entries are (title, year) string tuples - TODO confirm.
# The sorted list is not stored anywhere.
sorted(map(ast.literal_eval, data['Action']), key=lambda x:(x[1], x[0]), reverse=True)
# Sample result:
# [('Terminator Genisys', '2015'),
# ('Terminator Salvation', '2009'),
# ('Terminator 3: Rise of the Machines', '2003'),
# ('Terminal Velocity', '1994'),
# ('Terminator 2: Judgment Day', '1991'),
# ('Terminator, The', '1984')]
from ast import literal_eval
# Text of a Python list literal; each item is a 'title;year' string.
s = '''["A Dog's Purpose;2017", 'Fist Fight;2017', 'The Lego Batman Movie;2017', 'Jim Gaffigan: Cinco;2017', 'The Big Sick;2017', 'The Boss Baby;2017', 'Bill Burr: Walk Your Way Out;2017', 'Neal Brennan: 3 Mics;2017', 'Dave Chappelle: The Age of Spin;2017', 'CHiPS;2017', 'Table 19;2017', 'Dave Chappelle: Deep in the Heart of Texas;2017', 'Baywatch;2017', 'Snatched;2017', 'Sandy Wexler;2017', 'Win It All;2017', 'Captain Underpants: The First Epic Movie;2017', 'Cars 3;2017', 'Rough Night;2017', 'Despicable Me 3;2017', 'The Meyerowitz Stories;2017', 'War Machine;2017', 'Vir Das: Abroad Understanding;2017', "Norm Macdonald: Hitler's Dog, Gossip & Trickery;2017", 'Goon: Last of the Enforcers;2017', 'Oh, Hello: On Broadway;2017', 'The House;2017', 'Logan Lucky;2017', 'The Emoji Movie;2017', 'Rory Scovel Tries Stand-Up for the First Time;2017', "The Hitman's Bodyguard;2017", 'Self-criticism of a Bourgeois Dog;2017', 'Kingsman: The Golden Circle;2017', 'Ari Shaffir: Double Negative;2017', 'The Nut Job 2: Nutty by Nature;2017', 'Little Boxes;2017', 'Maz Jobrani: Immigrant;2017', 'Lady Bird;2017', 'Paddington 2;2017', 'The Death of Stalin;2017', 'Christina P: Mother Inferior;2017', 'Creep 2;2017', 'A Bad Moms Christmas;2017', "Daddy's Home 2;2017", 'The Disaster Artist;2017', 'Ferdinand;2017', 'Jack Whitehall: At Large;2017', 'Lynne Koplitz: Hormonal Beast;2017', 'Craig Ferguson: Tickle Fight;2017', 'Judd Apatow: The Return;2017', 'Dave Chappelle: Equanimity;2017', 'Dave Chappelle: The Bird Revelation;2017', 'Game Night;2018', 'Isle of Dogs;2018', 'The Clapper;2018', 'Tom Segura: Disgraceful;2018', 'When We First Met;2018', 'Fred Armisen: Standup for Drummers;2018', 'Love, Simon;2018', 'Game Over, Man!;2018', 'Blockers;2018', 'Deadpool 2;2018', 'Sorry to Bother You;2018', 'Ant-Man and the Wasp;2018', 'Mamma Mia: Here We Go Again!;2018', 'Tag;2018', 'The Man Who Killed Don Quixote;2018', 'Boundaries;2018', 'BlacKkKlansman;2018', 'Jeff Ross Roasts the Border;2017', 
'Gintama;2017', 'Black Butler: Book of the Atlantic;2017', 'No Game No Life: Zero;2017']'''
# Turn the text into an actual list of strings.
s = literal_eval(s)
def fucking_filter(arr, max_count, key=lambda n: n):
    """Return the items of *arr*, in order, keeping at most *max_count* per key.

    Items are grouped by ``key(item)``; once a group has contributed
    *max_count* items, further items of that group are dropped.
    """
    seen = {}
    kept = []
    for item in arr:
        group = key(item)
        occurrences = seen[group] = seen.get(group, 0) + 1
        if occurrences > max_count:
            continue
        kept.append(item)
    return kept
# Print every item that survives the per-key limit, one per line.
for n in fucking_filter(arr=genres, max_count=N):
    print(n)
# NOTE(review): escape() presumably returns the value already wrapped in SQL
# quotes, so embedding the result inside a LIKE pattern leaves the quotes
# in the middle of the pattern - confirm against the driver.
gener = connection.escape(gener)
В итоге в запросе вы получаете LIKE "%'blabla'%".
.def escape(self, obj, mapping=None):
"""Escape whatever value is passed.
Non-standard, for internal use; do not use this in your applications.
"""
# Escape the LIKE wildcards (and the escape character itself) so the user's
# text is matched literally inside the %...% pattern.
like_escapes = str.maketrans({'\\': '\\\\', '%': '\\%', '_': '\\_'})
# The text values are bound as driver parameters instead of being pasted into
# the SQL: the template's name/gener slots receive named placeholders and the
# driver quotes the real values. The previous code formatted a one-element
# tuple into the pattern (yielding "%('text',)%") and used invalid '\%' escapes.
# NOTE(review): assumes a DB-API driver with the "pyformat" paramstyle
# (e.g. PyMySQL / mysqlclient) and a template without literal '%' characters;
# `gener` must be the raw text here, not a value already passed through
# connection.escape() - confirm.
query = script.format(year_from=args.year_from,
                      year_to=args.year_to,
                      name='%(name)s',
                      gener='%(gener)s',
                      rating=1,
                      N=args.N)
cursor.execute(query, {
    'name': '%{}%'.format(args.regexp.translate(like_escapes)),
    'gener': '%{}%'.format(gener.translate(like_escapes)),
})
-- Movies with their average rating, restricted by year range, title pattern
-- and genre pattern, best-rated first. The names in curly braces are
-- template slots, presumably filled in by the caller before execution.
SELECT
    m.title,
    year,
    avg(r.rating)
FROM movies AS m
JOIN rating AS r
    ON m.movie_id = r.movie_id
WHERE m.year BETWEEN {year_from} AND {year_to}
  AND m.title LIKE {name}
  AND m.genres LIKE {gener}
GROUP BY r.movie_id, m.title, m.year
HAVING avg(r.rating) > {rating}
ORDER BY avg(r.rating) DESC, m.year DESC, m.title ASC
LIMIT {N};
Есть папка, в которой хранятся SQL-файлы для создания БД, создания таблиц в этой БД и импорта данных в эти таблицы.
Как автоматизировать запуск этих SQL-скриптов?
from collections import defaultdict
# Per key (column 1 of each rating row): running sum of the ratings taken
# from column 2, and the number of rows seen.
total = defaultdict(float)
count = defaultdict(int)
for line in data_rating:
    total[line[1]] += float(line[2])
    count[line[1]] += 1
averages = {key: total[key] / n for key, n in count.items()}  # average ratings
# ...and sort the movie list by descending rating; entries whose column 1
# has no recorded average are treated as rated 0.
data_movies.sort(key=lambda movie: averages.get(movie[1], 0), reverse=True)