('food', 'struct<Milk:array<struct<id:string,type:string>>,Oil:string,batter:array<struct<id:string,type:string>>>')
'struct<Milk:array<struct<id:string,type:string>>,Oil:string,batter:array<struct<id:string,type:string>>>'
Нужно распарсить строку 'struct<Milk:array<struct<id:string,type:string>>,Oil:string,batter:array<struct<id:string,type:string>>>' на список [Milk, Oil, batter].
def get_query(tuple):
    """Build a wildcard selector for a struct column.

    Args:
        tuple: An indexable ``(column_name, type_string)`` pair. Only the
            first element is used. The parameter name shadows the builtin
            but is kept so existing keyword callers keep working.

    Returns:
        The column name followed by ``.*``, e.g. ``'food.*'``.
    """
    column_name = tuple[0]
    return f'{column_name}.*'
# Build the list of column selectors: struct columns go through get_query(),
# every other column is referenced by its plain name.
# Each entry of df.dtypes is indexed as (column_name, type_string)
# -- presumably a Spark-style DataFrame; TODO confirm.
columns = []
for column in df.dtypes:
    # Goal: get_query() should eventually produce 'food.Milk', 'food.Oil',
    # 'food.batter' for this pair instead of 'food.*'.
    if column[1].startswith('struct'):
        columns.append(get_query(column))
    else:
        columns.append(column[0])
# With this code, columns ends up as ['food.*', 'id', 'name', 'ppu', 'topping', 'type'].
# Only returning 'food.*' is implemented; what is actually needed instead is
# 'food.Milk', 'food.Oil', 'food.batter'.
# This is just a sample of the code: it does not yet implement parsing the string
# 'struct<Milk:array<struct<id:string,type:string>>,Oil:string,batter:array<struct<id:string,type:string>>>'
# into [Milk, Oil, batter], which is the result required.
# No working attempt at that parsing has been written so far.
def tokenize(line: str):
    """Split *line* into word tokens and single-character punctuation tokens.

    A word is a maximal run of alphanumeric characters and underscores.
    Every other character (including whitespace) is yielded on its own.

    Args:
        line: The text to split.

    Yields:
        Tokens in their original order; joining them reproduces *line*.
    """
    word_start = None  # index where the current word began, if inside one
    for position, ch in enumerate(line):
        if ch.isalnum() or ch == '_':
            if word_start is None:
                word_start = position
            continue
        # A non-word character ends any pending word and is a token itself.
        if word_start is not None:
            yield line[word_start:position]
            word_start = None
        yield ch
    if word_start is not None:
        yield line[word_start:]
# Demo: tokenize the sample struct type string and show the resulting tokens.
data = 'struct<Milk:array<struct<id:string,type:string>>,Oil:string,batter:array<struct<id:string,type:string>>>'
tokens = [*tokenize(data)]
print(tokens)
import typing as t
import enum
class Parser:
    """Recursive-descent parser for type strings like ``struct<a:array<string>>``.

    Result shape:
        * a plain type name is returned as a ``str``;
        * ``struct<name:type,...>`` becomes a ``dict`` mapping field names
          to parsed types (``struct<>`` becomes an empty dict);
        * ``array<type>`` becomes ``{None: parsed_type}``.

    All syntax errors, including truncated input, raise ``ValueError``.
    """

    @staticmethod
    def tokenize(line: str) -> t.Iterable[str]:
        """Yield word tokens (alphanumerics/underscore runs) and single punctuation characters."""
        token = []
        for ch in line:
            if ch.isalnum() or ch == '_':
                token.append(ch)
            else:
                # Punctuation terminates the pending word and is its own token.
                if token:
                    yield ''.join(token)
                    token.clear()
                yield ch
        if token:
            yield ''.join(token)

    @staticmethod
    def _is_word(token: str) -> bool:
        """Return True when *token* consists only of alphanumerics and underscores."""
        return bool(token) and all(ch.isalnum() or ch == '_' for ch in token)

    @staticmethod
    def _peek(tokens: t.List[str]) -> str:
        """Return the next token without consuming it; raise ValueError at end of input."""
        if not tokens:
            raise ValueError('Unexpected end of input')
        return tokens[0]

    def process(self, line: str):
        """Parse a complete type string.

        Args:
            line: The type string, e.g. ``'struct<a:string>'``.

        Returns:
            ``str`` for a plain type, ``dict`` for struct/array (see class docs).

        Raises:
            ValueError: On malformed or truncated input, or trailing tokens.
        """
        tokens = list(self.tokenize(line))
        result = self.do_type(tokens)
        if tokens:
            raise ValueError('Extra tokens at the end of the line: {0!r}'.format(''.join(tokens)))
        return result

    def _consume(self, tokens: t.List[str], expected: t.Optional[str] = None):
        """Remove and return the first token, optionally checking it equals *expected*.

        Raises:
            ValueError: If no tokens are left or the token differs from *expected*.
        """
        if not tokens:
            if expected is not None:
                raise ValueError(f'Unexpected end of input, expected {expected!r}')
            raise ValueError('Unexpected end of input')
        token = tokens.pop(0)
        if expected is not None and token != expected:
            raise ValueError(f'Expected {expected!r}, got {token!r}')
        return token

    def do_type(self, tokens: t.List[str]) -> t.Union[str, t.Dict]:
        """Parse one type (struct, array, or plain name) from the front of *tokens*."""
        head = self._peek(tokens)
        if head == 'struct':
            return self.do_struct(tokens)
        if head == 'array':
            return self.do_array(tokens)
        # Reject punctuation so that e.g. 'struct<a:>>' is not parsed as type '>'.
        if not self._is_word(head):
            raise ValueError(f'Expected a type name, got {head!r}')
        return self._consume(tokens)

    def do_struct(self, tokens: t.List[str]) -> t.Dict:
        """Parse ``struct<name:type,...>`` into a dict of field name -> parsed type."""
        self._consume(tokens, 'struct')
        self._consume(tokens, '<')
        result = {}
        # An empty struct has no fields to read before the closing bracket.
        if self._peek(tokens) == '>':
            self._consume(tokens, '>')
            return result
        while True:
            name, value = self.do_struct_part(tokens)
            result[name] = value
            if self._peek(tokens) != ',':
                break
            self._consume(tokens, ',')
        self._consume(tokens, '>')
        return result

    def do_struct_part(self, tokens: t.List[str]) -> t.Tuple[str, t.Union[str, t.Dict]]:
        """Parse a single ``name:type`` struct field and return ``(name, parsed_type)``."""
        head = self._peek(tokens)
        if not self._is_word(head):
            raise ValueError(f'Expected a field name, got {head!r}')
        name = self._consume(tokens)
        self._consume(tokens, ':')
        value = self.do_type(tokens)
        return name, value

    def do_array(self, tokens: t.List[str]) -> t.Dict:
        """Parse ``array<type>`` into ``{None: parsed_type}``."""
        self._consume(tokens, 'array')
        self._consume(tokens, '<')
        result = {None: self.do_type(tokens)}
        self._consume(tokens, '>')
        return result
# Demo: parse the sample struct type string and print the nested result.
data = 'struct<Milk:array<struct<id:string,type:string>>,Oil:string,batter:array<struct<id:string,type:string>>>'
p = Parser()
parsed = p.process(data)
print(parsed)
import re
from pprint import pprint
def replace_old_struct(struct):
    """Rewrite a ``struct<...>``/``array<...>`` type string into call-like text.

    ``struct<`` becomes ``dict(``, ``array<`` becomes ``list(``, every ``>``
    becomes ``)``, and a space is inserted after each ``:`` and ``,``.

    Args:
        struct: The type string to rewrite.

    Returns:
        The rewritten string.
    """
    # Order matters: the keyword prefixes must be replaced before the
    # single-character substitutions are applied.
    substitutions = (
        ('struct<', 'dict('),
        ('array<', 'list('),
        ('>', ')'),
        (':', ': '),
        (',', ', '),
    )
    for old, new in substitutions:
        struct = struct.replace(old, new)
    return struct
def set_mark(struct, patterns):
    """Wrap in single quotes every word captured by the given regex patterns.

    For each pattern in turn, the captured words are collected from the
    current text and every whole-word, not-yet-quoted occurrence of each
    word is replaced by the quoted word. Later patterns see the text
    produced by earlier ones.

    Args:
        struct: The text to process.
        patterns: Regex patterns, each expected to have one capturing group
            that yields the words to quote.

    Returns:
        The text with the captured words quoted.

    Note:
        Every whole-word occurrence is quoted, so a captured word equal to
        a keyword in the text (e.g. ``dict`` or ``list``) is quoted as well.
    """
    for pattern in patterns:
        found = re.findall(pattern, struct)
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        for word in dict.fromkeys(found):
            quoted = f"'{word}'"
            # Match the word only as a whole token and skip occurrences that
            # are already quoted; plain str.replace would also corrupt longer
            # identifiers containing the word (e.g. 'id' inside 'valid').
            whole_word = r"(?<![\w'])" + re.escape(word) + r"(?![\w'])"
            # A function replacement avoids backslash processing in the text.
            struct = re.sub(whole_word, lambda _match, text=quoted: text, struct)
    return struct
def pair_rbracket(struct: str, rbracket):
    """Replace the first unmatched ``)`` in *struct* with *rbracket*.

    The text is scanned left to right while tracking the nesting depth of
    ``(``/``)`` pairs; the first ``)`` that has no opening partner inside
    the text is the one replaced.

    Args:
        struct: The text to scan.
        rbracket: The replacement for the unmatched closing parenthesis.

    Returns:
        A copy of *struct* with that single character replaced.

    Raises:
        ValueError: If *struct* contains no unmatched ``)``.
    """
    depth = 0
    for position, symbol in enumerate(struct):
        if symbol == '(':
            depth += 1
        elif symbol == ')':
            if depth == 0:
                return struct[:position] + rbracket + struct[position + 1:]
            depth -= 1
    # Previously fell through and returned None, which only failed later.
    raise ValueError(f'No unmatched closing parenthesis in {struct!r}')
def pair_lbracket(struct, lbracket):
    """Return *struct* with its first character replaced by *lbracket*.

    Args:
        struct: The text whose first character is dropped.
        lbracket: The text placed in front of the remainder.

    Returns:
        *lbracket* followed by everything after the first character.
    """
    remainder = struct[1:]
    return f'{lbracket}{remainder}'
def pair_brackets(struct, struct_type='dict'):
    """Convert the leading ``(`` of *struct* and its partner ``)`` into brackets.

    Args:
        struct: Text that is expected to begin with ``(``.
        struct_type: ``'dict'`` to use ``{``/``}``, ``'list'`` to use ``[``/``]``.

    Returns:
        The text with the leading parenthesis pair replaced via
        ``pair_lbracket`` and ``pair_rbracket``; text that does not start
        with ``(`` (including the empty string) is returned unchanged.

    Raises:
        KeyError: If *struct_type* is neither ``'dict'`` nor ``'list'``.
    """
    brackets = {'dict': ('{', '}'), 'list': ('[', ']')}
    lbracket, rbracket = brackets[struct_type]
    # startswith() is safe on an empty string, unlike struct[0].
    if struct.startswith('('):
        struct = pair_lbracket(struct, lbracket)
        # With the opener replaced, its partner is now the first unmatched ')'.
        struct = pair_rbracket(struct, rbracket)
    return struct
def set_struct(struct, struct_type):
    """Drop the first *struct_type* keyword and convert the brackets after it.

    Args:
        struct: Text containing call-like fragments such as ``dict(...)``.
        struct_type: The keyword to remove (``'dict'`` or ``'list'``).

    Returns:
        The text before the keyword, followed by the text after it as
        returned by ``pair_brackets``. If the keyword does not occur,
        *struct* is returned unchanged.
    """
    before, keyword, after = struct.partition(struct_type)
    if not keyword:
        # Nothing to convert; previously this case raised IndexError.
        return struct
    return ''.join([before, pair_brackets(after, struct_type)])
def replace_struct(struct, struct_type):
    """Apply ``set_struct`` once for every occurrence of *struct_type*.

    Args:
        struct: Text containing call-like fragments such as ``dict(...)``.
        struct_type: The keyword to process (``'dict'`` or ``'list'``).

    Returns:
        The text after all passes.
    """
    # The number of passes is fixed up front from the initial text.
    occurrences = struct.count(struct_type)
    for _ in range(occurrences):
        struct = set_struct(struct, struct_type)
    return struct
if __name__ == '__main__':
    # Sample (column_name, type_string) pair.
    data = ('food',
            'struct<Milk:array<struct<id:string,type:string>>,Oil:string,batter:array<struct<id:string,type:string>>>')
    my_struct = data[1]
    # First pattern captures words followed by ':'; the second captures
    # words that follow ': ' and directly precede ')'.
    patterns = [r"([\w]+)\:", r"\: ([\w]+)\)"]
    struct = set_mark(replace_old_struct(my_struct), patterns)
    struct_types = ['dict', 'list']
    for struct_type in struct_types:
        struct = replace_struct(struct, struct_type)
    # NOTE(review): eval() executes arbitrary code. It is only fed the
    # hard-coded sample above; do not reuse this on type strings from an
    # untrusted source without switching to a safe parser.
    struct = eval(struct)
    pprint(struct)