-- Tag-search variants over memes.tags, where all tags of a row are stored in
-- one delimited string. String literals are single-quoted throughout: in
-- standard SQL a double-quoted token is an identifier, and engines that accept
-- it as a string do so only through a non-standard fallback.
-- NOTE(review): SELECT * is kept because the table's column list is not visible here.

-- 1. Any of the tags, plain substring match (so '%eng%' also hits 'rus_eng').
SELECT *
FROM memes
WHERE tags LIKE '%anime%'
   OR tags LIKE '%eng%';

-- 2. All of the tags, plain substring match, newest 20 rows by id.
SELECT *
FROM memes
WHERE tags LIKE '%eng%'
  AND tags LIKE '%history%'
  AND tags LIKE '%anime%'
ORDER BY id DESC
LIMIT 20;

-- 3. All of the tags, whole-tag match.
-- Assumes tags are space-separated with a leading and trailing space -- TODO confirm.
SELECT *
FROM memes
WHERE tags LIKE '% eng %'
  AND tags LIKE '% history %'
  AND tags LIKE '% anime %'
ORDER BY id DESC
LIMIT 20;

-- 4. Tags that contain the LIKE wildcards themselves: with ESCAPE '\',
-- '\_' and '\%' match a literal underscore and a literal percent sign.
SELECT *
FROM memes
WHERE tags LIKE '% rus\_eng %' ESCAPE '\'
  AND tags LIKE '% 100\% %' ESCAPE '\'
ORDER BY id DESC
LIMIT 20;

-- 5. Same as variant 3 for a comma-delimited column.
-- Assumes tags start and end with a comma -- TODO confirm.
SELECT *
FROM memes
WHERE tags LIKE '%,eng,%'
  AND tags LIKE '%,history,%'
  AND tags LIKE '%,anime,%'
ORDER BY id DESC
LIMIT 20;

-- 6. Comma-delimited whole-tag match via instr(): no wildcards, so nothing
-- needs escaping. The "> 0" makes the found/not-found test explicit.
SELECT *
FROM memes
WHERE instr(tags, ',eng,') > 0
  AND instr(tags, ',history,') > 0
  AND instr(tags, ',anime,') > 0
ORDER BY id DESC
LIMIT 20;

-- 7. Space-delimited whole-tag match via instr().
SELECT *
FROM memes
WHERE instr(tags, ' eng ') > 0
  AND instr(tags, ' history ') > 0
  AND instr(tags, ' anime ') > 0
ORDER BY id DESC
LIMIT 20;
# Sample markup used by the tag-stripping examples: one LTTextBoxHorizontal
# element wrapping two LTTextLineHorizontal elements whose text content is
# "Patient " and "Jonson ".
html = '''<LTTextBoxHorizontal y0="677.457" y1="698.418" x0="47.208" x1="86.76" width="39.552" height="20.962" bbox="[47.208, 677.457, 86.76, 698.418]" index="3">
<LTTextLineHorizontal y0="688.668" y1="698.418" x0="53.168" x1="86.76" width="33.592" height="9.75" bbox="[53.168, 688.668, 86.76, 698.418]" word_margin="0.1">Patient </LTTextLineHorizontal>
<LTTextLineHorizontal y0="677.457" y1="687.207" x0="47.208" x1="86.76" width="39.552" height="9.75" bbox="[47.208, 677.457, 86.76, 687.207]" word_margin="0.1">Jonson </LTTextLineHorizontal>
</LTTextBoxHorizontal>'''
import bs4
# Parse the markup with the 'html.parser' backend, then keep only the text
# between the tags.
soup = bs4.BeautifulSoup(html, 'html.parser')
text = soup.get_text()
>> print( text )
Patient
Jonson
>> text
'\nPatient \nJonson \n'
import re
# Delete every tag-like span: a '<', one or more characters other than '>',
# then the closing '>'.
tag_pattern = re.compile(r'<[^>]+>')
text = tag_pattern.sub('', html)
import ast

# A string containing the forbidden characters (newlines and a tab).
s = "aaa\n\nbbb\tccc"

# "Encoded" form: the Python literal for s, i.e. "'aaa\\n\\nbbb\\tccc'".
rs = repr(s)

# To decode, evaluate that literal back into the original string.
decoded = ast.literal_eval(rs)
>> print(s)
aaa
bbb ccc
>> print(rs)
'aaa\n\nbbb\tccc'
>> print(decoded)
aaa
bbb ccc
# Print every non-empty contiguous slice of `data` together with the
# inclusive start/finish indices that produce it.
data = '234567890'  # str / bytes / bytearray / list / tuple
for start in range(len(data)):
    # finish is inclusive, hence the +1 in the slice below.
    for finish in range(start, len(data)):
        print(data[start:finish + 1], '\t\t start:', start, ' finish:', finish)
2 start: 0 finish: 0
23 start: 0 finish: 1
234 start: 0 finish: 2
2345 start: 0 finish: 3
23456 start: 0 finish: 4
234567 start: 0 finish: 5
2345678 start: 0 finish: 6
23456789 start: 0 finish: 7
234567890 start: 0 finish: 8
3 start: 1 finish: 1
34 start: 1 finish: 2
345 start: 1 finish: 3
3456 start: 1 finish: 4
34567 start: 1 finish: 5
345678 start: 1 finish: 6
3456789 start: 1 finish: 7
34567890 start: 1 finish: 8
4 start: 2 finish: 2
45 start: 2 finish: 3
456 start: 2 finish: 4
4567 start: 2 finish: 5
45678 start: 2 finish: 6
456789 start: 2 finish: 7
4567890 start: 2 finish: 8
5 start: 3 finish: 3
56 start: 3 finish: 4
567 start: 3 finish: 5
5678 start: 3 finish: 6
56789 start: 3 finish: 7
567890 start: 3 finish: 8
6 start: 4 finish: 4
67 start: 4 finish: 5
678 start: 4 finish: 6
6789 start: 4 finish: 7
67890 start: 4 finish: 8
7 start: 5 finish: 5
78 start: 5 finish: 6
789 start: 5 finish: 7
7890 start: 5 finish: 8
8 start: 6 finish: 6
89 start: 6 finish: 7
890 start: 6 finish: 8
9 start: 7 finish: 7
90 start: 7 finish: 8
0 start: 8 finish: 8
# The pattern has three capture groups, so re.split keeps each matched piece
# (opening text, URL, closing text) as its own list item between the
# surrounding text chunks.
pattern = r'(<a rel=")(.+?)(">)'
splitter = re.compile(pattern)
splitted = splitter.split(html_str)
# splitted == [ '<html>...', '<a rel="', 'http://site.com/image1.jpg', '">', '<div>...', '<a rel="', 'http://site.com/image2.jpg', '">', ... ]

# Each match contributes four items (text + 3 groups); the URL is the third.
urls = splitted[2::4]
# urls == ['http://site.com/image1.jpg', 'http://site.com/image2.jpg', ... ]
[ '<a rel="', 'http://site.com/image2.jpg', '">' ]
из списка splitted или заменяете их на что-то (например, на "название ссылки").
cleaned_html_str = ''.join(splitted)
# Drop the first line: split on line boundaries, discard the first piece and
# re-join the remainder with '\n'.
s = ' a b \r\n c d \n e f '
lines = s.splitlines()
result = '\n'.join(lines[1:])
Правда, переносы строк изменятся, если они были не '\n', но зато всё будет работать чётко и по лучшим рецептам Unicode.
import re
# Drop the first line with a single substitution: match everything up to the
# first CR/LF, plus the optional line break itself, and replace it once.
s = ' a b \r\n c d \r e f '
template = r"[^\n\r]*(\r\n|\n\r|[\n\r])?"
first_line = re.compile(template)
result = first_line.sub("", s, count=1)
Здесь всё после первой строки сохранится без изменений, даже нестандартные переводы строк (которые не '\n').
import re
# Same first-line removal, with a wider set of characters treated as line
# separators by the pattern (including \x00, which ends the first line here).
s = ' a b \u0000 c d \n e f '
template = r"[^\n\r\v\f\x1c\x1d\x1e\x85\u2028\u2029\x00]*(\r\n|\n\r|[\n\r\v\f\x1c\x1d\x1e\x85\u2028\u2029\x00])?"
first_line = re.compile(template)
result = first_line.sub("", s, count=1)
import re
# Drop the first line by splitting once on the first line separator and
# keeping whatever follows it (nothing, if no separator is present).
s = ' a b \r\n c d \n e f '
template = r"\r\n|\n\r|[\n\r\v\f\x1c\x1d\x1e\x85\u2028\u2029\x00]"
pieces = re.split(template, s, maxsplit=1)
result = "".join(pieces[1:])