import re

import textract
# Extract the contents of a local document with textract.
# NOTE: "path/to/file.extension" is a placeholder path, kept as in the original.
text = textract.process("path/to/file.extension")

# Remote document to download.
url = "https://www.uralprombank.ru/files/misc/admiralgorshkov-15.doc"

# NOTE(review): `req` is not imported anywhere in the visible source --
# presumably `import requests as req`; confirm and add the import.
# SECURITY: verify=False turns off TLS certificate verification for this
# request. Kept as in the original; remove it unless the server's
# certificate genuinely cannot be validated.
response = req.get(url, verify=False)

# Decode the downloaded bytes directly (the original BytesIO(...).read()
# round trip returned the very same bytes, so it is dropped).
# "cp65001" is Windows-only before Python 3.8 and a plain alias of UTF-8
# from 3.8 on, so "utf-8" is the portable spelling. errors="ignore"
# silently drops any byte sequence that is not valid UTF-8.
file = response.content.decode("utf-8", "ignore")

# Pattern that followed the script as bare text in the original; it is now a
# real compiled regex. Presumably meant to find phone numbers in `file` --
# it is not applied anywhere in the visible source, so no matching is done
# here. Usage would be e.g. PHONE_RE.findall(file).
PHONE_RE = re.compile(
    r"(\+?\d?\(?\d{3}?\)?\d{3}-?\d+-?\d+)|([8]?-?\d{3}-?\d+-?\d+)"
)