import re
import tempfile
from pathlib import Path

import textract
# Download a Word document and extract its plain text with textract.
#
# NOTE(review): `req` is not imported in the visible part of this file; the
# call shape below matches `import requests as req` -- confirm.
url = 'https://www.uralprombank.ru/files/misc/admiralgorshkov-15.doc'

# NOTE(security): verify=False disables TLS certificate verification. It is
# kept to preserve the existing behaviour; prefer passing a CA bundle instead.
# The timeout keeps the script from hanging forever on an unresponsive server.
response = req.get(url, verify=False, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page as the document.
response.raise_for_status()

# A .doc file is a binary container, so decoding its raw bytes as text yields
# garbage. textract.process() takes a file path, so the payload is written to
# a temporary file first; the ".doc" suffix is kept so textract can recognise
# the format from the file name.
with tempfile.TemporaryDirectory() as tmp_dir:
    doc_path = Path(tmp_dir) / 'document.doc'
    doc_path.write_bytes(response.content)
    # `text` holds the extracted document text as bytes.
    text = textract.process(str(doc_path))

# `file` holds the extracted text as str. textract's output is presumably
# UTF-8 by default (TODO confirm for the installed version); undecodable
# bytes are dropped, as in the original decode call.
file = text.decode('utf-8', 'ignore')
# Compiled pattern, presumably intended to find phone numbers in the
# extracted text -- confirm. It has two alternatives:
#   group 1: optional "+", optional digit, optional "(", three digits,
#            optional ")", three digits, then two digit runs, each optionally
#            preceded by "-"   (e.g. "+7(495)123-45-67")
#   group 2: optional "8", optional "-", three digits, then two digit runs,
#            each optionally preceded by "-"   (e.g. "8-800-555-3535")
# The pattern text is unchanged; it is only wrapped in a raw string so the
# backslashes reach the regex engine and the line is valid Python.
PHONE_RE = re.compile(
    r'(\+?\d?\(?\d{3}?\)?\d{3}-?\d+-?\d+)|([8]?-?\d{3}-?\d+-?\d+)'
)