У меня есть файл update2022-03.7z, который я пытаюсь распаковать с помощью py7zr:
import py7zr
# Extract the monthly update archive from the staging area into the target
# directory. Both locations are paths under the /dbfs mount.
archive_path = '/dbfs/mnt/databricks/Staging/FTP/NEW/update2022-03.7z'
target_dir = "/dbfs/mnt/databricks/Staging/FTP/2022_new/"

# NOTE(review): the reported Bad7zFile("invalid header data") is raised by
# py7zr when the CRC stored in the archive's start header does not match the
# CRC computed over the header bytes it read. That presumably means the file
# itself is damaged or incomplete (e.g. a non-binary or truncated transfer)
# rather than this code being wrong -- TODO confirm by testing the archive
# with another 7z tool or re-downloading it.
with py7zr.SevenZipFile(archive_path, 'r') as z:
    z.extractall(path=target_dir)
Ошибка:
Bad7zFile: invalid header data
Если развернуть эту ошибку в терминале, видна следующая трассировка:
Bad7zFile Traceback (most recent call last)
<command-2400507450692075> in <module>
1 import py7zr
2
----> 3 with py7zr.SevenZipFile('/dbfs/mnt/databricks/Staging/FTP/NEW/update2022-03.7z', 'r') as z:
4 z.extractall(path="/dbfs/mnt/databricks/Staging/FTP/2022_new/")
/local_disk0/.ephemeral_nfs/envs/pythonEnv-1d939ae4-9aff-498d-843f-aecb1fe35d18/lib/python3.8/site-packages/py7zr/py7zr.py in __init__(self, file, mode, filters, dereference, password, header_encryption, blocksize, mp)
389 except Exception as e:
390 self._fpclose()
--> 391 raise e
392 self._dict: Dict[str, IO[Any]] = {}
393 self.dereference = dereference
/local_disk0/.ephemeral_nfs/envs/pythonEnv-1d939ae4-9aff-498d-843f-aecb1fe35d18/lib/python3.8/site-packages/py7zr/py7zr.py in __init__(self, file, mode, filters, dereference, password, header_encryption, blocksize, mp)
375 try:
376 if mode == "r":
--> 377 self._real_get_contents(password)
378 self.fp.seek(self.afterheader) # seek into start of payload and prepare worker to extract
379 self.worker = Worker(self.files, self.afterheader, self.header, self.mp)
/local_disk0/.ephemeral_nfs/envs/pythonEnv-1d939ae4-9aff-498d-843f-aecb1fe35d18/lib/python3.8/site-packages/py7zr/py7zr.py in _real_get_contents(self, password)
410 if not self._check_7zfile(self.fp):
411 raise Bad7zFile("not a 7z file")
--> 412 self.sig_header = SignatureHeader.retrieve(self.fp)
413 self.afterheader: int = self.fp.tell()
414 self.fp.seek(self.sig_header.nextheaderofs, os.SEEK_CUR)
/local_disk0/.ephemeral_nfs/envs/pythonEnv-1d939ae4-9aff-498d-843f-aecb1fe35d18/lib/python3.8/site-packages/py7zr/archiveinfo.py in retrieve(cls, file)
1092 def retrieve(cls, file: BinaryIO):
1093 obj = cls()
-> 1094 obj._read(file)
1095 return obj
1096
/local_disk0/.ephemeral_nfs/envs/pythonEnv-1d939ae4-9aff-498d-843f-aecb1fe35d18/lib/python3.8/site-packages/py7zr/archiveinfo.py in _read(self, file)
1108 crc = calculate_crc32(data, crc)
1109 if crc != self.startheadercrc:
-> 1110 raise Bad7zFile("invalid header data")
1111
1112 def calccrc(self, length: int, header_crc: int):