Fix fs2json.py and copy-to-sha256.py for the new shorter hash
This commit is contained in:
parent
7853d8f191
commit
0247e75674
|
@ -8,12 +8,13 @@ import hashlib
|
|||
import shutil
|
||||
import tarfile
|
||||
|
||||
HASH_LENGTH = 8
|
||||
|
||||
def hash_file(filename):
|
||||
def hash_file(filename) -> str:
|
||||
with open(filename, "rb", buffering=0) as f:
|
||||
return hash_fileobj(f)
|
||||
|
||||
def hash_fileobj(f):
|
||||
def hash_fileobj(f) -> str:
|
||||
h = hashlib.sha256()
|
||||
for b in iter(lambda: f.read(128*1024), b""):
|
||||
h.update(b)
|
||||
|
@ -42,9 +43,9 @@ def main():
|
|||
if tar:
|
||||
handle_tar(logger, tar, to_path)
|
||||
else:
|
||||
handle_dir(logger, path, to_path)
|
||||
handle_dir(logger, from_path, to_path)
|
||||
|
||||
def handle_dir(logger, from_path, to_path):
|
||||
def handle_dir(logger, from_path: str, to_path: str):
|
||||
def onerror(oserror):
|
||||
logger.warning(oserror)
|
||||
|
||||
|
@ -62,8 +63,9 @@ def handle_dir(logger, from_path, to_path):
|
|||
if stat.S_ISLNK(mode) or stat.S_ISCHR(mode) or stat.S_ISBLK(mode) or stat.S_ISFIFO(mode) or stat.S_ISSOCK(mode):
|
||||
continue
|
||||
|
||||
sha256 = hash_file(absname)
|
||||
to_abs = os.path.join(to_path, sha256)
|
||||
file_hash = hash_file(absname)
|
||||
filename = file_hash[0:HASH_LENGTH] + ".bin"
|
||||
to_abs = os.path.join(to_path, filename)
|
||||
|
||||
if os.path.exists(to_abs):
|
||||
logger.info("Exists, skipped {} ({})".format(to_abs, absname))
|
||||
|
@ -71,13 +73,13 @@ def handle_dir(logger, from_path, to_path):
|
|||
logger.info("cp {} {}".format(absname, to_abs))
|
||||
shutil.copyfile(absname, to_abs)
|
||||
|
||||
def handle_tar(logger, tar, to_path):
|
||||
def handle_tar(logger, tar, to_path: str):
|
||||
for member in tar.getmembers():
|
||||
if member.isfile() or member.islnk():
|
||||
f = tar.extractfile(member)
|
||||
sha256 = hash_fileobj(f)
|
||||
|
||||
to_abs = os.path.join(to_path, sha256)
|
||||
file_hash = hash_fileobj(f)
|
||||
filename = file_hash[0:HASH_LENGTH] + ".bin"
|
||||
to_abs = os.path.join(to_path, filename)
|
||||
|
||||
if os.path.exists(to_abs):
|
||||
logger.info("Exists, skipped {} ({})".format(to_abs, member.name))
|
||||
|
|
|
@ -26,19 +26,21 @@ IDX_GID = 5
|
|||
|
||||
# target for symbolic links
|
||||
# child nodes for directories
|
||||
# sha256 for files
|
||||
# filename for files (first HASH_LENGTH characters of the sha256 digest string + ".bin")
|
||||
IDX_TARGET = 6
|
||||
IDX_SHA256 = 6
|
||||
IDX_FILENAME = 6
|
||||
|
||||
HASH_LENGTH = 8
|
||||
|
||||
S_IFLNK = 0xA000
|
||||
S_IFREG = 0x8000
|
||||
S_IFDIR = 0x4000
|
||||
|
||||
def hash_file(filename):
|
||||
def hash_file(filename) -> str:
|
||||
with open(filename, "rb", buffering=0) as f:
|
||||
return hash_fileobj(f)
|
||||
|
||||
def hash_fileobj(f):
|
||||
def hash_fileobj(f) -> str:
|
||||
h = hashlib.sha256()
|
||||
for b in iter(lambda: f.read(128*1024), b""):
|
||||
h.update(b)
|
||||
|
@ -115,6 +117,7 @@ def handle_dir(logger, path, exclude):
|
|||
prevpath = []
|
||||
|
||||
mainroot = []
|
||||
filename_to_hash = {}
|
||||
total_size = 0
|
||||
rootstack = [mainroot]
|
||||
|
||||
|
@ -193,7 +196,12 @@ def handle_dir(logger, path, exclude):
|
|||
target = os.readlink(absname)
|
||||
obj[IDX_TARGET] = target
|
||||
elif isfile:
|
||||
obj[IDX_SHA256] = hash_file(absname)
|
||||
file_hash = hash_file(absname)
|
||||
filename = file_hash[0:HASH_LENGTH] + ".bin"
|
||||
existing = filename_to_hash.get(filename)
|
||||
assert existing is None or existing == file_hash, "Collision in short hash (%s and %s)" % (existing, file_hash)
|
||||
filename_to_hash[filename] = file_hash
|
||||
obj[IDX_FILENAME] = filename
|
||||
|
||||
while obj[-1] is None:
|
||||
obj.pop()
|
||||
|
@ -206,6 +214,7 @@ def handle_dir(logger, path, exclude):
|
|||
|
||||
def handle_tar(logger, tar):
|
||||
mainroot = []
|
||||
filename_to_hash = {}
|
||||
total_size = 0
|
||||
|
||||
for member in tar.getmembers():
|
||||
|
@ -230,7 +239,12 @@ def handle_tar(logger, tar):
|
|||
if member.isfile() or member.islnk():
|
||||
obj[IDX_MODE] |= S_IFREG
|
||||
f = tar.extractfile(member)
|
||||
obj[IDX_SHA256] = hash_fileobj(f)
|
||||
file_hash = hash_fileobj(f)
|
||||
filename = file_hash[0:HASH_LENGTH] + ".bin"
|
||||
existing = filename_to_hash.get(filename)
|
||||
assert existing is None or existing == file_hash, "Collision in short hash (%s and %s)" % (existing, file_hash)
|
||||
filename_to_hash[filename] = file_hash
|
||||
obj[IDX_FILENAME] = filename
|
||||
if member.islnk():
|
||||
# fix size for hard links
|
||||
f.seek(0, os.SEEK_END)
|
||||
|
|
Loading…
Reference in a new issue