From 251d1afca33e91f5a268dd6eb89f47afe8a48e06 Mon Sep 17 00:00:00 2001 From: Fabio Erculiani Date: Sat, 28 Jul 2012 00:21:28 +0200 Subject: [PATCH] [entropy.tools] use less memory when calling uncompress_tarball with huge tarballs --- lib/entropy/tools.py | 60 +++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/lib/entropy/tools.py b/lib/entropy/tools.py index a1ef5e16f..b9ddfa50a 100644 --- a/lib/entropy/tools.py +++ b/lib/entropy/tools.py @@ -2137,6 +2137,30 @@ def uncompress_tarball(filepath, extract_path = None, catch_empty = False): if not os.path.isfile(filepath): raise FileNotFound('FileNotFound: archive does not exist') + def _setup_file_metadata(tarinfo, epath): + try: + tar.chown(tarinfo, epath) + _fix_uid_gid(tarinfo, epath) + + # no longer touch utime using Tarinfo, behaviour seems + # buggy and introduces an unwanted delay on some conditions. + # match /bin/tar behaviour to not fuck touch mtime/atime at all + # I wonder who are the idiots who didn't even test how + # tar.utime behaves. Or perhaps it's just me that I've found + # a new bug. Issue is, packages are prepared on PC A, and + # mtime is checked on PC B. + # tar.utime(tarinfo, epath) + + # mode = tarinfo.mode + # xorg-server /usr/bin/X symlink of /usr/bin/Xorg + # which is setuid. Symlinks don't need chmod. PERIOD! + if not os.path.islink(epath): + tar.chmod(tarinfo, epath) + + except tarfile.ExtractError: + if tar.errorlevel > 1: + raise + is_python_3 = const_is_python3() tar = None extracted_something = False @@ -2159,7 +2183,6 @@ def uncompress_tarball(filepath, extract_path = None, catch_empty = False): deleter_counter = 3 for tarinfo in tar: epath = os.path.join(encoded_path, tarinfo.name) - entries.append((tarinfo, epath,)) if tarinfo.isdir(): # Extract directory with a safe mode, so that @@ -2174,6 +2197,18 @@ def uncompress_tarball(filepath, extract_path = None, catch_empty = False): set_attrs=not tarinfo.isdir()) else: tar.extract(tarinfo, encoded_path) + + if tarinfo.isreg(): + # apply metadata to files instantly + # not wasting RAM growing entries. + _setup_file_metadata(tarinfo, epath) + else: + # delay file metadata setup for dirs + # or syms that might be dirs or other + # things. This because entries can grow + # big and use a lot of RAM. + entries.append((tarinfo, epath)) + extracted_something = True if not is_python_3: @@ -2194,28 +2229,7 @@ def uncompress_tarball(filepath, extract_path = None, catch_empty = False): # we need to check both files and directories because # we have to fix uid and gid from broken archives for tarinfo, epath in entries: - try: - tar.chown(tarinfo, epath) - _fix_uid_gid(tarinfo, epath) - - # no longer touch utime using Tarinfo, behaviour seems - # buggy and introduces an unwanted delay on some conditions. - # match /bin/tar behaviour to not fuck touch mtime/atime at all - # I wonder who are the idiots who didn't even test how - # tar.utime behaves. Or perhaps it's just me that I've found - # a new bug. Issue is, packages are prepared on PC A, and - # mtime is checked on PC B. - # tar.utime(tarinfo, epath) - - # mode = tarinfo.mode - # xorg-server /usr/bin/X symlink of /usr/bin/Xorg - # which is setuid. Symlinks don't need chmod. PERIOD! - if not os.path.islink(epath): - tar.chmod(tarinfo, epath) - - except tarfile.ExtractError: - if tar.errorlevel > 1: - raise + _setup_file_metadata(tarinfo, epath) except EOFError: return -1