From a0d8c18b49ddfefd2dd79386c6138720ac1fd8a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C5=82awomir=20Nizio?= Date: Wed, 14 Nov 2018 09:10:34 +0100 Subject: [PATCH] [entropy.spm] speed up xpak finding Now it's chunk based, so fewer reads and seeks, and searching itself is improved as well. --- .../interfaces/portage_plugin/xpaktools.py | 73 ++++++++++--------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/lib/entropy/spm/plugins/interfaces/portage_plugin/xpaktools.py b/lib/entropy/spm/plugins/interfaces/portage_plugin/xpaktools.py index 399b70303..6da07d422 100644 --- a/lib/entropy/spm/plugins/interfaces/portage_plugin/xpaktools.py +++ b/lib/entropy/spm/plugins/interfaces/portage_plugin/xpaktools.py @@ -2,6 +2,7 @@ """ @author: Fabio Erculiani + @author: Slawomir Nizio @contact: lxnay@sabayon.org @copyright: Fabio Erculiani @license: GPL-2 @@ -107,52 +108,58 @@ def suck_xpak(tbz2file, xpakpath): @return: @rtype: """ + if const_is_python3(): + xpak_end = b"XPAKSTOP" + xpak_start = b"XPAKPACK" + else: + xpak_end = "XPAKSTOP" + xpak_start = "XPAKPACK" + + chunk_size = 2048 + + # Sanity check: makes the position calculations easier (seek_length below). + assert len(xpak_end) == len(xpak_start) + old, db = None, None try: old = open(tbz2file, "rb") db = open(xpakpath, "wb") - # position old to the end - old.seek(0, os.SEEK_END) - # read backward until we find - n_bytes = old.tell() - counter = n_bytes - 1 - if const_is_python3(): - xpak_end = b"XPAKSTOP" - xpak_start = b"XPAKPACK" - xpak_entry_point = b"X" - else: - xpak_end = "XPAKSTOP" - xpak_start = "XPAKPACK" - xpak_entry_point = "X" - - xpak_tag_len = len(xpak_start) - chunk_len = 3 data_start_position = None data_end_position = None + # position old to the end + old.seek(0, os.SEEK_END) + n_bytes = old.tell() - while counter >= (0 - chunk_len): + chunk_size = min(chunk_size, n_bytes) - old.seek(counter - n_bytes, os.SEEK_END) - if (n_bytes - (abs(counter - n_bytes))) < chunk_len: - chunk_len = 1 - read_bytes = old.read(chunk_len) - read_len = len(read_bytes) + # position one chunk from the end, then continue + seek_pos = n_bytes - chunk_size - entry_idx = read_bytes.rfind(xpak_entry_point) - if entry_idx != -1: + while True: + old.seek(seek_pos, os.SEEK_SET) + read_bytes = old.read(chunk_size) - cut_gotten = read_bytes[entry_idx:] - offset = xpak_tag_len - len(cut_gotten) - chunk = cut_gotten + old.read(offset) + end_idx = read_bytes.rfind(xpak_end) + if end_idx != -1: + if data_start_position is None: + data_end_position = seek_pos + end_idx + len(xpak_end) + # avoid START after END in rfind() + read_bytes = read_bytes[:end_idx] - if (chunk == xpak_end) and (data_start_position is None): - data_end_position = old.tell() - - elif (chunk == xpak_start) and (data_end_position is not None): - data_start_position = old.tell() - xpak_tag_len + start_idx = read_bytes.rfind(xpak_start) + if start_idx != -1: + if data_end_position is not None: + data_start_position = seek_pos + start_idx break - counter -= read_len + if seek_pos == 0: + break + + # Make sure the seeks are so that there is enough overlap. + seek_length = chunk_size - (len(xpak_start) - 1) + seek_pos -= seek_length + if seek_pos < 0: + seek_pos = 0 if data_start_position is None: return False