From 831237b7fcfd77791b1cf744a5e4b8f95891c152 Mon Sep 17 00:00:00 2001 From: Fabio Erculiani Date: Wed, 20 Mar 2013 15:21:31 +0000 Subject: [PATCH] [entropy.server] insert the package SHA1 checksum into the file name This commit partially reverts commit d6b7a21314fdaa53563b399aa25feb34a817066e. Package revision is no longer incremented across package moves. This has two advantages: - much less traffic generated on the mirror infrastructure - less updates for sabayon-limbo users when packages are moved to main Bumping the revision was required by sabayon-weekly, which had the problem that some package files were replaced during normal activity on sabayonlinux.org and sabayon-limbo on the mirror. This generated checksum errors, thus adding the checksum in the package file name ensures that Entropy Server will never overwrite package file names unless the checksum also matches. Having a SHA1 checksum in the file name is also good for security, and we may even want to create a SHA1 from the GPG signature in future. --- client/solo/commands/pkg.py | 7 +- lib/entropy/client/interfaces/methods.py | 15 ++-- lib/entropy/dep.py | 56 ++++++++++-- lib/entropy/server/interfaces/main.py | 86 +++++++++---------- .../interfaces/portage_plugin/__init__.py | 3 +- lib/tests/dep.py | 37 ++++++-- services/entropy-pkgdelta-generator | 39 +++++++-- 7 files changed, 171 insertions(+), 72 deletions(-) diff --git a/client/solo/commands/pkg.py b/client/solo/commands/pkg.py index 8d8a19f4d..0e758d9f7 100644 --- a/client/solo/commands/pkg.py +++ b/client/solo/commands/pkg.py @@ -330,6 +330,10 @@ Execute advanced tasks on Entropy packages and the running system. 
# append development revision number # and create final package file name + sha1 = None + signatures = pkg_data.get('signatures') + if signatures is not None: + sha1 = signatures['sha1'] pkg_data['revision'] = etpConst['spmetprev'] download_dirpath = entropy.tools.create_package_dirpath( pkg_data['branch'], nonfree=False, restricted=False) @@ -337,7 +341,8 @@ Execute advanced tasks on Entropy packages and the running system. pkg_data['category'], pkg_data['name'], pkg_data['version'], pkg_data['versiontag'], ext=etpConst['packagesext'], - revision=pkg_data['revision']) + revision=pkg_data['revision'], + sha1=sha1) pkg_data['download'] = download_dirpath + "/" + download_name # migrate to the proper format diff --git a/lib/entropy/client/interfaces/methods.py b/lib/entropy/client/interfaces/methods.py index 51000fc8e..fa2f03473 100644 --- a/lib/entropy/client/interfaces/methods.py +++ b/lib/entropy/client/interfaces/methods.py @@ -2383,13 +2383,18 @@ class MiscMixin: elif not shiftpath: shiftpath = os.path.sep - version = entropy_package_metadata['version'] - version += "%s%s" % (etpConst['entropyrevisionprefix'], - entropy_package_metadata['revision'],) + signatures = entropy_package_metadata.get('signatures') + package_sha1 = None + if signatures: + package_sha1 = signatures['sha1'] pkgname = entropy.dep.create_package_filename( entropy_package_metadata['category'], - entropy_package_metadata['name'], version, - entropy_package_metadata['versiontag']) + entropy_package_metadata['name'], + entropy_package_metadata['version'], + entropy_package_metadata['versiontag'], + revision = entropy_package_metadata['revision'], + sha1 = package_sha1 + ) pkg_path = os.path.join(save_directory, pkgname) if os.path.isfile(pkg_path): diff --git a/lib/entropy/dep.py b/lib/entropy/dep.py index a6b7eebaf..6a8b08a91 100644 --- a/lib/entropy/dep.py +++ b/lib/entropy/dep.py @@ -716,8 +716,41 @@ def get_entropy_newer_version(versions): """ return _generic_sorter(versions, 
entropy_compare_versions) +sha1_re = re.compile(r"(.*)\.([a-f\d]{40})(.*)") +def get_entropy_package_sha1(package_name): + """ + Extract the SHA1 checksum from a package file name. + + @param package_name: package file name + @type package_name: string + @return: the package SHA1 checksum, if any, or None + @rtype: string or None + """ + match = sha1_re.match(package_name) + if match: + groups = match.groups() + if len(groups) != 3: + return + return groups[1] + +def remove_entropy_package_sha1(package_name): + """ + Remove the SHA1 checksum from a package file name. + + @param package_name: package file name + @type package_name: string + """ + match = sha1_re.match(package_name) + if match: + groups = match.groups() + if len(groups) != 3: + return package_name + return groups[0] + return package_name + def create_package_filename(category, name, version, package_tag, - ext = None, revision = None): + ext = None, revision = None, + sha1 = None): """ Create package filename string. @@ -731,6 +764,8 @@ def create_package_filename(category, name, version, package_tag, @type package_tag: string or None @keyword ext: alternative package file extension @type ext: string + @keyword sha1: a SHA1 checksum to add to the file name + @type sha1: string @return: package file name string @rtype: string """ @@ -743,6 +778,8 @@ def create_package_filename(category, name, version, package_tag, package_name += package_tag if ext is None: ext = etpConst['packagesext'] + if sha1 is not None: + package_name += ".%s" % (sha1,) if revision is not None: package_name += "~%d" % (revision,) package_name += ext @@ -762,10 +799,10 @@ def strip_entropy_package_extension(pkg_path): def exploit_package_filename(package_name): """ This is the inverse function of create_package_filename, and returns - a tuple composed by category, name, version, package_tag (None if not set) - and additional revision (as int). 
+ a tuple composed by category, name, version, package_tag (None if not set), + SHA1 checksum (None if not set), and additional revision (as int). package_name should be a string like this: - <category>:<name>-<version>[~<revision>[#<tag>]][.tbz2] + <category>:<name>-<version>[.<sha1>][~<revision>[#<tag>]][.tbz2] @param package_name: package file name @type package_name: string @@ -777,17 +814,20 @@ def exploit_package_filename(package_name): pkg_str = strip_entropy_package_extension(package_name) pkg_str = pkg_str.replace(":", "/") pkg_str = strip_entropy_package_extension(pkg_str) - etp_tag = dep_gettag(pkg_str) - pkg_str = remove_tag(pkg_str) etp_rev = dep_get_entropy_revision(pkg_str) pkg_str = remove_entropy_revision(pkg_str) + etp_sha1 = get_entropy_package_sha1(pkg_str) + pkg_str = remove_entropy_package_sha1(pkg_str) + etp_tag = dep_gettag(pkg_str) + pkg_str = remove_tag(pkg_str) split_data = catpkgsplit(pkg_str) if split_data is None: - raise AttributeError("invalid package name passed: %s" % (package_name,)) + raise AttributeError("invalid package name passed: %s" % ( + package_name,)) etp_cat, etp_name, ver, rev = split_data if rev != "r0": ver += "-" + rev - return etp_cat, etp_name, ver, etp_tag, etp_rev + return etp_cat, etp_name, ver, etp_tag, etp_sha1, etp_rev def create_package_atom_string(category, name, version, package_tag): """ diff --git a/lib/entropy/server/interfaces/main.py b/lib/entropy/server/interfaces/main.py index 755ac3c01..2621354d6 100644 --- a/lib/entropy/server/interfaces/main.py +++ b/lib/entropy/server/interfaces/main.py @@ -2751,29 +2751,6 @@ class Server(Client): ) return None - # determine the maximum revision for package, this avoids - # overwriting tbz2s already on mirrors, see bug #3904.
- def _get_rev(rev_repo_id): - repo = self.open_repository(rev_repo_id) - pkg_ids = repo.getPackageIds(match_atom) - revs = [-1] # so that revs is not empty - revs += [repo.retrieveRevision(x) for x in pkg_ids] - return max(revs) - - # given that we look into the current repo as well, this - # cannot be -1 - max_rev = max([_get_rev(x) for x in self.repositories()]) - assert max_rev != -1, "max_rev cannot be -1" - # increase by one and we have the new target revision we must use - new_package_revision = max_rev + 1 - - to_package_rel_path = entropy.dep.create_package_filename( - dbconn.retrieveCategory(package_id), - dbconn.retrieveName(package_id), - dbconn.retrieveVersion(package_id), - dbconn.retrieveTag(package_id), - revision = new_package_revision) - # we need to ask SpmPlugin to re-extract metadata from pkg file # and grab the new "download" metadatum value using our # license check callback. It has to be done here because @@ -2789,7 +2766,7 @@ class Server(Client): # the logic. updated_package_rel_path = os.path.join( os.path.dirname(tmp_data['download']), - os.path.basename(to_package_rel_path)) + os.path.basename(package_rel_path)) del tmp_data to_file = self.complete_local_upload_package_path( @@ -2797,12 +2774,19 @@ class Server(Client): if new_tag is not None: + signatures = dbconn.retrieveSignatures(package_id) + package_sha1 = None + if signatures: + package_sha1, _ignore, _ignore, _ignore = signatures + tagged_package_filename = \ entropy.dep.create_package_filename( dbconn.retrieveCategory(package_id), dbconn.retrieveName(package_id), dbconn.retrieveVersion(package_id), - new_tag, revision = new_package_revision) + new_tag, + revision = dbconn.retrieveRevision(package_id), + sha1 = package_sha1) to_file = self.complete_local_upload_package_path( updated_package_rel_path, to_repository_id) @@ -2871,8 +2855,6 @@ class Server(Client): # need to set back data['download'], because pkg path might got # changed, due to license re-validation data['download']
= updated_package_rel_path - # force our own revision, to avoid file name collisions - data['revision'] = new_package_revision # GPG # before inserting new pkg, drop GPG signature and re-sign @@ -2922,8 +2904,7 @@ class Server(Client): ) data['original_repository'] = to_repository_id # force our own revision, to avoid file name collisions - new_package_id = todbconn.handlePackage( - data, forcedRevision = new_package_revision) + new_package_id = todbconn.handlePackage(data) del data todbconn.commit() @@ -3142,7 +3123,6 @@ class Server(Client): def _inject_database_into_packages(self, repository_id, injection_data): - # now inject metadata into tbz2 packages self.output( "[%s] %s:" % ( darkgreen(repository_id), @@ -3258,7 +3238,19 @@ class Server(Client): dbconn.setSignatures(idpackage, signatures['sha1'], signatures['sha256'], signatures['sha512'], gpg_sign) + + # recompute the package file name and download url + # to match the final SHA1. + download_url = self._setup_repository_package_filename( + dbconn, idpackage) + package_dir = os.path.dirname(package_path) + new_package_path = os.path.join( + package_dir, os.path.basename(download_url)) + os.rename(package_path, new_package_path) + package_path = new_package_path + dbconn.commit() + const_setup_file(package_path, etpConst['entropygid'], 0o664) self.output( "[%s|%s] %s: %s" % ( @@ -5656,26 +5648,32 @@ class Server(Client): destination_paths.reverse() return idpackage, destination_paths - def _setup_repository_package_filename(self, dbconn, idpackage): + def _setup_repository_package_filename(self, repo, package_id): """ Setup a new repository file name using current package metadata. 
""" + category = repo.retrieveCategory(package_id) + name = repo.retrieveName(package_id) + version = repo.retrieveVersion(package_id) + tag = repo.retrieveTag(package_id) + revision = repo.retrieveRevision(package_id) + signatures = repo.retrieveSignatures(package_id) + sha1 = None + if signatures: + sha1, _ignore, _ignore, _ignore = signatures - downloadurl = dbconn.retrieveDownloadURL(idpackage) - packagerev = dbconn.retrieveRevision(idpackage) - downloaddir = os.path.dirname(downloadurl) - downloadfile = os.path.basename(downloadurl) - # add revision - pkg_ext = etpConst['packagesext'] - downloadfile = downloadfile[:-len(pkg_ext)]+"~%s%s" % (packagerev, - pkg_ext,) - downloadurl = os.path.join(downloaddir, downloadfile) + old_download_url = repo.retrieveDownloadURL(package_id) + download_dir = os.path.dirname(old_download_url) - # update url - dbconn.setDownloadURL(idpackage, downloadurl) - dbconn.commit() + package_name = entropy.dep.create_package_filename( + category, name, version, tag, ext = etpConst['packagesext'], + revision = revision, sha1 = sha1) - return downloadurl + download_url = os.path.join(download_dir, package_name) + repo.setDownloadURL(package_id, download_url) + repo.commit() + + return download_url def __user_filter_out_missing_deps(self, pkg_repo, entropy_repository, missing_map, ask): diff --git a/lib/entropy/spm/plugins/interfaces/portage_plugin/__init__.py b/lib/entropy/spm/plugins/interfaces/portage_plugin/__init__.py index 8779f80e4..907a0db46 100644 --- a/lib/entropy/spm/plugins/interfaces/portage_plugin/__init__.py +++ b/lib/entropy/spm/plugins/interfaces/portage_plugin/__init__.py @@ -1642,7 +1642,8 @@ class PortagePlugin(SpmPlugin): data['download'] = os.path.join(data['download'], entropy.dep.create_package_filename( data['category'], data['name'], data['version'], - data['versiontag'])) + data['versiontag'], + sha1=data['signatures']['sha1'])) # removing temporary directory shutil.rmtree(tmp_dir, True) diff --git 
a/lib/tests/dep.py b/lib/tests/dep.py index 869664df0..2a1676973 100644 --- a/lib/tests/dep.py +++ b/lib/tests/dep.py @@ -142,13 +142,28 @@ class DepTest(unittest.TestCase): self.assertEqual(et.get_entropy_newer_version(vers), out_vers) def test_create_package_filename(self): - category = "app-foo" - name = "foo" - version = "1.2.3" + package_category = "app-foo" + package_name = "foo" + package_version = "1.2.3" package_tag = "abc" - result = 'app-foo:foo-1.2.3#abc.tbz2' - self.assertEqual(et.create_package_filename(category, name, version, - package_tag), result) + package_sha1 = "c85320d9ddb90c13f4a215f1f0a87b531ab33310" + package_rev = 123 + + result = "app-foo:foo-1.2.3#abc.c85320d9ddb90c13f4a215f1f0a87b531ab33310~123.tbz2" + self.assertEqual(et.create_package_filename( + package_category, package_name, package_version, + package_tag, revision = package_rev, + sha1 = package_sha1), result) + + # verify the inverse function + cat, name, ver, tag, sha1, rev = et.exploit_package_filename( + result) + self.assertEqual(cat, package_category) + self.assertEqual(name, package_name) + self.assertEqual(ver, package_version) + self.assertEqual(tag, package_tag) + self.assertEqual(sha1, package_sha1) + self.assertEqual(rev, package_rev) def test_create_package_atom_string(self): category = "app-foo" @@ -234,6 +249,16 @@ class DepTest(unittest.TestCase): result, outcome = parser.parse() self.assertEqual(outcome, expected_outcome) + def test_get_entropy_package_sha1(self): + names = [ + ("app-foo:bar-123.eda9a5004ce8eb127d939de6ec394571a407f863~1.tbz2", + "eda9a5004ce8eb127d939de6ec394571a407f863"), + ] + + for name, expected_outcome in names: + outcome = et.get_entropy_package_sha1(name) + self.assertEqual(outcome, expected_outcome) + if __name__ == '__main__': unittest.main() et.kill_threads() diff --git a/services/entropy-pkgdelta-generator b/services/entropy-pkgdelta-generator index fc976ba1c..554547834 100755 --- a/services/entropy-pkgdelta-generator +++ 
b/services/entropy-pkgdelta-generator @@ -28,13 +28,14 @@ def generate_pkg_map(packages_directory): if not pkg_file.endswith(etpConst['packagesext']): continue try: - cat, name, ver, tag, rev = entropy.dep.exploit_package_filename( + (cat, name, ver, tag, + sha1, rev) = entropy.dep.exploit_package_filename( pkg_file) except AttributeError: # skip invalid crap continue obj = pkg_map.setdefault((cat, name), set()) - obj.add((ver, tag, rev, pkg_file)) + obj.add((ver, tag, sha1, rev, pkg_file)) return pkg_map def sort_packages(pkg_map_items): @@ -42,20 +43,44 @@ def sort_packages(pkg_map_items): Sort packages by version, tag, revision and return a sort map (dict) and a sorted list of them (list) """ - def _generate_from_to(cat_name_map, sorted_pkg_list): + cat_name_map = {} + + def _generate_from_to(sorted_pkg_list): for pkg_idx in range(len(sorted_pkg_list)): pkg_key = sorted_pkg_list[pkg_idx] next_pkgs = set(sorted_pkg_list[pkg_idx:]) next_pkgs.discard(pkg_key) sorted_next = sorted(next_pkgs, key = lambda x: cat_name_map[x]) + + ver_tag_rev = pkg_key[0], pkg_key[1], pkg_key[3] for next_pkg_key in sorted_next: + next_ver_tag_rev = (next_pkg_key[0], next_pkg_key[1], + next_pkg_key[3]) + if ver_tag_rev == next_ver_tag_rev: + # do not create an edelta between packages + # with the same version tag and revision. 
+ continue yield (cat_name_map[pkg_key], cat_name_map[next_pkg_key]) - cat_name_map = dict((((ver, tag, rev), pkg_path) \ - for ver, tag, rev, pkg_path in pkg_map_items)) - sorted_pkgs = entropy.dep.get_entropy_newer_version(list(cat_name_map)) + sort_name_map = {} + sort_pkgs = set() + for ver, tag, sha1, rev, pkg_path in pkg_map_items: + full_key = (ver, tag, sha1, rev) + cat_name_map[full_key] = pkg_path + key = (ver, tag, rev) + sort_pkgs.add(key) + + obj = sort_name_map.setdefault(key, set()) + obj.add(full_key) + + sorted_pkgs = entropy.dep.get_entropy_newer_version( + list(sort_pkgs)) sorted_pkgs.reverse() - return _generate_from_to(cat_name_map, sorted_pkgs) + + full_sorted_pkgs = [] + for key in sorted_pkgs: + full_sorted_pkgs.extend(sort_name_map[key]) + return _generate_from_to(full_sorted_pkgs) def generate_package_deltas(directory, quiet): """