Files
entropy/services/repository-webinstall-generator
Sławomir Nizio 41ba9b6d63 [*] be strict about Python modules paths being loaded
For convenience (seemingly, and it really is convenient) equo and other
tools can be run from the checkout, and Entropy modules are loaded from
the checkout. Now there is a strict separation between when system paths
are used and when paths from the checkout are used.

It makes it a bit more robust, secure and predictable at the cost of
a little more complexity.

A pleasant side effect of this change is that it is not required to
change directory to the tool (to use non-system one), as paths in the
checkout are relative to scripts.

Imports in lib/tests were not adjusted.
2018-10-27 13:03:58 +02:00

590 lines
21 KiB
Python
Executable File

#!/usr/bin/python
import sys
import os
from os import path as osp
# Directory two levels above the resolved (realpath) location of this script.
_base = osp.dirname(osp.dirname(osp.realpath(__file__)))
# The "entropy-in-vcs-checkout" marker file selects which copy of
# entropy_path_loader is put first on sys.path: the one found next to the
# marker (under _base) when it exists, the one under /usr/lib/entropy
# otherwise.
if os.path.isfile(osp.join(_base, "entropy-in-vcs-checkout")):
sys.path.insert(0, osp.join(_base, "entropy_path_loader"))
else:
sys.path.insert(0, "/usr/lib/entropy/entropy_path_loader")
# The alias was only needed for the path setup above.
del osp
import entropy_path_loader
import time
import tempfile
import errno
import bz2
from entropy.i18n import _
from entropy.output import print_info, blue, teal, brown, darkgreen, purple, \
print_error, print_warning, TextInterface
from entropy.exceptions import SystemDatabaseError
from entropy.client.interfaces.db import GenericRepository
from entropy.qa import QAInterface
from entropy.core.settings.base import SystemSettings
from entropy.exceptions import EntropyException
from entropy.const import const_convert_to_rawstring, etpConst
from entropy.locks import SimpleFileLock
import entropy.dep
import entropy.tools
# Generates "webinstall" package files for the packages of an Entropy
# repository: each generated file is SHELL_PREAMBLE followed by a
# bzip2-compressed repository file (see _generate_webinstall_package).
class WebinstallGenerator(TextInterface):
# Shell stub written at the very beginning of every generated file. When the
# file is executed, the stub runs `equo install` on the file itself ("$0"),
# forwarding the command line arguments. The value of
# etpConst['databasestarttag'] is appended right after the stub text.
# NOTE(review): the stub declares #!/bin/sh but uses the `function` keyword,
# which is not POSIX sh syntax -- confirm it works with the target shells
# before touching it; the format looks like a de-facto specification.
SHELL_PREAMBLE = const_convert_to_rawstring("""\
#!/bin/sh
function execute_app() {
equo install "$0" $@
return ${?}
}
execute_app $@
exit ${?}
"""+ etpConst['databasestarttag'])
# Raised by _calculate_required_actions() when no configured package
# directory matches the download directory of a package.
class CalculationError(EntropyException):
"""Raised when an error occurred while calculating the work queue"""
def __init__(self, repository_id, entropy_repository, package_dirs,
             mirror_urls, regenerate = False):
    """
    Store the generator configuration.

    @param repository_id: repository identifier recorded into the
        generated packages
    @param entropy_repository: repository instance the packages are read
        from
    @param package_dirs: local directories that may contain package files
    @param mirror_urls: iterable of mirror URL strings
    @param regenerate: when True, webinstall files are generated even if
        they already exist
    """
    self._repo_id = repository_id
    self._repo = entropy_repository
    self._package_dirs = package_dirs
    self._regenerate = regenerate
    # One URL per line: this is part of the (unwritten) specification,
    # don't change it!
    url_separator = "\n"
    self._mirror_urls_str = url_separator.join(mirror_urls)
    self._qa = QAInterface()
def __copy_data(self, f_obj_source, f_obj_dest):
while True:
chunk = f_obj_source.read(16384)
if not chunk:
break
f_obj_dest.write(chunk)
f_obj_dest.flush()
def _calculate_required_actions(self):
    """
    Compute the list of package identifiers requiring a webinstall package
    generation and collect the expired ones (that would be removed).

    @return: tuple (work_queue, expired_webinstall_files) where work_queue
        is a list of (package_id, local_package_path, local_etp_path)
        tuples and expired_webinstall_files is a set of file paths
    @raise WebinstallGenerator.CalculationError: if none of the configured
        package directories matches the download directory of a package
    """
    package_ids = self._repo.listAllPackageIds()
    download_map = dict((x, self._repo.retrieveDownloadURL(x)) for x in
        package_ids)
    # download directory -> matching local package directory
    package_dirs_cache = {}
    expired_webinstall_files = set()
    work_queue = []
    max_count = len(package_ids)
    count = 0

    def _determine_local_package_path(package_id):
        # Map the download path of the package onto the first configured
        # directory whose path ends with the download directory.
        download_path = download_map[package_id]
        download_path_dir, download_file_name = os.path.split(download_path)
        local_package_dir = package_dirs_cache.get(download_path_dir)
        if local_package_dir is None:
            for package_dir in self._package_dirs:
                if package_dir.endswith(download_path_dir):
                    local_package_dir = package_dir
                    package_dirs_cache[download_path_dir] = package_dir
                    break
        if local_package_dir is None:
            self.output("%s: %s" % (
                    brown("Cannot find local package directory for"),
                    download_path,
                ),
                header = teal(" @@ "),
                importance = 1, back = True,
                level = "error"
            )
            raise WebinstallGenerator.CalculationError(
                "cannot find local package dir for: %s" % (download_path,))
        return os.path.join(local_package_dir, download_file_name)

    def _determine_local_etp_path(local_package_dir, package_id):
        # Build the webinstall file path from the package scope data.
        atom, category, name, version, slot, tag, revision, branch, \
            etpapi = self._repo.getScopeData(package_id)
        version += "%s%s" % (etpConst['entropyrevisionprefix'], revision,)
        local_etp_fn = entropy.dep.create_package_relative_path(category,
            name, version, tag, ext = etpConst['packagesext_webinstall'])
        return os.path.join(local_package_dir, local_etp_fn)

    def _validate_etp_creation(local_package_path, local_etp_path, overwrite):
        # Tell whether the webinstall file has to be (re)generated; as a
        # side effect, record webinstall files whose package file is gone.
        if self._regenerate:
            # always generate the file in this case
            return True
        if not os.path.isfile(local_package_path):
            if os.path.isfile(local_etp_path):
                expired_webinstall_files.add(local_etp_path)
            # package file is not available, skip!
            return False
        if not overwrite and os.path.isfile(local_etp_path):
            # already available
            return False
        return True

    def _resolve_paths(package_id):
        # Return (local package path, local webinstall path) for package_id.
        local_package_path = _determine_local_package_path(package_id)
        local_etp_path = _determine_local_etp_path(
            os.path.dirname(local_package_path), package_id)
        return local_package_path, local_etp_path

    work_queue_cache = set()
    for package_id in sorted(package_ids, reverse = True):
        count += 1
        download_path = download_map[package_id]
        # count starts from 1 here: report the first item, every 150th
        # item and the last one.
        if (count % 150 == 0) or (count == 1) or (count == max_count):
            self.output("%s: %s" % (purple("scanning"), download_path),
                header = teal(" @@ "),
                count = (count, max_count),
                importance = 0,
                back = True)

        # don't enqueue the same package id twice
        if package_id in work_queue_cache:
            continue

        local_package_path, local_etp_path = _resolve_paths(package_id)
        if not _validate_etp_creation(local_package_path,
            local_etp_path, False):
            continue
        work_queue.append((package_id, local_package_path, local_etp_path))
        work_queue_cache.add(package_id)

        # also pull in its reverse dependencies
        # this to ensure that packages referencing this packages will
        # have updated metadata.
        # in this case, etp files are going to be overwritten
        revdeps = self._repo.retrieveReverseDependencies(package_id)
        revdeps = [x for x in revdeps if x not in work_queue_cache]
        for rev_pkg_id in revdeps:
            local_package_path, local_etp_path = _resolve_paths(rev_pkg_id)
            if _validate_etp_creation(local_package_path,
                local_etp_path, True):
                work_queue.append((rev_pkg_id, local_package_path,
                    local_etp_path))
                work_queue_cache.add(rev_pkg_id)

    # collect really expired .etp files
    ext = etpConst['packagesext_webinstall']
    for package_dir in self._package_dirs:
        if not os.path.isdir(package_dir):
            continue
        for cur_dir, sub_dirs, files in os.walk(package_dir):
            for etp_file in files:
                if not etp_file.endswith(ext):
                    continue
                # os.walk() yields names relative to cur_dir, which is
                # not package_dir once the walk enters a sub-directory.
                etp_file = os.path.join(cur_dir, etp_file)
                pkg_file = etp_file[:-len(ext)] + etpConst['packagesext']
                if not os.path.isfile(pkg_file):
                    # we can drop our .etp
                    expired_webinstall_files.add(etp_file)

    return work_queue, expired_webinstall_files
def _cleanup_expired_files(self, expired_webinstall_files):
"""
Cleanup routine that removes expired webinstall package files.
"""
for expired_file in sorted(expired_webinstall_files):
try:
os.remove(expired_file)
except OSError as err:
self.output("%s %s: %s" % (
teal("cannot remove"),
expired_file,
repr(err),
),
header = brown(" @@ "),
level = "warning",
importance = 1
)
def _prepare_base_package_repository(self):
    """
    Create a temporary, initialized repository file carrying the tree
    updates actions of the working repository and return its path.
    The file is removed again if its creation fails.
    """
    tree_updates = self._repo.listAllTreeUpdatesActions()
    # The file is generated once and copied for every package: this
    # improves the execution a lot.
    template_fd, template_path = tempfile.mkstemp(
        suffix="repo-webinst-gen")
    try:
        try:
            template_repo = GenericRepository(
                readOnly = False,
                dbFile = template_path,
                name = "empty",
                xcache = False,
                indexing = False,
                skipChecks = True)
            template_repo.initializeRepository()
            template_repo.bumpTreeUpdatesActions(tree_updates)
            template_repo.commit()
            template_repo.close()
        except Exception:
            os.remove(template_path)
            raise
    finally:
        os.close(template_fd)
    return template_path
def _generate_webinstall_package(self, base_repository_path,
    package_id, package_path, etp_path, cache_map):
    """
    Generate a webinstall package for given package_id matched inside
    the working Entropy Repository instance passed at constructor time.
    If no exceptions are raised, the generation went successful.
    The webinstall file generation is atomic: data is written to a work
    file next to etp_path, which is then renamed over etp_path.

    @param base_repository_path: path of the pre-initialized repository
        file used as starting point
    @param package_id: package identifier inside the working repository
    @param package_path: path of the package file (currently unused)
    @param etp_path: path of the webinstall file to generate
    @param cache_map: dict mapping a package identifier to a
        (timestamp, package data) tuple; updated in place
    """
    # the fallback below is the only user of shutil in this file
    import shutil

    # handle caching
    cache_map_len_threshold = 800
    excess = len(cache_map) - cache_map_len_threshold
    if excess > 0:
        # LRU logic: drop the entries with the oldest timestamps
        oldest_first = sorted(cache_map, key = lambda x: cache_map[x][0])
        for key in oldest_first[:excess]:
            cache_map.pop(key)

    # WARNING: usage of undocumented feature of
    # get_deep_dependency_list
    # NOTE: how about build deps?
    pkg_match = (package_id, self._repo)
    matches = self._qa.get_deep_dependency_list(None, pkg_match)

    tmp_fd, tmp_repo_path = tempfile.mkstemp(
        suffix="repo-webinst-gen")
    tmp_etp_path = etp_path + "._etp_work"
    compressed_tmp_path = None
    dest_repo = None
    try:
        # os.fdopen() takes ownership of tmp_fd: the descriptor is closed
        # when the with block ends and must not be closed again.
        with os.fdopen(tmp_fd, "wb") as tmp_repo_f:
            with open(base_repository_path, "rb") as tmp_repo_source_f:
                self.__copy_data(tmp_repo_source_f, tmp_repo_f)

        repo_arch = self._repo.getSetting("arch")
        atom = self._repo.retrieveAtom(package_id)
        dest_repo = GenericRepository(
            readOnly = False,
            dbFile = tmp_repo_path,
            name = atom,
            xcache = False,
            indexing = False,
            skipChecks = True)

        deps_pkg_ids = set(pkg_id for pkg_id, _repo in matches)
        deps_pkg_ids.add(package_id)
        for dep_package_id in deps_pkg_ids:
            cached = cache_map.get(dep_package_id)
            if cached is None:
                data = self._repo.getPackageData(dep_package_id,
                    get_content = False, get_changelog = False)
            else:
                data = cached[1]
            # (re)stamp the entry so that eviction follows recent usage
            cache_map[dep_package_id] = (time.time(), data)

            if "original_repository" in data:
                del data['original_repository']
            dest_package_id = dest_repo.addPackage(data,
                revision = data['revision'],
                formatted_content = True)
            source = etpConst['install_sources']['unknown']
            if dep_package_id == package_id:
                source = etpConst['install_sources']['user']
            # required in order to make mirror URL to be
            # resolved correctly, and, to make only the main
            # package to be pulled in for install.
            dest_repo.storeInstalledPackage(dest_package_id,
                self._repo_id, source = source)

        dest_repo._setSetting("plain_packages",
            self._mirror_urls_str)
        dest_repo._setSetting("arch", repo_arch)
        dest_repo.commit()
        dest_repo.close()
        dest_repo = None

        # ready to bzip2
        compressed_fd, compressed_tmp_path = tempfile.mkstemp(
            suffix="repo-webinst-gen")
        # only the path is used below, release the descriptor right away
        os.close(compressed_fd)
        entropy.tools.compress_file(tmp_repo_path,
            compressed_tmp_path, bz2.BZ2File)
        try:
            os.rename(compressed_tmp_path, tmp_repo_path)
        except OSError as err:
            if err.errno != errno.EXDEV:
                raise
            # source and destination are on different filesystems
            shutil.move(compressed_tmp_path, tmp_repo_path)
        compressed_tmp_path = None

        with open(tmp_etp_path, "wb") as etp_f:
            etp_f.write(WebinstallGenerator.SHELL_PREAMBLE)
            with open(tmp_repo_path, "rb") as bin_f:
                self.__copy_data(bin_f, etp_f)
        os.rename(tmp_etp_path, etp_path)
        # the work file has become etp_path, nothing left to clean up
        tmp_etp_path = None

    finally:
        if dest_repo is not None:
            dest_repo.close()
        # best-effort removal of whatever temporary file is left behind
        for leftover in (compressed_tmp_path, tmp_repo_path, tmp_etp_path):
            if leftover is None:
                continue
            try:
                os.remove(leftover)
            except (OSError, IOError):
                pass
def sync(self):
    """
    Scan the repository, generate the missing webinstall packages and
    remove the expired ones. Always returns True.
    """
    self.output(purple("Scanning..."),
        header = teal(" @@ "),
        importance = 1, back = True)
    pending, stale_files = self._calculate_required_actions()

    if not pending and not stale_files:
        # nothing to do
        self.output(purple("Nothing to do."),
            header = teal(" @@ "),
            importance = 1)
        return True

    if pending:
        self.output(purple("Generating web-install packages..."),
            header = teal(" @@ "),
            importance = 1)
        template_path = self._prepare_base_package_repository()
        data_cache = {}
        total = len(pending)
        try:
            for position, item in enumerate(pending, 1):
                package_id, package_path, etp_path = item
                progress = (position, total)
                atom = self._repo.retrieveAtom(package_id)
                self.output("%s: %s" % (purple("generating for"), atom),
                    header = teal(" @@ "),
                    count = progress,
                    importance = 0,
                    back = True)
                self._generate_webinstall_package(template_path,
                    package_id, package_path, etp_path, data_cache)
                self.output("%s: %s" % (purple("generated"), etp_path),
                    header = teal(" @@ "),
                    count = progress,
                    importance = 0)
        finally:
            try:
                os.remove(template_path)
            except (OSError, IOError):
                pass
            # help the garbage collector
            data_cache.clear()

    if stale_files:
        self._cleanup_expired_files(stale_files)
    return True
def _print_help(args):
    """
    Print the usage text. Returns 1 when args is empty, 0 otherwise.
    """
    app_name = os.path.basename(sys.argv[0])

    title = teal(_("Repository web-install packages generator tool"))
    print_info("%s - %s" % (blue(app_name), title,))

    generate_usage = darkgreen("generate [--regen] <repository id> <repository file path> <packages dirs [list]> -- [<mirror urls [list]>]")
    print_info(" %s:\t%s %s" % (
        purple(_("generate packages")), brown(app_name), generate_usage))

    # (placeholder or switch, explanation) pairs, in display order
    explanations = (
        ("<packages dirs [list]>",
            _("dirs where package files are storied for repository")),
        ("<mirror urls [list]>",
            _("list of package mirror urls (not mandatory)")),
        ("--regen",
            _("regenerate all the package files")),
    )
    for token, explanation in explanations:
        print_info(" %s = %s" % (teal(token), explanation,))

    help_usage = darkgreen("help")
    print_info(" %s:\t\t%s %s" % (
        purple(_("this help")), brown(app_name), help_usage))

    return 0 if args else 1
def _generate(args):
    """
    Handle the "generate" command.

    @param args: list of command line arguments following the command
        name: [--regen] <repository id> <repository file path>
        <packages dirs...> [-- <mirror urls...>]
    @return: 0 on success, 1 on invalid arguments, invalid repository or
        when another instance holds the lock
    """
    # work on a copy, the caller's list must not be altered
    args = list(args)

    regenerate = "--regen" in args
    if regenerate:
        args.remove("--regen")

    if not args:
        print_error(brown(_("Invalid arguments")))
        return 1
    repository_id = args.pop(0)
    if not entropy.tools.validate_repository_id(repository_id):
        print_error(brown(_("Invalid repository identifier.")))
        return 1

    if not args:
        print_error(brown(_("Invalid Entropy repository file path")))
        return 1
    entropy_repository_path = args.pop(0)
    # a bare file name has an empty dirname: it lives in the current
    # directory
    entropy_repository_path_dir = \
        os.path.dirname(entropy_repository_path) or os.curdir
    if not (os.path.isdir(entropy_repository_path_dir) and \
        os.access(entropy_repository_path_dir, os.W_OK | os.R_OK)):
        print_error(brown(_("Invalid Entropy repository file path")))
        return 1

    # everything before "--" is a packages directory, everything after
    # it is a mirror url
    packages_dirs = []
    mirror_urls = []
    do_packages_dirs = True
    for arg in args:
        if arg == "--":
            do_packages_dirs = False
        elif do_packages_dirs:
            packages_dirs.append(arg)
        else:
            mirror_urls.append(arg)

    if not packages_dirs:
        print_error(brown(_("Missing packages directories")))
        return 1
    if not mirror_urls:
        # coupled with ETP_REPOSITORIES_CONF
        print_info(brown(_("Using repositories.conf settings for mirrors")))
        sys_set = SystemSettings()
        mirror_urls = sys_set['repositories']['available'].get(
            repository_id, {}).get('plain_packages', [])
    if not mirror_urls:
        print_error(brown(_("Missing mirror urls")))
        return 1

    for package_dir in packages_dirs:
        if not (os.path.isdir(package_dir) and \
            os.access(package_dir, os.R_OK | os.W_OK)):
            print_error("%s: %s" % (
                brown(_("Insufficient permissions")),
                package_dir,))
            return 1

    lock_map = {}
    # acquire lock
    lock_file = entropy_repository_path + ".webinstall.lock"
    acquired = False
    try:
        acquired = SimpleFileLock.acquire(lock_file, lock_map)
        if not acquired:
            print_error(brown(_("Another instance is running.")))
            return 1

        repo = GenericRepository(
            dbFile = entropy_repository_path,
            name = repository_id,
            indexing = True,
            readOnly = False,
            xcache = True)
        # from here on the repository must be closed whatever happens
        try:
            try:
                repo.validate()
                repo.integrity_check()
            except SystemDatabaseError:
                print_error(brown(_("Invalid repository.")))
                return 1

            # force indexing, if user is not in entropy group, indexing is
            # forced to False.
            repo.setIndexing(True)
            repo.createAllIndexes()

            generator = WebinstallGenerator(repository_id, repo,
                packages_dirs, mirror_urls, regenerate = regenerate)
            sts = generator.sync()
        finally:
            repo.close()

        return 0 if sts else 1

    finally:
        if acquired:
            SimpleFileLock.release(lock_file, lock_map)
if __name__ == "__main__":
    # command name -> handler; "__fallback__" serves unknown commands
    handlers = {
        'generate': _generate,
        'help': _print_help,
        '__fallback__': _print_help,
    }

    # no command at all behaves like "help"
    cli_args = sys.argv[1:] or ["help"]
    command = cli_args[0]
    handler = handlers.get(command, handlers.get("__fallback__"))
    raise SystemExit(handler(cli_args[1:]))