[entropy.spm] improve handling of non-ascii paths (e.g. puppet-agent).
It looks like Portage now stores unicode paths correctly in its metadata, unlike in the past. We need to make sure that we parse "CONTENTS" files, and content metadata in general, using the correct encoding. This will allow us to store and retrieve such metadata from the sqlite3 database correctly, and also to match the stored paths against the filesystem paths exactly. This commit may need a bit more real-life testing. Backward compatibility with old Entropy and Portage tbz2 files should work as expected. Unit tests attached.
This commit is contained in:
@@ -1410,7 +1410,11 @@ class PortagePlugin(SpmPlugin):
|
||||
data['datecreation'] = str(os.path.getmtime(package_file))
|
||||
data['size'] = str(entropy.tools.get_file_size(package_file))
|
||||
|
||||
tmp_dir = const_mkdtemp(prefix="entropy.spm._extract")
|
||||
# This allows os.* functions on Python2 to use unicode, correctly.
|
||||
# See the issues with puppet-agent (unit tests in db.py).
|
||||
tmp_dir = const_convert_to_unicode(
|
||||
const_mkdtemp(prefix="entropy.spm._extract"),
|
||||
enctype = sys.getfilesystemencoding())
|
||||
meta_dir = os.path.join(tmp_dir, "portage")
|
||||
pkg_dir = os.path.join(tmp_dir, "pkg")
|
||||
os.mkdir(meta_dir)
|
||||
@@ -3033,15 +3037,15 @@ class PortagePlugin(SpmPlugin):
|
||||
from portage.dbapi.vartree import write_contents
|
||||
|
||||
entropy_content_iter = entropy_package_metadata['content']
|
||||
sys_root = const_convert_to_rawstring(etpConst['systemroot'])
|
||||
# Make sure that we use the fs encoding. This works with both
|
||||
# old and new Entropy packages.
|
||||
sys_root = const_convert_to_unicode(
|
||||
etpConst['systemroot'], enctype=sys.getfilesystemencoding())
|
||||
content_meta = {}
|
||||
|
||||
try:
|
||||
for _package_id, _path, _ftype in entropy_content_iter:
|
||||
|
||||
_ftype = const_convert_to_rawstring(_ftype)
|
||||
path_orig = const_convert_to_rawstring(_path)
|
||||
path = sys_root + path_orig
|
||||
for _package_id, path, _ftype in entropy_content_iter:
|
||||
path = sys_root + path
|
||||
|
||||
is_sym = os.path.islink(path)
|
||||
if os.path.isfile(path) and not is_sym:
|
||||
@@ -4648,22 +4652,28 @@ class PortagePlugin(SpmPlugin):
|
||||
if os.path.isfile(content_file):
|
||||
|
||||
with open(content_file, "rb") as f:
|
||||
content = [const_convert_to_unicode(x) for x in f.readlines()]
|
||||
for line in f.readlines():
|
||||
try:
|
||||
# Modern Entropy/Portage correctly use unicode.
|
||||
line = const_convert_to_unicode(
|
||||
line, enctype=sys.getfilesystemencoding())
|
||||
except UnicodeDecodeError:
|
||||
# Support for very ancient Entropy or Portage packages.
|
||||
line = const_convert_to_unicode(
|
||||
line, enctype=etpConst['conf_raw_encoding'])
|
||||
|
||||
outcontent = set()
|
||||
for line in content:
|
||||
line = line.strip().split()
|
||||
try:
|
||||
line = line.strip().split(" ")
|
||||
datatype = line[0]
|
||||
datafile = line[1:]
|
||||
|
||||
if datatype == obj_t:
|
||||
datafile = datafile[:-2]
|
||||
datafile = ' '.join(datafile)
|
||||
datafile = " ".join(datafile)
|
||||
elif datatype in (dir_t, fif_t, dev_t):
|
||||
datafile = ' '.join(datafile)
|
||||
datafile = " ".join(datafile)
|
||||
elif datatype == sym_t:
|
||||
datafile = datafile[:-3]
|
||||
datafile = ' '.join(datafile)
|
||||
datafile = " ".join(datafile)
|
||||
else:
|
||||
myexc = "%s %s. %s." % (
|
||||
datafile,
|
||||
@@ -4675,19 +4685,22 @@ class PortagePlugin(SpmPlugin):
|
||||
warnings.warn(
|
||||
"Empty file path detected, skipping!")
|
||||
continue
|
||||
outcontent.add((datafile, datatype))
|
||||
except:
|
||||
pass
|
||||
|
||||
outcontent = sorted(outcontent)
|
||||
for datafile, datatype in outcontent:
|
||||
pkg_content[datafile] = datatype
|
||||
pkg_content[datafile] = datatype
|
||||
|
||||
else:
|
||||
|
||||
# CONTENTS is not generated when a package is emerged with
|
||||
# portage and the option -B
|
||||
# we have to use the unpacked package file and generate content dict
|
||||
try:
|
||||
# Modern Entropy/Portage correctly use unicode.
|
||||
pkg_dir = const_convert_to_unicode(
|
||||
pkg_dir, enctype=sys.getfilesystemencoding())
|
||||
except UnicodeDecodeError:
|
||||
# Support for very ancient Entropy or Portage packages.
|
||||
pkg_dir = const_convert_to_unicode(
|
||||
pkg_dir, enctype=etpConst['conf_raw_encoding'])
|
||||
|
||||
tmpdir_len = len(pkg_dir)
|
||||
for currentdir, subdirs, files in os.walk(pkg_dir):
|
||||
cur_dir = currentdir[tmpdir_len:]
|
||||
|
||||
@@ -19,6 +19,14 @@ def get_test_generic_package(test_pkg):
|
||||
path = _get_test_generic_package_path(test_pkg)
|
||||
return path
|
||||
|
||||
def get_test_package_ca_certs():
    """Return the path of the ca-certificates test package."""
    return get_test_generic_package("ca-certificates-20180409.3.37.tbz2")
|
||||
|
||||
def get_test_package_puppet_agent():
    """Return the path of the puppet-agent test package."""
    return get_test_generic_package("puppet-agent-6.0.0.tbz2")
|
||||
|
||||
def get_test_package():
    """Return the path of the zlib test package."""
    return get_test_generic_package("zlib-1.2.3-r1.tbz2")
|
||||
|
||||
@@ -423,6 +423,32 @@ class EntropyRepositoryTest(unittest.TestCase):
|
||||
self.assertTrue(isinstance(results, set))
|
||||
self.assertTrue(rc == 1)
|
||||
|
||||
def test_db_handle_unicode_puppet_agent(self):
    """
    Round-trip the puppet-agent package metadata through the test
    repository: what is read back must equal what was extracted.
    """
    pkg_path = _misc.get_test_package_puppet_agent()
    extracted = self.Spm.extract_package_metadata(pkg_path)

    package_id = self.test_db.addPackage(extracted)
    stored = self.test_db.getPackageData(package_id)

    # Clean both sides the same way before comparing them.
    _misc.clean_pkg_metadata(stored)
    _misc.clean_pkg_metadata(extracted)
    self.assertEqual(extracted, stored)

    self.test_db.removePackage(package_id)
|
||||
|
||||
def test_db_handle_unicode_ca_certs(self):
    """
    Round-trip the ca-certificates package metadata through the test
    repository: what is read back must equal what was extracted.
    """
    pkg_path = _misc.get_test_package_ca_certs()
    extracted = self.Spm.extract_package_metadata(pkg_path)

    package_id = self.test_db.addPackage(extracted)
    stored = self.test_db.getPackageData(package_id)

    # Clean both sides the same way before comparing them.
    _misc.clean_pkg_metadata(stored)
    _misc.clean_pkg_metadata(extracted)
    self.assertEqual(extracted, stored)

    self.test_db.removePackage(package_id)
|
||||
|
||||
def test_db_insert_compare_match_utf(self):
|
||||
|
||||
# insert/compare
|
||||
|
||||
BIN
lib/tests/packages/ca-certificates-20180409.3.37.tbz2
Normal file
BIN
lib/tests/packages/ca-certificates-20180409.3.37.tbz2
Normal file
Binary file not shown.
BIN
lib/tests/packages/puppet-agent-6.0.0.tbz2
Normal file
BIN
lib/tests/packages/puppet-agent-6.0.0.tbz2
Normal file
Binary file not shown.
Reference in New Issue
Block a user