diff --git a/lib/entropy/spm/plugins/interfaces/portage_plugin/__init__.py b/lib/entropy/spm/plugins/interfaces/portage_plugin/__init__.py index 276bd8f54..b0cf47f92 100644 --- a/lib/entropy/spm/plugins/interfaces/portage_plugin/__init__.py +++ b/lib/entropy/spm/plugins/interfaces/portage_plugin/__init__.py @@ -1410,7 +1410,11 @@ class PortagePlugin(SpmPlugin): data['datecreation'] = str(os.path.getmtime(package_file)) data['size'] = str(entropy.tools.get_file_size(package_file)) - tmp_dir = const_mkdtemp(prefix="entropy.spm._extract") + # This allows os.* functions on Python2 to use unicode, correctly. + # See the issues with puppet-agent (unit tests in db.py). + tmp_dir = const_convert_to_unicode( + const_mkdtemp(prefix="entropy.spm._extract"), + enctype = sys.getfilesystemencoding()) meta_dir = os.path.join(tmp_dir, "portage") pkg_dir = os.path.join(tmp_dir, "pkg") os.mkdir(meta_dir) @@ -3033,15 +3037,15 @@ class PortagePlugin(SpmPlugin): from portage.dbapi.vartree import write_contents entropy_content_iter = entropy_package_metadata['content'] - sys_root = const_convert_to_rawstring(etpConst['systemroot']) + # Make sure that we use the fs encoding. This works with both + # old and new Entropy packages. + sys_root = const_convert_to_unicode( + etpConst['systemroot'], enctype=sys.getfilesystemencoding()) content_meta = {} try: - for _package_id, _path, _ftype in entropy_content_iter: - - _ftype = const_convert_to_rawstring(_ftype) - path_orig = const_convert_to_rawstring(_path) - path = sys_root + path_orig + for _package_id, path, _ftype in entropy_content_iter: + path = sys_root + path is_sym = os.path.islink(path) if os.path.isfile(path) and not is_sym: @@ -4648,22 +4652,28 @@ class PortagePlugin(SpmPlugin): if os.path.isfile(content_file): with open(content_file, "rb") as f: - content = [const_convert_to_unicode(x) for x in f.readlines()] + for line in f.readlines(): + try: + # Modern Entropy/Portage correctly use unicode. + line = const_convert_to_unicode( + line, enctype=sys.getfilesystemencoding()) + except UnicodeDecodeError: + # Support for very ancient Entropy or Portage packages. + line = const_convert_to_unicode( + line, enctype=etpConst['conf_raw_encoding']) - outcontent = set() - for line in content: - line = line.strip().split() - try: + line = line.strip().split(" ") datatype = line[0] datafile = line[1:] + if datatype == obj_t: datafile = datafile[:-2] - datafile = ' '.join(datafile) + datafile = " ".join(datafile) elif datatype in (dir_t, fif_t, dev_t): - datafile = ' '.join(datafile) + datafile = " ".join(datafile) elif datatype == sym_t: datafile = datafile[:-3] - datafile = ' '.join(datafile) + datafile = " ".join(datafile) else: myexc = "%s %s. %s." % ( datafile, @@ -4675,19 +4685,22 @@ class PortagePlugin(SpmPlugin): warnings.warn( "Empty file path detected, skipping!") continue - outcontent.add((datafile, datatype)) - except: - pass - - outcontent = sorted(outcontent) - for datafile, datatype in outcontent: - pkg_content[datafile] = datatype + pkg_content[datafile] = datatype else: # CONTENTS is not generated when a package is emerged with # portage and the option -B # we have to use the unpacked package file and generate content dict + try: + # Modern Entropy/Portage correctly use unicode. + pkg_dir = const_convert_to_unicode( + pkg_dir, enctype=sys.getfilesystemencoding()) + except UnicodeDecodeError: + # Support for very ancient Entropy or Portage packages. + pkg_dir = const_convert_to_unicode( + pkg_dir, enctype=etpConst['conf_raw_encoding']) + tmpdir_len = len(pkg_dir) for currentdir, subdirs, files in os.walk(pkg_dir): cur_dir = currentdir[tmpdir_len:] diff --git a/lib/tests/_misc.py b/lib/tests/_misc.py index 8d7a97f10..9b910c40e 100644 --- a/lib/tests/_misc.py +++ b/lib/tests/_misc.py @@ -19,6 +19,14 @@ def get_test_generic_package(test_pkg): path = _get_test_generic_package_path(test_pkg) return path +def get_test_package_ca_certs(): + test_pkg = "ca-certificates-20180409.3.37.tbz2" + return get_test_generic_package(test_pkg) + +def get_test_package_puppet_agent(): + test_pkg = "puppet-agent-6.0.0.tbz2" + return get_test_generic_package(test_pkg) + def get_test_package(): test_pkg = "zlib-1.2.3-r1.tbz2" return get_test_generic_package(test_pkg) diff --git a/lib/tests/db.py b/lib/tests/db.py index fec0a7f36..56fee3528 100644 --- a/lib/tests/db.py +++ b/lib/tests/db.py @@ -423,6 +423,32 @@ class EntropyRepositoryTest(unittest.TestCase): self.assertTrue(isinstance(results, set)) self.assertTrue(rc == 1) + def test_db_handle_unicode_puppet_agent(self): + test_pkg = _misc.get_test_package_puppet_agent() + data = self.Spm.extract_package_metadata(test_pkg) + + idpackage = self.test_db.addPackage(data) + db_data = self.test_db.getPackageData(idpackage) + + _misc.clean_pkg_metadata(db_data) + _misc.clean_pkg_metadata(data) + self.assertEqual(data, db_data) + + self.test_db.removePackage(idpackage) + + def test_db_handle_unicode_ca_certs(self): + test_pkg = _misc.get_test_package_ca_certs() + data = self.Spm.extract_package_metadata(test_pkg) + + idpackage = self.test_db.addPackage(data) + db_data = self.test_db.getPackageData(idpackage) + + _misc.clean_pkg_metadata(db_data) + _misc.clean_pkg_metadata(data) + self.assertEqual(data, db_data) + + self.test_db.removePackage(idpackage) + def test_db_insert_compare_match_utf(self): # insert/compare diff --git a/lib/tests/packages/ca-certificates-20180409.3.37.tbz2 b/lib/tests/packages/ca-certificates-20180409.3.37.tbz2 new file mode 100644 index 000000000..ca17e0657 Binary files /dev/null and b/lib/tests/packages/ca-certificates-20180409.3.37.tbz2 differ diff --git a/lib/tests/packages/puppet-agent-6.0.0.tbz2 b/lib/tests/packages/puppet-agent-6.0.0.tbz2 new file mode 100644 index 000000000..ce8c41185 Binary files /dev/null and b/lib/tests/packages/puppet-agent-6.0.0.tbz2 differ