From fcbe0cb04d35189401c0c880ebfb4311e952d776 Mon Sep 17 00:00:00 2001 From: Adam Goldschmidt Date: Mon, 15 Feb 2021 00:41:57 +0200 Subject: [PATCH] bpo-42967: only use '&' as a query string separator (#24297) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bpo-42967: [security] Address a web cache-poisoning issue reported in urllib.parse.parse_qsl(). urllib.parse will only use "&" as query string separator by default instead of both ";" and "&" as allowed in earlier versions. An optional argument separator with default value "&" is added to specify the separator. Co-authored-by: Éric Araujo Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Co-authored-by: Éric Araujo --- Doc/library/cgi.rst | 9 ++- Doc/library/urllib.parse.rst | 16 ++++- Doc/whatsnew/3.10.rst | 13 ++++ Doc/whatsnew/3.6.rst | 13 ++++ Doc/whatsnew/3.7.rst | 13 ++++ Doc/whatsnew/3.8.rst | 13 ++++ Doc/whatsnew/3.9.rst | 15 +++- Lib/cgi.py | 23 ++++--- Lib/test/test_cgi.py | 29 ++++++-- Lib/test/test_urlparse.py | 68 +++++++++++++------ Lib/urllib/parse.py | 20 ++++-- .../2021-02-14-15-59-16.bpo-42967.YApqDS.rst | 1 + 12 files changed, 186 insertions(+), 47 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst Backport: * Drop Doc/whatsnew and Misc/NEWS.d * urllib.parse.urlencode -> urllib.urlencode * urllib.parse -> urlparse * Significant refactoring due to differences in arguments * Avoid using TestCase.subTest diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst index 4048592e73..05d9cdf424 100644 --- a/Doc/library/cgi.rst +++ b/Doc/library/cgi.rst @@ -277,24 +277,25 @@ These are useful if you want more control, or if you want to employ some of the algorithms implemented in this module in other circumstances. -.. function:: parse(fp[, environ[, keep_blank_values[, strict_parsing]]]) +.. 
function:: parse(fp[, environ[, keep_blank_values[, strict_parsing[, separator="&"]]]]) Parse a query in the environment or from a file (the file defaults to - ``sys.stdin`` and environment defaults to ``os.environ``). The *keep_blank_values* and *strict_parsing* parameters are + ``sys.stdin`` and environment defaults to ``os.environ``). The + *keep_blank_values*, *strict_parsing* and *separator* parameters are passed to :func:`urlparse.parse_qs` unchanged. -.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]]) +.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields[, separator]]]]) This function is deprecated in this module. Use :func:`urlparse.parse_qs` instead. It is maintained here only for backward compatibility. -.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]]) +.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields[, separator]]]]) This function is deprecated in this module. Use :func:`urlparse.parse_qsl` instead. It is maintained here only for backward compatibility. -.. function:: parse_multipart(fp, pdict) +.. function:: parse_multipart(fp, pdict, separator="&") Parse input of type :mimetype:`multipart/form-data` (for file uploads). Arguments are *fp* for the input file and *pdict* for a dictionary containing @@ -303,6 +303,9 @@ algorithms implemented in this module in other circumstances. Note that this does not parse nested multipart parts --- use :class:`FieldStorage` for that. + .. versionchanged:: 2.7 security update + Added the *separator* parameter. + .. function:: parse_header(string) diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index f9c8ba7398..1a79078239 100644 --- a/Doc/library/urlparse.rst +++ b/Doc/library/urlparse.rst @@ -165,7 +165,7 @@ or on combining URL components into a URL string. Added IPv6 URL parsing capabilities. -.. 
function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]]) +.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields[, separator='&']]]]) Parse a query string given as a string argument (data of type :mimetype:`application/x-www-form-urlencoded`). Data are returned as a @@ -190,6 +190,8 @@ or on combining URL components into a URL string. read. If set, then throws a :exc:`ValueError` if there are more than *max_num_fields* fields read. + The optional argument *separator* is the symbol to use for separating the query arguments. It defaults to ``&``. + Use the :func:`urllib.urlencode` function to convert such dictionaries into query strings. @@ -201,7 +203,12 @@ or on combining URL components into a URL string. .. versionchanged:: 2.7.16 Added *max_num_fields* parameter. -.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]]) + .. versionchanged:: 2.7 security update + Added *separator* parameter with the default value of ``&``. Python versions earlier than Python 3.10 allowed using both ";" and "&" as + query parameter separator. This has been changed to allow only a single separator key, with "&" as the default separator. + + +.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields[, separator='&']]]]) Parse a query string given as a string argument (data of type :mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of @@ -226,6 +232,8 @@ or on combining URL components into a URL string. read. If set, then throws a :exc:`ValueError` if there are more than *max_num_fields* fields read. + The optional argument *separator* is the symbol to use for separating the query arguments. It defaults to ``&``. + Use the :func:`urllib.urlencode` function to convert such lists of pairs into query strings. @@ -235,6 +243,11 @@ or on combining URL components into a URL string. .. versionchanged:: 2.7.16 Added *max_num_fields* parameter. + .. 
versionchanged:: 2.7 security update + Added *separator* parameter with the default value of ``&``. Python versions earlier than Python 3.10 allowed using both ";" and "&" as + query parameter separator. This has been changed to allow only a single separator key, with "&" as the default separator. + + .. function:: urlunparse(parts) Construct a URL from a tuple as returned by ``urlparse()``. The *parts* argument diff --git a/Lib/cgi.py b/Lib/cgi.py index 6018c36086..6c72507c20 100755 --- a/Lib/cgi.py +++ b/Lib/cgi.py @@ -115,7 +115,8 @@ def closelog(): # 0 ==> unlimited input maxlen = 0 -def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): +def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0, + separator='&'): """Parse a query in the environment or from a file (default stdin) Arguments, all optional: @@ -134,6 +135,9 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): strict_parsing: flag indicating what to do with parsing errors. If false (the default), errors are silently ignored. If true, errors raise a ValueError exception. + + separator: str. The symbol to use for separating the query arguments. + Defaults to &. 
""" if fp is None: fp = sys.stdin @@ -154,7 +158,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): if environ['REQUEST_METHOD'] == 'POST': ctype, pdict = parse_header(environ['CONTENT_TYPE']) if ctype == 'multipart/form-data': - return parse_multipart(fp, pdict) + return parse_multipart(fp, pdict, separator=separator) elif ctype == 'application/x-www-form-urlencoded': clength = int(environ['CONTENT_LENGTH']) if maxlen and clength > maxlen: @@ -178,27 +182,30 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): else: qs = "" environ['QUERY_STRING'] = qs # XXX Shouldn't, really - return urlparse.parse_qs(qs, keep_blank_values, strict_parsing) + return urlparse.parse_qs(qs, keep_blank_values, strict_parsing, + separator=separator) # parse query string function called from urlparse, # this is done in order to maintain backward compatibility. -def parse_qs(qs, keep_blank_values=0, strict_parsing=0): +def parse_qs(qs, keep_blank_values=0, strict_parsing=0, separator='&'): """Parse a query given as a string argument.""" warn("cgi.parse_qs is deprecated, use urlparse.parse_qs instead", PendingDeprecationWarning, 2) - return urlparse.parse_qs(qs, keep_blank_values, strict_parsing) + return urlparse.parse_qs(qs, keep_blank_values, strict_parsing, + separator=separator) -def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): +def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None, + separator='&'): """Parse a query given as a string argument.""" warn("cgi.parse_qsl is deprecated, use urlparse.parse_qsl instead", PendingDeprecationWarning, 2) return urlparse.parse_qsl(qs, keep_blank_values, strict_parsing, - max_num_fields) + max_num_fields, separator=separator) -def parse_multipart(fp, pdict): +def parse_multipart(fp, pdict, separator='&'): """Parse multipart input. 
Arguments: @@ -315,7 +319,7 @@ class FieldStorage: def __init__(self, fp=None, headers=None, outerboundary="", environ=os.environ, keep_blank_values=0, strict_parsing=0, - max_num_fields=None): + max_num_fields=None, separator='&'): """Constructor. Read multipart/* until last part. Arguments, all optional: @@ -363,6 +367,7 @@ def __init__(self, fp=None, headers=None, outerboundary=b'', self.keep_blank_values = keep_blank_values self.strict_parsing = strict_parsing self.max_num_fields = max_num_fields + self.separator = separator if 'REQUEST_METHOD' in environ: method = environ['REQUEST_METHOD'].upper() self.qs_on_post = None @@ -589,7 +594,8 @@ def read_urlencoded(self): if self.qs_on_post: qs += '&' + self.qs_on_post query = urlparse.parse_qsl(qs, self.keep_blank_values, - self.strict_parsing, self.max_num_fields) + self.strict_parsing, self.max_num_fields, + separator=self.separator) self.list = [MiniFieldStorage(key, value) for key, value in query] self.skip_lines() @@ -605,7 +610,8 @@ def read_multi(self, environ, keep_blank_values, strict_parsing): query = urlparse.parse_qsl(self.qs_on_post, self.keep_blank_values, self.strict_parsing, - self.max_num_fields) + self.max_num_fields, + separator=self.separator) self.list.extend(MiniFieldStorage(key, value) for key, value in query) FieldStorageClass = None @@ -649,7 +654,7 @@ def read_multi(self, environ, keep_blank_values, strict_parsing): klass = self.FieldStorageClass or self.__class__ part = klass(self.fp, {}, ib, environ, keep_blank_values, strict_parsing, - max_num_fields) + max_num_fields, separator=self.separator) # Throw first part away while not part.done: diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py index 6b29759da4..239d97589c 100644 --- a/Lib/test/test_cgi.py +++ b/Lib/test/test_cgi.py @@ -53,12 +53,9 @@ def do_test(buf, method): ("", ValueError("bad query field: ''")), ("&", ValueError("bad query field: ''")), ("&&", ValueError("bad query field: ''")), - (";", ValueError("bad query field: 
''")), - (";&;", ValueError("bad query field: ''")), # Should the next few really be valid? ("=", {}), ("=&=", {}), - ("=;=", {}), # This rest seem to make sense ("=a", {'': ['a']}), ("&=a", ValueError("bad query field: ''")), @@ -73,8 +70,6 @@ def do_test(buf, method): ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}), ("a=a+b&a=b+a", {'a': ['a b', 'b a']}), ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), - ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), - ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env", {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'], 'cuyer': ['r'], @@ -201,6 +196,30 @@ def test_strict(self): self.assertEqual(expect[k], v) self.assertItemsEqual(expect.values(), d.values()) + def test_separator(self): + parse_semicolon = [ + ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}), + ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), + (";", ValueError("bad query field: ''")), + (";;", ValueError("bad query field: ''")), + ("=;a", ValueError("bad query field: 'a'")), + (";b=a", ValueError("bad query field: ''")), + ("b;=a", ValueError("bad query field: 'b'")), + ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), + ("a=a+b;a=b+a", {'a': ['a b', 'b a']}), + ] + for orig, expect in parse_semicolon: + env = {'QUERY_STRING': orig} + fs = cgi.FieldStorage(separator=';', environ=env) + if isinstance(expect, dict): + for key in expect.keys(): + expect_val = expect[key] + self.assertIn(key, fs) + if len(expect_val) > 1: + self.assertEqual(fs.getvalue(key), expect_val) + else: + self.assertEqual(fs.getvalue(key), expect_val[0]) + def test_log(self): cgi.log("Testing") diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 762500789f..3b1c360625 100644 --- 
a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -32,16 +32,10 @@ ("&a=b", [('a', 'b')]), ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]), ("a=1&a=2", [('a', '1'), ('a', '2')]), - (";", []), - (";;", []), - (";a=b", [('a', 'b')]), - ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]), - ("a=1;a=2", [('a', '1'), ('a', '2')]), - (b";", []), - (b";;", []), - (b";a=b", [(b'a', b'b')]), - (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), - (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]), + (";a=b", [(';a', 'b')]), + ("a=a+b;b=b+c", [('a', 'a b;b=b c')]), + (b";a=b", [(b';a', b'b')]), + (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]), ] parse_qs_test_cases = [ @@ -68,16 +62,10 @@ (b"&a=b", {b'a': [b'b']}), (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), (b"a=1&a=2", {b'a': [b'1', b'2']}), - (";", {}), - (";;", {}), - (";a=b", {'a': ['b']}), - ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), - ("a=1;a=2", {'a': ['1', '2']}), - (b";", {}), - (b";;", {}), - (b";a=b", {b'a': [b'b']}), - (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), - (b"a=1;a=2", {b'a': [b'1', b'2']}), + (";a=b", {';a': ['b']}), + ("a=a+b;b=b+c", {'a': ['a b;b=b c']}), + (b";a=b", {b';a': [b'b']}), + (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}), ] class UrlParseTestCase(unittest.TestCase): @@ -886,6 +874,42 @@ def test_parse_qsl_encoding(self): self.assertEqual(urlparse.urlparse("http://www.python.org:80"), ('http','www.python.org:80','','','','')) + def test_parse_qs_separator(self): + parse_qs_semicolon_cases = [ + (";", {}), + (";;", {}), + (";a=b", {'a': ['b']}), + ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), + ("a=1;a=2", {'a': ['1', '2']}), + (b";", {}), + (b";;", {}), + (b";a=b", {b'a': [b'b']}), + (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), + (b"a=1;a=2", {b'a': [b'1', b'2']}), + ] + for orig, expect in parse_qs_semicolon_cases: + result = urlparse.parse_qs(orig, separator=';') + self.assertEqual(result, expect, "Error parsing %r" % orig) + + + def test_parse_qsl_separator(self): + 
parse_qsl_semicolon_cases = [ + (";", []), + (";;", []), + (";a=b", [('a', 'b')]), + ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]), + ("a=1;a=2", [('a', '1'), ('a', '2')]), + (b";", []), + (b";;", []), + (b";a=b", [(b'a', b'b')]), + (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), + (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]), + ] + for orig, expect in parse_qsl_semicolon_cases: + result = urlparse.parse_qsl(orig, separator=';') + self.assertEqual(result, expect, "Error parsing %r" % orig) + + def test_main(): test_support.run_unittest(UrlParseTestCase) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index ea897c3032..5bd067895b 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -662,7 +662,8 @@ def unquote(string, encoding='utf-8', errors='replace'): append(item) return ''.join(res) -def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): +def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None, + separator='&'): """Parse a query given as a string argument. Arguments: @@ -686,10 +686,13 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, max_num_fields: int. If set, then throws a ValueError if there are more than n fields read by parse_qsl(). + + separator: str. The symbol to use for separating the query arguments. + Defaults to &. """ dict = {} for name, value in parse_qsl(qs, keep_blank_values, strict_parsing, - max_num_fields): + max_num_fields, separator=separator): if name in dict: dict[name].append(value) else: @@ -701,7 +704,8 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, dict[name] = [value] return dict -def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): +def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None, + separator='&'): """Parse a query given as a string argument. Arguments: @@ -724,17 +727,23 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, max_num_fields: int. 
If set, then throws a ValueError if there are more than n fields read by parse_qsl(). + separator: str. The symbol to use for separating the query arguments. + Defaults to &. + Returns a list, as G-d intended. """ + if not separator or not isinstance(separator, str): + raise ValueError("Separator must be of type str.") + # If max_num_fields is defined then check that the number of fields # is less than max_num_fields. This prevents a memory exhaustion DOS # attack via post bodies with many fields. if max_num_fields is not None: - num_fields = 1 + qs.count('&') + qs.count(';') + num_fields = 1 + qs.count(separator) if max_num_fields < num_fields: raise ValueError('Max number of fields exceeded') - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + pairs = [s1 for s1 in qs.split(separator)] r = [] for name_value in pairs: if not name_value and not strict_parsing: -- 2.40.1