Import Upstream version 2.7.18

This commit is contained in:
geos_one
2025-08-15 16:28:06 +02:00
commit ba1f69ab39
4521 changed files with 1778434 additions and 0 deletions

Lib/test/crashers/README Normal file

@@ -0,0 +1,16 @@
This directory only contains tests for outstanding bugs that cause the
interpreter to segfault. Ideally this directory should always be empty, but
sometimes it may not be easy to fix the underlying cause and the bug is deemed
too obscure to invest the effort.
Each test should fail when run from the command line:

    ./python Lib/test/crashers/weakref_in_del.py
Put as much info into a docstring or comments to help determine the cause of the
failure, as well as a bugs.python.org issue number if it exists. Particularly
note if the cause is system or environment dependent and what the variables are.
Once the crash is fixed, the test case should be moved into an appropriate test
(even if it was originally from the test suite). This ensures the regression
doesn't happen again. And if it does, it should be easier to track down.
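The workflow the README describes — every file in this directory must die when executed directly — can be sketched with a small checker. This is a hypothetical helper, not part of the upstream import; the name `crashes` is invented, and it uses the modern `subprocess.run` API rather than anything available in 2.7:

```python
import subprocess
import sys
import textwrap

def crashes(source):
    """Run `source` in a child interpreter; report abnormal exit.

    This is what the README expects of every crasher: running the file
    terminates the interpreter.  A segfault shows up as a negative
    returncode on POSIX; any nonzero code counts as a failure here.
    """
    proc = subprocess.run([sys.executable, "-c", textwrap.dedent(source)],
                          capture_output=True)
    return proc.returncode != 0

# A well-behaved script does not count as a crasher.
print(crashes("print('ok')"))  # → False
```

Checking the exit status from a child process, rather than importing the crasher in-process, keeps the checker itself alive when the crasher does its job.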


@@ -0,0 +1,19 @@
"""
Broken bytecode objects can easily crash the interpreter.
This is not going to be fixed. It is generally agreed that there is no
point in writing a bytecode verifier and putting it in CPython just for
this. Moreover, a verifier is bound to accept only a subset of all safe
bytecodes, so it could lead to unnecessary breakage.
For security purposes, "restricted" interpreters are not going to let
the user build or load random bytecodes anyway. Otherwise, this is a
"won't fix" case.
"""
import types
co = types.CodeType(0, 0, 0, 0, '\x04\x71\x00\x00', (),
                    (), (), '', '', 1, '')
exec co


@@ -0,0 +1,29 @@
"""
_PyType_Lookup() returns a borrowed reference.
This attacks the call in dictobject.c.
"""
class A(object):
    pass

class B(object):
    def __del__(self):
        print 'hi'
        del D.__missing__

class D(dict):
    class __missing__:
        def __init__(self, *args):
            pass

d = D()
a = A()
a.cycle = a
a.other = B()
del a

prev = None
while 1:
    d[5]
    prev = (prev,)


@@ -0,0 +1,38 @@
"""
_PyType_Lookup() returns a borrowed reference.
This attacks PyObject_GenericSetAttr().
NB. on my machine this crashes in 2.5 debug but not release.
"""
class A(object):
    pass

class B(object):
    def __del__(self):
        print "hi"
        del C.d

class D(object):
    def __set__(self, obj, value):
        self.hello = 42

class C(object):
    d = D()

    def g():
        pass

c = C()
a = A()
a.cycle = a
a.other = B()

lst = [None] * 1000000
i = 0
del a
while 1:
    c.d = 42    # segfaults in PyMethod_New(im_func=D.__set__, im_self=d)
    lst[i] = c.g    # consume the free list of instancemethod objects
    i += 1


@@ -0,0 +1,30 @@
#
# The various methods of bufferobject.c (here buffer_subscript()) call
# get_buf() before calling potentially more Python code (here via
# PySlice_GetIndicesEx()). But get_buf() already returned a void*
# pointer. This void* pointer can become invalid if the object
# underlying the buffer is mutated (here a bytearray object).
#
# As usual, please keep in mind that the three "here" in the sentence
# above are only examples. Each can be changed easily and lead to
# another crasher.
#
# This crashes for me on Linux 32-bits with CPython 2.6 and 2.7
# with a segmentation fault.
#
class PseudoIndex(object):
    def __index__(self):
        for c in "foobar"*n:
            a.append(c)
        return n * 4


for n in range(1, 100000, 100):
    a = bytearray("test"*n)
    buf = buffer(a)
    s = buf[:PseudoIndex():1]
    #print repr(s)
    #assert s == "test"*n


@@ -0,0 +1,5 @@
"""
The compiler (>= 2.5) recurses happily.
"""
compile('()'*9**5, '?', 'exec')


@@ -0,0 +1,44 @@
"""
General example for an attack against code like this:
Py_DECREF(obj->attr); obj->attr = ...;
here in Module/_json.c:scanner_init().
Explanation: if the first Py_DECREF() calls either a __del__ or a
weakref callback, it will run while the 'obj' appears to have in
'obj->attr' still the old reference to the object, but not holding
the reference count any more.
Status: progress has been made replacing these cases, but there is an
infinite number of such cases.
"""
import _json, weakref

class Ctx1(object):
    encoding = "utf8"
    strict = None
    object_hook = None
    object_pairs_hook = None
    parse_float = None
    parse_int = None
    parse_constant = None

class Foo(unicode):
    pass

def delete_me(*args):
    print scanner.encoding.__dict__

class Ctx2(Ctx1):
    @property
    def encoding(self):
        global wref
        f = Foo("utf8")
        f.abc = globals()
        wref = weakref.ref(f, delete_me)
        return f

scanner = _json.make_scanner(Ctx1())
scanner.__init__(Ctx2())


@@ -0,0 +1,36 @@
"""
The gc module can still invoke arbitrary Python code and crash.
This is an attack against _PyInstance_Lookup(), which is documented
as follows:
The point of this routine is that it never calls arbitrary Python
code, so is always "safe": all it does is dict lookups.
But of course dict lookups can call arbitrary Python code.
The following code causes mutation of the object graph during
the call to has_finalizer() in gcmodule.c, and that might
segfault.
"""
import gc

class A:
    def __hash__(self):
        return hash("__del__")
    def __eq__(self, other):
        del self.other
        return False

a = A()
b = A()
a.__dict__[b] = 'A'
a.other = b
b.other = a

gc.collect()
del a, b
gc.collect()


@@ -0,0 +1,32 @@
"""
gc.get_referrers() can be used to see objects before they are fully built.
Note that this is only an example. There are many ways to crash Python
by using gc.get_referrers(), as well as many extension modules (even
when they are using perfectly documented patterns to build objects).
Identifying and removing all places that expose to the GC a
partially-built object is a long-term project. A patch was proposed on
SF specifically for this example but I consider fixing just this single
example a bit pointless (#1517042).
A fix would include a whole-scale code review, possibly with an API
change to decouple object creation and GC registration, and according
fixes to the documentation for extension module writers. It's unlikely
to happen, though. So this is currently classified as
"gc.get_referrers() is dangerous, use only for debugging".
"""
import gc

def g():
    marker = object()
    yield marker
    # now the marker is in the tuple being constructed
    [tup] = [x for x in gc.get_referrers(marker) if type(x) is tuple]
    print tup
    print tup[1]

tuple(g())


@@ -0,0 +1,16 @@
# This was taken from http://python.org/sf/1541697
# It's not technically a crasher. It may not even truly be infinite,
# however, I haven't waited a long time to see the result. It takes
# 100% of CPU while running this and should be fixed.
import re

starttag = re.compile(r'<[a-zA-Z][-_.:a-zA-Z0-9]*\s*('
    r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]'
    r'[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*(?=[\s>/<])))?'
    r')*\s*/?\s*(?=[<>])')

if __name__ == '__main__':
    foo = '<table cellspacing="0" cellpadding="0" style="border-collapse'
    starttag.match(foo)


@@ -0,0 +1,35 @@
"""
There is a way to put keys of any type in a type's dictionary.
I think this allows various kinds of crashes, but so far I have only
found a convoluted attack of _PyType_Lookup(), which uses the mro of the
type without holding a strong reference to it. Probably works with
super.__getattribute__() too, which uses the same kind of code.
"""
class MyKey(object):
    def __hash__(self):
        return hash('mykey')

    def __cmp__(self, other):
        # the following line decrefs the previous X.__mro__
        X.__bases__ = (Base2,)
        # trash all tuples of length 3, to make sure that the items of
        # the previous X.__mro__ are really garbage
        z = []
        for i in range(1000):
            z.append((i, None, None))
        return -1


class Base(object):
    mykey = 'from Base'

class Base2(object):
    mykey = 'from Base2'

# you can't add a non-string key to X.__dict__, but it can be
# there from the beginning :-)
X = type('X', (Base,), {MyKey(): 5})

print X.mykey
# I get a segfault, or a slightly wrong assertion error in a debug build.


@@ -0,0 +1,31 @@
# The cycle GC collector can be executed when any GC-tracked object is
# allocated, e.g. during a call to PyList_New(), PyDict_New(), ...
# Moreover, it can invoke arbitrary Python code via a weakref callback.
# This means that there are many places in the source where an arbitrary
# mutation could unexpectedly occur.
# The example below shows list_slice() not expecting the call to
# PyList_New to mutate the input list. (Of course there are many
# more examples like this one.)
import weakref

class A(object):
    pass

def callback(x):
    del lst[:]

keepalive = []

for i in range(100):
    lst = [str(i)]
    a = A()
    a.cycle = a
    keepalive.append(weakref.ref(a, callback))
    del a
    while lst:
        keepalive.append(lst[:])


@@ -0,0 +1,47 @@
# from http://mail.python.org/pipermail/python-dev/2001-June/015239.html
# if you keep changing a dictionary while looking up a key, you can
# provoke an infinite recursion in C
# At the time neither Tim nor Michael could be bothered to think of a
# way to fix it.
class Yuck:
    def __init__(self):
        self.i = 0

    def make_dangerous(self):
        self.i = 1

    def __hash__(self):
        # direct to slot 4 in table of size 8; slot 12 when size 16
        return 4 + 8

    def __eq__(self, other):
        if self.i == 0:
            # leave dict alone
            pass
        elif self.i == 1:
            # fiddle to 16 slots
            self.__fill_dict(6)
            self.i = 2
        else:
            # fiddle to 8 slots
            self.__fill_dict(4)
            self.i = 1
        return 1

    def __fill_dict(self, n):
        self.i = 0
        dict.clear()
        for i in range(n):
            dict[i] = i
        dict[self] = "OK!"

y = Yuck()
dict = {y: "OK!"}

z = Yuck()
y.make_dangerous()
print dict[z]


@@ -0,0 +1,16 @@
# The following example may crash or not depending on the platform.
# E.g. on 32-bit Intel Linux in a "standard" configuration it seems to
# crash on Python 2.5 (but not 2.4 nor 2.3). On Windows the import
# eventually fails to find the module, possibly because we run out of
# file handles.
# The point of this example is to show that sys.setrecursionlimit() is a
# hack, and not a robust solution. This example simply exercises a path
# where it takes many C-level recursions, consuming a lot of stack
# space, for each Python-level recursion. So 1000 times this amount of
# stack space may be too much for standard platforms already.
import sys
if 'recursion_limit_too_high' in sys.modules:
    del sys.modules['recursion_limit_too_high']
import recursion_limit_too_high


@@ -0,0 +1,15 @@
#!/usr/bin/env python
# No bug report AFAIK, mail on python-dev on 2006-01-10
# This is a "won't fix" case. It is known that setting a high enough
# recursion limit crashes by overflowing the stack. Unless this is
# redesigned somehow, it won't go away.
import sys
sys.setrecursionlimit(1 << 30)
f = lambda f:f(f)
if __name__ == '__main__':
    f(f)


@@ -0,0 +1,29 @@
"""
Expose a race in the _warnings module, which is the C backend for the
warnings module. The "_warnings" module tries to access attributes of the
"warnings" module (because of the API it has to support), but doing so
during interpreter shutdown is problematic. Specifically, the call to
PyImport_GetModuleDict() in Python/_warnings.c:get_warnings_attr will
abort() if the modules dict has already been cleaned up.
This crasher is timing-dependent, and more threads (NUM_THREADS) may be
necessary to expose it reliably on different systems.
"""
import threading
import warnings
NUM_THREADS = 10
class WarnOnDel(object):
    def __del__(self):
        warnings.warn("oh no something went wrong", UserWarning)

def do_work():
    while True:
        w = WarnOnDel()

for i in range(NUM_THREADS):
    t = threading.Thread(target=do_work)
    t.setDaemon(1)
    t.start()