[PATCH V2] contrib: add a tool to detect cyclic references
Jun Wu
quark at fb.com
Sun May 28 05:02:54 UTC 2017
# HG changeset patch
# User Jun Wu <quark at fb.com>
# Date 1495946922 25200
# Sat May 27 21:48:42 2017 -0700
# Node ID 869505c8d99d438f1c7d0775645d4ac28d70c15e
# Parent ad37c569ec8121a99e4cd9da52365b114b82e744
# Available At https://bitbucket.org/quark-zju/hg-draft
# hg pull https://bitbucket.org/quark-zju/hg-draft -r 869505c8d99d
contrib: add a tool to detect cyclic references
This patch adds a simple tool to find out all cyclic references for CPython.
The added test shows some common cases where cycles are created.
diff --git a/contrib/leakdetect.py b/contrib/leakdetect.py
new file mode 100644
--- /dev/null
+++ b/contrib/leakdetect.py
@@ -0,0 +1,147 @@
+# leakdetect.py - CPython object cyclic references analysis
+#
+# Copyright 2017 Facebook, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import contextlib
+import gc
+import os
+import platform
+import sys
+
+def stronglyconnectedcomponents(vertices, edges):
+ """yield strongly connected components in a directed graph"""
+ ignored = set() # vertices outputted already
+ stack = [] # vertice DFS stack
+ index = {} # stack[index[n]] == n; later visit: bigger index
+ lowlink = {} # see Tarjan's strongly connected components algorithm
+ execstack = [] # enumerate execution stack to bypass Python stack limit
+
+ def visit(v):
+ index[v] = lowlink[v] = len(stack)
+ stack.append(v)
+ execstack.append((checkroot, (v,)))
+ for w in edges.get(v, []):
+ execstack.append((updatelowlink, (v, w)))
+ if w not in index:
+ execstack.append((visit, (w,)))
+
+ def updatelowlink(v, w):
+ lowlink[v] = min(lowlink[v], lowlink[w])
+
+ def checkroot(v):
+ i = index[v]
+ if lowlink[v] == i:
+ component = stack[i:]
+ del stack[i:]
+ if component[0] not in ignored:
+ ignored.update(component)
+ return component
+
+ for v in vertices:
+ if v not in ignored:
+ index.clear()
+ lowlink.clear()
+ execstack.append((visit, (v,)))
+ while execstack:
+ func, args = execstack.pop()
+ result = func(*args)
+ if result is not None:
+ yield result
+
+def objectreferencegraph(objects):
+ """return (vertices, edges), referenced relationship among objects
+
+ vertices contain object IDs.
+ """
+ idset = set(id(o) for o in objects)
+ edges = {}
+ for o in objects:
+ edges[id(o)] = [id(r) for r in gc.get_referents(o) if id(r) in idset]
+ return idset, edges
+
+def describe(obj):
+ """short and easy to understand text to help identify an object
+
+ similar to repr(), but avoids huge outputs in all cases.
+ """
+ if isinstance(obj, tuple):
+ return '<tuple (%s)>' % ', '.join(describe(v) for v in obj)
+ elif isinstance(obj, type):
+ return '<type %s>' % obj.__name__
+ elif isinstance(obj, type(describe)): # function
+ code = obj.func_code
+ path = os.path.basename(code.co_filename)
+ return ('<function %s@%s:%s>' % (obj.func_name, path,
+ code.co_firstlineno))
+ elif isinstance(obj, type(os.environ.get)): # instancemethod
+ return repr(obj)
+ else:
+ if type(obj) in (list, set, dict):
+ tryrepr = repr(obj)
+ if len(tryrepr) < 70:
+ return tryrepr
+ cellcontent = getattr(obj, 'cell_contents', None)
+ if cellcontent is not None:
+ return '<cell %s>' % describe(cellcontent)
+ typename = type(obj).__name__
+ if typename == 'getset_descriptor':
+ return repr(obj)
+ return '<%s>' % typename
+
+def analysegarbage(garbage, out):
+ """find cyclic references among garbage objects and report them"""
+ out.write('--- leak detection report ---\n'
+ '%d objects not freed by reference counting\n' % len(garbage))
+ vertices, edges = objectreferencegraph(garbage)
+ groups = []
+ for component in stronglyconnectedcomponents(vertices, edges):
+ if len(component) == 1:
+ v = component[0]
+ if v in edges[v]: # self-cycle
+ groups.append((v,))
+ else:
+ groups.append(component)
+
+ objmap = {id(o): o for o in garbage}
+ outputs = []
+ for g in groups:
+ output = 'cycle of %d objects:\n' % len(g)
+ output += ''.join(sorted(' %s\n' % describe(objmap[i]) for i in g))
+ outputs.append(output)
+ out.write(''.join(sorted(outputs)))
+
+if platform.python_implementation() == 'CPython':
+ # only verified to work in CPython
+ @contextlib.contextmanager
+ def leakdetect(out=sys.stderr):
+ # collect garbage so previous garbage won't affect us
+ gc.set_debug(gc.get_debug() & ~gc.DEBUG_SAVEALL)
+ gc.collect()
+ # make gc.collect put objects to gc.garbage instead of releasing them
+ gc.set_debug(gc.get_debug() | gc.DEBUG_SAVEALL)
+ gc.disable()
+ try:
+ yield
+ finally:
+ # populate gc.garbage
+ gc.collect()
+ garbage = gc.garbage
+ # restore the default behavior
+ gc.set_debug(gc.get_debug() & ~gc.DEBUG_SAVEALL)
+ if garbage:
+ analysegarbage(garbage, out)
+ del gc.garbage[:]
+
+ disabled = False
+else:
+ # fallback to a no-op gracefully
+ @contextlib.contextmanager
+ def leakdetect(out=sys.stderr):
+ yield
+
+ disabled = True
diff --git a/tests/test-leakdetect.py b/tests/test-leakdetect.py
new file mode 100644
--- /dev/null
+++ b/tests/test-leakdetect.py
@@ -0,0 +1,50 @@
+from __future__ import absolute_import
+
+import inspect
+import os
+import sys
+
+dirname = os.path.dirname
+sys.path.insert(0, os.path.join(dirname(dirname(__file__)), 'contrib'))
+
+import leakdetect
+
+if leakdetect.disabled:
+ sys.stdout.write('skipped: missing feature: cpython\n')
+ sys.exit(80)
+
+def leaklistdict():
+ a = []
+ b = {}
+ b['a'] = a
+ a.append(b)
+ a.append([[[[['65537']]]]])
+ c = [('long text' * 100,)]
+ c.append(c)
+
+def leakframe():
+ frame = inspect.currentframe()
+ a = []
+ a.append(a)
+ b = [[['some text']]]
+
+def leakfunc():
+ def func(x):
+ return func(x + 1)
+ f = lambda x: g(x)
+ g = lambda x: f(x)
+
+def leakclass():
+ # http://bugs.python.org/msg60985
+ class A(object):
+ pass
+ B = type('B', (object,), {})
+ a = A()
+ b = B()
+ class C(a.__class__):
+ pass
+ a.__class__ = C
+
+for leak in [leaklistdict, leakframe, leakfunc, leakclass]:
+ with leakdetect.leakdetect():
+ leak()
diff --git a/tests/test-leakdetect.py.out b/tests/test-leakdetect.py.out
new file mode 100644
--- /dev/null
+++ b/tests/test-leakdetect.py.out
@@ -0,0 +1,43 @@
+--- leak detection report ---
+9 objects not freed by reference counting
+cycle of 1 objects:
+ <list>
+cycle of 2 objects:
+ [{'a': [...]}, [[[[['65537']]]]]]
+ {'a': [{...}, [[[[['65537']]]]]]}
+--- leak detection report ---
+5 objects not freed by reference counting
+cycle of 1 objects:
+ <frame>
+cycle of 1 objects:
+ [[...]]
+--- leak detection report ---
+9 objects not freed by reference counting
+cycle of 3 objects:
+ <cell <function func at test-leakdetect.py:32>>
+ <function func at test-leakdetect.py:32>
+ <tuple (<cell <function func at test-leakdetect.py:32>>)>
+cycle of 6 objects:
+ <cell <function <lambda>@test-leakdetect.py:34>>
+ <cell <function <lambda>@test-leakdetect.py:35>>
+ <function <lambda>@test-leakdetect.py:34>
+ <function <lambda>@test-leakdetect.py:35>
+ <tuple (<cell <function <lambda>@test-leakdetect.py:34>>)>
+ <tuple (<cell <function <lambda>@test-leakdetect.py:35>>)>
+--- leak detection report ---
+15 objects not freed by reference counting
+cycle of 2 objects:
+ <tuple (<type C>, <type A>, <type object>)>
+ <type C>
+cycle of 5 objects:
+ <attribute '__dict__' of 'A' objects>
+ <attribute '__weakref__' of 'A' objects>
+ <dict>
+ <tuple (<type A>, <type object>)>
+ <type A>
+cycle of 5 objects:
+ <attribute '__dict__' of 'B' objects>
+ <attribute '__weakref__' of 'B' objects>
+ <dict>
+ <tuple (<type B>, <type object>)>
+ <type B>
More information about the Mercurial-devel
mailing list