[PATCH V2] contrib: add a tool to detect cyclic references
Gregory Szorc
gregory.szorc at gmail.com
Sun May 28 14:45:04 EDT 2017
On Sat, May 27, 2017 at 10:02 PM, Jun Wu <quark at fb.com> wrote:
> # HG changeset patch
> # User Jun Wu <quark at fb.com>
> # Date 1495946922 25200
> # Sat May 27 21:48:42 2017 -0700
> # Node ID 869505c8d99d438f1c7d0775645d4ac28d70c15e
> # Parent ad37c569ec8121a99e4cd9da52365b114b82e744
> # Available At https://bitbucket.org/quark-zju/hg-draft
> # hg pull https://bitbucket.org/quark-zju/hg-draft -r
> 869505c8d99d
> contrib: add a tool to detect cyclic references
>
> This patch adds a simple tool to find out all cyclic references for
> CPython.
>
> The added test shows some common cases where cycles are created.
>
This seems reasonable. However, the tool as it stands isn't very usable
because you must include it in sys.path and import it. I'd favor adding
this as mercurial/leaks.py (or possibly adding to mercurial/profiling.py)
so it is trivial to import and use the context manager. That, or refactor
it as an extension and have the extension stash a reference to the context
manager under mercurial.util.leakdetect (or similar) - again so it is easy
to use.
>
> diff --git a/contrib/leakdetect.py b/contrib/leakdetect.py
> new file mode 100644
> --- /dev/null
> +++ b/contrib/leakdetect.py
> @@ -0,0 +1,147 @@
> +# leakdetect.py - CPython object cyclic references analysis
> +#
> +# Copyright 2017 Facebook, Inc.
> +#
> +# This software may be used and distributed according to the terms of the
> +# GNU General Public License version 2 or any later version.
> +
> +from __future__ import absolute_import
> +
> +import contextlib
> +import gc
> +import os
> +import platform
> +import sys
> +
> +def stronglyconnectedcomponents(vertices, edges):
> + """yield strongly connected components in a directed graph"""
> + ignored = set() # vertices outputted already
> + stack = [] # vertice DFS stack
> + index = {} # stack[index[n]] == n; later visit: bigger index
> + lowlink = {} # see Tarjan's strongly connected components algorithm
> + execstack = [] # enumerate execution stack to bypass Python stack
> limit
> +
> + def visit(v):
> + index[v] = lowlink[v] = len(stack)
> + stack.append(v)
> + execstack.append((checkroot, (v,)))
> + for w in edges.get(v, []):
> + execstack.append((updatelowlink, (v, w)))
> + if w not in index:
> + execstack.append((visit, (w,)))
> +
> + def updatelowlink(v, w):
> + lowlink[v] = min(lowlink[v], lowlink[w])
> +
> + def checkroot(v):
> + i = index[v]
> + if lowlink[v] == i:
> + component = stack[i:]
> + del stack[i:]
> + if component[0] not in ignored:
> + ignored.update(component)
> + return component
> +
> + for v in vertices:
> + if v not in ignored:
> + index.clear()
> + lowlink.clear()
> + execstack.append((visit, (v,)))
> + while execstack:
> + func, args = execstack.pop()
> + result = func(*args)
> + if result is not None:
> + yield result
> +
> +def objectreferencegraph(objects):
> + """return (vertices, edges), referenced relationship among objects
> +
> + vertices contain object IDs.
> + """
> + idset = set(id(o) for o in objects)
> + edges = {}
> + for o in objects:
> + edges[id(o)] = [id(r) for r in gc.get_referents(o) if id(r) in
> idset]
> + return idset, edges
> +
> +def describe(obj):
> + """short and easy to understand text to help identify an object
> +
> + similar to repr(), but avoids huge outputs in all cases.
> + """
> + if isinstance(obj, tuple):
> + return '<tuple (%s)>' % ', '.join(describe(v) for v in obj)
> + elif isinstance(obj, type):
> + return '<type %s>' % obj.__name__
> + elif isinstance(obj, type(describe)): # function
> + code = obj.func_code
> + path = os.path.basename(code.co_filename)
> + return ('<function %s@%s:%s>' % (obj.func_name, path,
> + code.co_firstlineno))
> + elif isinstance(obj, type(os.environ.get)): # instancemethod
> + return repr(obj)
> + else:
> + if type(obj) in (list, set, dict):
> + tryrepr = repr(obj)
> + if len(tryrepr) < 70:
> + return tryrepr
> + cellcontent = getattr(obj, 'cell_contents', None)
> + if cellcontent is not None:
> + return '<cell %s>' % describe(cellcontent)
> + typename = type(obj).__name__
> + if typename == 'getset_descriptor':
> + return repr(obj)
> + return '<%s>' % typename
> +
> +def analysegarbage(garbage, out):
> + """find cyclic references among garbage objects and report them"""
> + out.write('--- leak detection report ---\n'
> + '%d objects not freed by reference counting\n' %
> len(garbage))
> + vertices, edges = objectreferencegraph(garbage)
> + groups = []
> + for component in stronglyconnectedcomponents(vertices, edges):
> + if len(component) == 1:
> + v = component[0]
> + if v in edges[v]: # self-cycle
> + groups.append((v,))
> + else:
> + groups.append(component)
> +
> + objmap = {id(o): o for o in garbage}
> + outputs = []
> + for g in groups:
> + output = 'cycle of %d objects:\n' % len(g)
> + output += ''.join(sorted(' %s\n' % describe(objmap[i]) for i in
> g))
> + outputs.append(output)
> + out.write(''.join(sorted(outputs)))
> +
> +if platform.python_implementation() == 'CPython':
> + # only verified to work in CPython
> + @contextlib.contextmanager
> + def leakdetect(out=sys.stderr):
> + # collect garbage so previous garbage won't affect us
> + gc.set_debug(gc.get_debug() & ~gc.DEBUG_SAVEALL)
> + gc.collect()
> + # make gc.collect put objects to gc.garbage instead of releasing
> them
> + gc.set_debug(gc.get_debug() | gc.DEBUG_SAVEALL)
> + gc.disable()
> + try:
> + yield
> + finally:
> + # populate gc.garbage
> + gc.collect()
> + garbage = gc.garbage
> + # restore the default behavior
> + gc.set_debug(gc.get_debug() & ~gc.DEBUG_SAVEALL)
>
Wouldn't this be semantically better if it restored the original settings
stashed away from a gc.get_debug()?
> + if garbage:
> + analysegarbage(garbage, out)
> + del gc.garbage[:]
> +
> + disabled = False
> +else:
> + # fallback to a no-op gracefully
> + @contextlib.contextmanager
> + def leakdetect(out=sys.stderr):
> + yield
> +
> + disabled = True
> diff --git a/tests/test-leakdetect.py b/tests/test-leakdetect.py
> new file mode 100644
> --- /dev/null
> +++ b/tests/test-leakdetect.py
> @@ -0,0 +1,50 @@
> +from __future__ import absolute_import
> +
> +import inspect
> +import os
> +import sys
> +
> +dirname = os.path.dirname
> +sys.path.insert(0, os.path.join(dirname(dirname(__file__)), 'contrib'))
> +
> +import leakdetect
> +
> +if leakdetect.disabled:
> + sys.stdout.write('skipped: missing feature: cpython\n')
> + sys.exit(80)
> +
> +def leaklistdict():
> + a = []
> + b = {}
> + b['a'] = a
> + a.append(b)
> + a.append([[[[['65537']]]]])
> + c = [('long text' * 100,)]
> + c.append(c)
> +
> +def leakframe():
> + frame = inspect.currentframe()
> + a = []
> + a.append(a)
> + b = [[['some text']]]
> +
> +def leakfunc():
> + def func(x):
> + return func(x + 1)
> + f = lambda x: g(x)
> + g = lambda x: f(x)
> +
> +def leakclass():
> + # http://bugs.python.org/msg60985
> + class A(object):
> + pass
> + B = type('B', (object,), {})
> + a = A()
> + b = B()
> + class C(a.__class__):
> + pass
> + a.__class__ = C
> +
> +for leak in [leaklistdict, leakframe, leakfunc, leakclass]:
> + with leakdetect.leakdetect():
> + leak()
> diff --git a/tests/test-leakdetect.py.out b/tests/test-leakdetect.py.out
> new file mode 100644
> --- /dev/null
> +++ b/tests/test-leakdetect.py.out
> @@ -0,0 +1,43 @@
> +--- leak detection report ---
> +9 objects not freed by reference counting
> +cycle of 1 objects:
> + <list>
> +cycle of 2 objects:
> + [{'a': [...]}, [[[[['65537']]]]]]
> + {'a': [{...}, [[[[['65537']]]]]]}
> +--- leak detection report ---
> +5 objects not freed by reference counting
> +cycle of 1 objects:
> + <frame>
> +cycle of 1 objects:
> + [[...]]
> +--- leak detection report ---
> +9 objects not freed by reference counting
> +cycle of 3 objects:
> + <cell <function func at test-leakdetect.py:32>>
> + <function func at test-leakdetect.py:32>
> + <tuple (<cell <function func at test-leakdetect.py:32>>)>
> +cycle of 6 objects:
> + <cell <function <lambda>@test-leakdetect.py:34>>
> + <cell <function <lambda>@test-leakdetect.py:35>>
> + <function <lambda>@test-leakdetect.py:34>
> + <function <lambda>@test-leakdetect.py:35>
> + <tuple (<cell <function <lambda>@test-leakdetect.py:34>>)>
> + <tuple (<cell <function <lambda>@test-leakdetect.py:35>>)>
> +--- leak detection report ---
> +15 objects not freed by reference counting
> +cycle of 2 objects:
> + <tuple (<type C>, <type A>, <type object>)>
> + <type C>
> +cycle of 5 objects:
> + <attribute '__dict__' of 'A' objects>
> + <attribute '__weakref__' of 'A' objects>
> + <dict>
> + <tuple (<type A>, <type object>)>
> + <type A>
> +cycle of 5 objects:
> + <attribute '__dict__' of 'B' objects>
> + <attribute '__weakref__' of 'B' objects>
> + <dict>
> + <tuple (<type B>, <type object>)>
> + <type B>
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.mercurial-scm.org/pipermail/mercurial-devel/attachments/20170528/6909c965/attachment.html>
More information about the Mercurial-devel
mailing list