[PATCH 6 of 9 RFC] statprof: pass data structure to display functions

Gregory Szorc gregory.szorc at gmail.com
Tue Aug 16 01:25:13 EDT 2016


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1471230765 25200
#      Sun Aug 14 20:12:45 2016 -0700
# Node ID d23f7839f565d1cb9c3cfa85005dade84f41a2f4
# Parent  5fced4748aeb241c3f6eac4b3e28baf8ae6632a1
statprof: pass data structure to display functions

Currently, statprof maintains a global "state" variable that is used by
several functions. This prevents future modifications that would enable
collection on multiple threads.

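We start weaning statprof off global state by changing the display
functions to take an object containing data. display() itself accepts
the object as an optional "data" argument and falls back to the global
state when it is not given, so existing callers keep working.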

diff --git a/mercurial/statprof.py b/mercurial/statprof.py
--- a/mercurial/statprof.py
+++ b/mercurial/statprof.py
@@ -317,16 +317,17 @@ def stop():
             state.thread.join()
 
         state.accumulate_time(clock())
         state.last_start_time = None
         statprofpath = os.environ.get('STATPROF_DEST')
         if statprofpath:
             save_data(statprofpath)
 
+
 def save_data(path):
     with open(path, 'w+') as file:
         file.write(str(state.accumulated_time) + '\n')
         for sample in state.samples:
             time = str(sample.time)
             stack = sample.stack
             sites = ['\1'.join([s.path, str(s.lineno), s.function])
                      for s in stack]
@@ -420,76 +421,77 @@ class SiteStats(object):
 class DisplayFormats:
     ByLine = 0
     ByMethod = 1
     AboutMethod = 2
     Hotpath = 3
     FlameGraph = 4
     Json = 5
 
-def display(fp=None, format=3, **kwargs):
+def display(fp=None, format=3, data=None, **kwargs):
     '''Print statistics, either to stdout or the given file object.'''
+    data = data or state
 
     if fp is None:
         import sys
         fp = sys.stdout
-    if len(state.samples) == 0:
+    if len(data.samples) == 0:
         print('No samples recorded.', file=fp)
         return
 
     if format == DisplayFormats.ByLine:
-        display_by_line(fp)
+        display_by_line(data, fp)
     elif format == DisplayFormats.ByMethod:
-        display_by_method(fp)
+        display_by_method(data, fp)
     elif format == DisplayFormats.AboutMethod:
-        display_about_method(fp, **kwargs)
+        display_about_method(data, fp, **kwargs)
     elif format == DisplayFormats.Hotpath:
-        display_hotpath(fp, **kwargs)
+        display_hotpath(data, fp, **kwargs)
     elif format == DisplayFormats.FlameGraph:
-        write_to_flame(fp)
+        write_to_flame(data, fp)
     elif format == DisplayFormats.Json:
-        write_to_json(fp)
+        write_to_json(data, fp)
     else:
         raise Exception("Invalid display format")
 
     if format != DisplayFormats.Json:
         print('---', file=fp)
-        print('Sample count: %d' % len(state.samples), file=fp)
-        print('Total time: %f seconds' % state.accumulated_time, file=fp)
+        print('Sample count: %d' % len(data.samples), file=fp)
+        print('Total time: %f seconds' % data.accumulated_time, file=fp)
 
-def display_by_line(fp):
+def display_by_line(data, fp):
     '''Print the profiler data with each sample line represented
     as one row in a table.  Sorted by self-time per line.'''
-    stats = SiteStats.buildstats(state.samples)
+    stats = SiteStats.buildstats(data.samples)
     stats.sort(reverse=True, key=lambda x: x.selfseconds())
 
     print('%5.5s %10.10s   %7.7s  %-8.8s' %
           ('%  ', 'cumulative', 'self', ''), file=fp)
     print('%5.5s  %9.9s  %8.8s  %-8.8s' %
           ("time", "seconds", "seconds", "name"), file=fp)
 
     for stat in stats:
         site = stat.site
         sitelabel = '%s:%d:%s' % (site.filename(), site.lineno, site.function)
         print('%6.2f %9.2f %9.2f  %s' % (stat.selfpercent(),
                                          stat.totalseconds(),
                                          stat.selfseconds(),
                                          sitelabel),
               file=fp)
 
-def display_by_method(fp):
+def display_by_method(data, fp):
     '''Print the profiler data with each sample function represented
     as one row in a table.  Important lines within that function are
     output as nested rows.  Sorted by self-time per line.'''
     print('%5.5s %10.10s   %7.7s  %-8.8s' %
           ('%  ', 'cumulative', 'self', ''), file=fp)
     print('%5.5s  %9.9s  %8.8s  %-8.8s' %
           ("time", "seconds", "seconds", "name"), file=fp)
 
-    stats = SiteStats.buildstats(state.samples)
+    stats = SiteStats.buildstats(data.samples)
 
     grouped = defaultdict(list)
     for stat in stats:
         grouped[stat.site.filename() + ":" + stat.site.function].append(stat)
 
     # compute sums for each function
     functiondata = []
     for fname, sitestats in grouped.iteritems():
@@ -523,29 +525,29 @@ def display_by_method(fp):
             # only show line numbers for significant locations (>1% time spent)
             if stat.selfpercent() > 1:
                 source = stat.site.getsource(25)
                 stattuple = (stat.selfpercent(), stat.selfseconds(),
                              stat.site.lineno, source)
 
                 print('%33.0f%% %6.2f   line %s: %s' % (stattuple), file=fp)
 
-def display_about_method(fp, function=None, **kwargs):
+def display_about_method(data, fp, function=None, **kwargs):
     if function is None:
         raise Exception("Invalid function")
 
     filename = None
     if ':' in function:
         filename, function = function.split(':')
 
     relevant_samples = 0
     parents = {}
     children = {}
 
-    for sample in state.samples:
+    for sample in data.samples:
         for i, site in enumerate(sample.stack):
             if site.function == function and (not filename
                 or site.filename() == filename):
                 relevant_samples += 1
                 if i != len(sample.stack) - 1:
                     parent = sample.stack[i + 1]
                     if parent in parents:
                         parents[parent] = parents[parent] + 1
@@ -559,17 +561,17 @@ def display_about_method(fp, function=No
 
     parents = [(parent, count) for parent, count in parents.iteritems()]
     parents.sort(reverse=True, key=lambda x: x[1])
     for parent, count in parents:
         print('%6.2f%%   %s:%s   line %s: %s' %
             (count / relevant_samples * 100, parent.filename(),
             parent.function, parent.lineno, parent.getsource(50)), file=fp)
 
-    stats = SiteStats.buildstats(state.samples)
+    stats = SiteStats.buildstats(data.samples)
     stats = [s for s in stats
                if s.site.function == function and
                (not filename or s.site.filename() == filename)]
 
     total_cum_sec = 0
     total_self_sec = 0
     total_self_percent = 0
     total_cum_percent = 0
@@ -592,17 +594,17 @@ def display_about_method(fp, function=No
 
     children = [(child, count) for child, count in children.iteritems()]
     children.sort(reverse=True, key=lambda x: x[1])
     for child, count in children:
         print('        %6.2f%%   line %s: %s' %
               (count / relevant_samples * 100, child.lineno,
                child.getsource(50)), file=fp)
 
-def display_hotpath(fp, limit=0.05, **kwargs):
+def display_hotpath(data, fp, limit=0.05, **kwargs):
     class HotNode(object):
         def __init__(self, site):
             self.site = site
             self.count = 0
             self.children = {}
 
         def add(self, stack, time):
             self.count += time
@@ -616,18 +618,18 @@ def display_hotpath(fp, limit=0.05, **kw
                 i = 1
                 # Skip boiler plate parts of the stack
                 while i < len(stack) and '%s:%s' % (stack[i].filename(), stack[i].function) in skips:
                     i += 1
                 if i < len(stack):
                     child.add(stack[i:], time)
 
     root = HotNode(None)
-    lasttime = state.samples[0].time
-    for sample in state.samples:
+    lasttime = data.samples[0].time
+    for sample in data.samples:
         root.add(sample.stack[::-1], sample.time - lasttime)
         lasttime = sample.time
 
     def _write(node, depth, multiple_siblings):
         site = node.site
         visiblechildren = [c for c in node.children.itervalues()
                              if c.count >= (limit * root.count)]
         if site:
@@ -664,50 +666,50 @@ def display_hotpath(fp, limit=0.05, **kw
 
         visiblechildren.sort(reverse=True, key=lambda x: x.count)
         for child in visiblechildren:
             _write(child, newdepth, len(visiblechildren) > 1)
 
     if root.count > 0:
         _write(root, 0, False)
 
-def write_to_flame(fp):
+def write_to_flame(data, fp):
     scriptpath = os.environ['HOME'] + '/flamegraph.pl'
     if not os.path.exists(scriptpath):
         print("error: missing ~/flamegraph.pl", file=fp)
         print("get it here: https://github.com/brendangregg/FlameGraph",
               file=fp)
         return
 
     fd, path = tempfile.mkstemp()
 
     file = open(path, "w+")
 
     lines = {}
-    for sample in state.samples:
+    for sample in data.samples:
         sites = [s.function for s in sample.stack]
         sites.reverse()
         line = ';'.join(sites)
         if line in lines:
             lines[line] = lines[line] + 1
         else:
             lines[line] = 1
 
     for line, count in lines.iteritems():
         file.write("%s %s\n" % (line, count))
 
     file.close()
 
     os.system("perl ~/flamegraph.pl %s > ~/flamegraph.svg" % path)
     print("Written to ~/flamegraph.svg", file=fp)
 
-def write_to_json(fp):
+def write_to_json(data, fp):
     samples = []
 
-    for sample in state.samples:
+    for sample in data.samples:
         stack = []
 
         for frame in sample.stack:
             stack.append((frame.path, frame.lineno, frame.function))
 
         samples.append((sample.time, stack))
 
     print(json.dumps(samples), file=fp)


More information about the Mercurial-devel mailing list