[PATCH 3 of 3] commands: add debugdeltachain command

Gregory Szorc gregory.szorc at gmail.com
Sun Dec 6 02:45:03 CST 2015


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1449387466 28800
#      Sat Dec 05 23:37:46 2015 -0800
# Node ID 5e40f33ca9aad630d6a75f73bbcc5f8fdad3cd01
# Parent  860794fcdaf0794f1f08d1620f0421d1eec96b6d
commands: add debugdeltachain command

We have debug commands for displaying overall revlog statistics
(debugrevlog) and for dumping a revlog index (debugindex). As part
of investigating various aspects of revlog behavior and performance,
I found it important to have an understanding of how revlog
delta chains behave in practice.

This patch implements a "debugdeltachain" command. For each revision
in a revlog, it dumps information about the delta chain: which delta
chain it is part of, length of the delta chain, distance since base
revision, info about base revision, size of the delta chain, etc.

This command has already uncovered some weird history in
mozilla-central I didn't know about. So I think it's valuable.

diff --git a/mercurial/commands.py b/mercurial/commands.py
--- a/mercurial/commands.py
+++ b/mercurial/commands.py
@@ -2480,16 +2480,127 @@ def debugindexdot(ui, repo, file_=None, 
     for i in r:
         node = r.node(i)
         pp = r.parents(node)
         ui.write("\t%d -> %d\n" % (r.rev(pp[0]), i))
         if pp[1] != nullid:
             ui.write("\t%d -> %d\n" % (r.rev(pp[1]), i))
     ui.write("}\n")
 
+@command('debugdeltachain',
+    debugrevlogopts,
+    _('-c|-m|FILE'),
+    optionalrepo=True)
+def debugdeltachain(ui, repo, file_=None, **opts):
+    """dump information about delta chains in a revlog
+
+    The printed columns are:
+
+       rev        revision number
+       chain#     delta chain identifier (numbered by unique base)
+       chainlen   delta chain length to this point
+       prev       previous revision in delta chain
+       delta      role of delta / how it was computed
+       size       compressed size of revision
+       rawsize    uncompressed size of revision
+       chainsize  total size of compressed revisions in chain
+       ratio      total chain size divided by uncompressed revision size
+                  new delta chains typically start at ratio 2.00
+       lindist    linear distance from base revision in delta chain to end
+                  of this revision
+       extradist  total size of revisions not part of this delta chain from
+                  base of delta chain to end of this revision; a measurement
+                  of how much extra data we need to read/seek across to read
+                  this revision
+       extraratio extradist divided by chainsize; another representation of
+                  how much unrelated data is needed to load this delta chain
+    """
+    r = cmdutil.openrevlog(repo, 'debugdeltachain', file_, opts)
+    index = r.index
+    generaldelta = r.version & revlog.REVLOGGENERALDELTA
+
+    def revinfo(rev):
+        iterrev = rev
+        e = index[iterrev]
+        chain = []
+        compsize = e[1]
+        uncompsize = e[2]
+        chainsize = 0
+
+        if generaldelta:
+            if e[3] == e[5]:
+                deltatype = 'p1'
+            elif e[3] == e[6]:
+                deltatype = 'p2'
+            elif e[3] == rev - 1:
+                deltatype = 'prev'
+            elif e[3] == rev:
+                deltatype = 'base'
+            else:
+                deltatype = 'other'
+        else:
+            if e[3] == rev:
+                deltatype = 'base'
+            else:
+                deltatype = 'prev'
+
+        while iterrev != e[3]:  # stop at the rev that is its own base
+            chain.append(iterrev)
+            chainsize += e[1]
+            if generaldelta:
+                iterrev = e[3]
+            else:
+                iterrev -= 1
+            e = index[iterrev]
+        # iterrev is now the chain base, which belongs to the chain too
+        chainsize += e[1]
+        chain.append(iterrev)
+
+        chain.reverse()
+        return compsize, uncompsize, deltatype, chain, chainsize
+
+    ui.write('    rev  chain# chainlen     prev   delta       '
+             'size    rawsize  chainsize     ratio  lindist  extradist '
+             'extraratio\n')
+
+    chainbases = {}
+    for rev in r:
+        comp, uncomp, deltatype, chain, chainsize = revinfo(rev)
+        chainbase = chain[0]
+        chainidx = chainbases.setdefault(chainbase, len(chainbases) + 1)
+        basestart = r.start(chainbase)
+        revstart = r.start(rev)
+        lineardist = revstart + comp - basestart
+        extradist = lineardist - chainsize
+        # -1 marks a revision that is the only member of its chain
+        prevrev = chain[-2] if len(chain) > 1 else -1
+
+        # empty revisions and zero-size chains must not divide by zero;
+        # "or 1" makes the ratio fall back to the bare numerator
+        chainratio = float(chainsize) / (uncomp or 1)
+        extraratio = float(extradist) / (chainsize or 1)
+
+        ui.write('{rev:>7d} {chainidx:>7d} {chainlen:>8d} {prevrev:>8d} '
+                 '{deltatype:>7s} {compsize:>10d} {uncompsize:>10d} '
+                 '{chainsize:>10d} {chainratio:>9.5f} {lindist:>9d} '
+                 '{extradist:>9d} {extraratio:>10.5f}\n'
+                 .format(
+                     rev=rev,
+                     chainidx=chainidx,
+                     chainlen=len(chain),
+                     prevrev=prevrev,
+                     deltatype=deltatype,
+                     compsize=comp,
+                     uncompsize=uncomp,
+                     chainsize=chainsize,
+                     chainratio=chainratio,
+                     lindist=lineardist,
+                     extradist=extradist,
+                     extraratio=extraratio))
+
 @command('debuginstall', [], '', norepo=True)
 def debuginstall(ui):
     '''test Mercurial installation
 
     Returns 0 on success.
     '''
 
     def writetemp(contents):
diff --git a/tests/test-completion.t b/tests/test-completion.t
--- a/tests/test-completion.t
+++ b/tests/test-completion.t
@@ -75,16 +75,17 @@ Show debug commands if there are no othe
   debugcheckstate
   debugcommands
   debugcomplete
   debugconfig
   debugcreatestreamclonebundle
   debugdag
   debugdata
   debugdate
+  debugdeltachain
   debugdirstate
   debugdiscovery
   debugextensions
   debugfileset
   debugfsinfo
   debuggetbundle
   debugignore
   debugindex
@@ -238,16 +239,17 @@ Show all commands + options
   debugbundle: all
   debugcheckstate: 
   debugcommands: 
   debugcomplete: options
   debugcreatestreamclonebundle: 
   debugdag: tags, branches, dots, spaces
   debugdata: changelog, manifest, dir
   debugdate: extended
+  debugdeltachain: changelog, manifest, dir
   debugdirstate: nodates, datesort
   debugdiscovery: old, nonheads, ssh, remotecmd, insecure
   debugextensions: template
   debugfileset: rev
   debugfsinfo: 
   debuggetbundle: head, common, type
   debugignore: 
   debugindex: changelog, manifest, dir, format
diff --git a/tests/test-help.t b/tests/test-help.t
--- a/tests/test-help.t
+++ b/tests/test-help.t
@@ -807,16 +807,18 @@ Test list of internal help commands
    debugcomplete
                  returns the completion list associated with the given command
    debugcreatestreamclonebundle
                  create a stream clone bundle file
    debugdag      format the changelog or an index DAG as a concise textual
                  description
    debugdata     dump the contents of a data file revision
    debugdate     parse and display a date
+   debugdeltachain
+                 dump information about delta chains in a revlog
    debugdirstate
                  show the contents of the current dirstate
    debugdiscovery
                  runs the changeset discovery protocol in isolation
    debugextensions
                  show information about active extensions
    debugfileset  parse and apply a fileset specification
    debugfsinfo   show information detected about current filesystem


More information about the Mercurial-devel mailing list