[PATCH 2 of 2] commands: print chunk type in debugrevlog

Gregory Szorc gregory.szorc at gmail.com
Thu Nov 17 23:36:59 EST 2016


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1479443400 28800
#      Thu Nov 17 20:30:00 2016 -0800
# Node ID 4f92246570138fcf43913a287619706940e33e92
# Parent  3a1a4b0f3fd8445b166608e86829e048770ffa92
commands: print chunk type in debugrevlog

Each data entry ("chunk") in a revlog has a type based on the first
byte of the data. This type indicates how to interpret the data.

This seems like a useful thing to be able to query through a debug
command. So let's add that to `hg debugrevlog`.

This does make `hg debugrevlog` slightly slower, as it has to read
more than just the index. However, even on the mozilla-unified
manifest (which is ~200MB spread over ~350K revisions), this takes
<400ms.

diff --git a/mercurial/commands.py b/mercurial/commands.py
--- a/mercurial/commands.py
+++ b/mercurial/commands.py
@@ -15,6 +15,7 @@ import random
 import re
 import shlex
 import socket
+import string
 import sys
 import tempfile
 import time
@@ -3194,6 +3195,8 @@ def debugrevlog(ui, repo, file_=None, **
     datasize = [None, 0, 0]
     fullsize = [None, 0, 0]
     deltasize = [None, 0, 0]
+    chunktypecounts = {}
+    chunktypesizes = {}
 
     def addsize(size, l):
         if l[0] is None or size < l[0]:
@@ -3231,6 +3234,20 @@ def debugrevlog(ui, repo, file_=None, **
             elif delta != nullrev:
                 numother += 1
 
+        # Obtain data on the raw chunks in the revlog.
+        chunk = r._chunkraw(rev, rev)[1]
+        if chunk:
+            chunktype = chunk[0]
+        else:
+            chunktype = 'empty'
+
+        if chunktype not in chunktypecounts:
+            chunktypecounts[chunktype] = 0
+            chunktypesizes[chunktype] = 0
+
+        chunktypecounts[chunktype] += 1
+        chunktypesizes[chunktype] += size
+
     # Adjust size min value for empty cases
     for size in (datasize, fullsize, deltasize):
         if size[0] is None:
@@ -3282,6 +3299,24 @@ def debugrevlog(ui, repo, file_=None, **
     ui.write(('    full      : ') + fmt % pcfmt(fulltotal, totalsize))
     ui.write(('    deltas    : ') + fmt % pcfmt(deltatotal, totalsize))
 
+    def fmtchunktype(chunktype):
+        if chunktype == 'empty':
+            return '    %s     : ' % chunktype
+        elif chunktype in string.ascii_letters:
+            return '    0x%s (%s)  : ' % (hex(chunktype), chunktype)
+        else:
+            return '    0x%s      : ' % hex(chunktype)
+
+    ui.write('\n')
+    ui.write(('chunks        : ') + fmt2 % numrevs)
+    for chunktype in sorted(chunktypecounts):
+        ui.write(fmtchunktype(chunktype))
+        ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
+    ui.write(('chunks size   : ') + fmt2 % totalsize)
+    for chunktype in sorted(chunktypecounts):
+        ui.write(fmtchunktype(chunktype))
+        ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
+
     ui.write('\n')
     fmt = dfmtstr(max(avgchainlen, compratio))
     ui.write(('avg chain length  : ') + fmt % avgchainlen)
diff --git a/tests/test-debugcommands.t b/tests/test-debugcommands.t
--- a/tests/test-debugcommands.t
+++ b/tests/test-debugcommands.t
@@ -22,6 +22,11 @@
       full      : 44 (100.00%)
       deltas    :  0 ( 0.00%)
   
+  chunks        :  1
+      0x75 (u)  :  1 (100.00%)
+  chunks size   : 44
+      0x75 (u)  : 44 (100.00%)
+  
   avg chain length  : 0
   max chain length  : 0
   compression ratio : 0


More information about the Mercurial-devel mailing list