[PATCH 2 of 2] commands: print chunk type in debugrevlog

Augie Fackler raf at durin42.com
Fri Nov 18 20:42:55 EST 2016


On Thu, Nov 17, 2016 at 08:36:59PM -0800, Gregory Szorc wrote:
> # HG changeset patch
> # User Gregory Szorc <gregory.szorc at gmail.com>
> # Date 1479443400 28800
> #      Thu Nov 17 20:30:00 2016 -0800
> # Node ID 4f92246570138fcf43913a287619706940e33e92
> # Parent  3a1a4b0f3fd8445b166608e86829e048770ffa92
> commands: print chunk type in debugrevlog

Queued, thanks.

>
> Each data entry ("chunk") in a revlog has a type based on the first
> byte of the data. This type indicates how to interpret the data.
>
> This seems like a useful thing to be able to query through a debug
> command. So let's add that to `hg debugrevlog`.
>
> This does make `hg debugrevlog` slightly slower, as it now has to
> read each revision's raw chunk data rather than just the index.
> However, even on the mozilla-unified manifest (which is ~200MB
> spread over ~350K revisions), this takes <400ms.
>
> diff --git a/mercurial/commands.py b/mercurial/commands.py
> --- a/mercurial/commands.py
> +++ b/mercurial/commands.py
> @@ -15,6 +15,7 @@ import random
>  import re
>  import shlex
>  import socket
> +import string
>  import sys
>  import tempfile
>  import time
> @@ -3194,6 +3195,8 @@ def debugrevlog(ui, repo, file_=None, **
>      datasize = [None, 0, 0]
>      fullsize = [None, 0, 0]
>      deltasize = [None, 0, 0]
> +    chunktypecounts = {}
> +    chunktypesizes = {}
>
>      def addsize(size, l):
>          if l[0] is None or size < l[0]:
> @@ -3231,6 +3234,20 @@ def debugrevlog(ui, repo, file_=None, **
>              elif delta != nullrev:
>                  numother += 1
>
> +        # Obtain data on the raw chunks in the revlog.
> +        chunk = r._chunkraw(rev, rev)[1]
> +        if chunk:
> +            chunktype = chunk[0]
> +        else:
> +            chunktype = 'empty'
> +
> +        if chunktype not in chunktypecounts:
> +            chunktypecounts[chunktype] = 0
> +            chunktypesizes[chunktype] = 0
> +
> +        chunktypecounts[chunktype] += 1
> +        chunktypesizes[chunktype] += size
> +
>      # Adjust size min value for empty cases
>      for size in (datasize, fullsize, deltasize):
>          if size[0] is None:
> @@ -3282,6 +3299,24 @@ def debugrevlog(ui, repo, file_=None, **
>      ui.write(('    full      : ') + fmt % pcfmt(fulltotal, totalsize))
>      ui.write(('    deltas    : ') + fmt % pcfmt(deltatotal, totalsize))
>
> +    def fmtchunktype(chunktype):
> +        if chunktype == 'empty':
> +            return '    %s     : ' % chunktype
> +        elif chunktype in string.ascii_letters:
> +            return '    0x%s (%s)  : ' % (hex(chunktype), chunktype)
> +        else:
> +            return '    0x%s      : ' % hex(chunktype)
> +
> +    ui.write('\n')
> +    ui.write(('chunks        : ') + fmt2 % numrevs)
> +    for chunktype in sorted(chunktypecounts):
> +        ui.write(fmtchunktype(chunktype))
> +        ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
> +    ui.write(('chunks size   : ') + fmt2 % totalsize)
> +    for chunktype in sorted(chunktypecounts):
> +        ui.write(fmtchunktype(chunktype))
> +        ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
> +
>      ui.write('\n')
>      fmt = dfmtstr(max(avgchainlen, compratio))
>      ui.write(('avg chain length  : ') + fmt % avgchainlen)
> diff --git a/tests/test-debugcommands.t b/tests/test-debugcommands.t
> --- a/tests/test-debugcommands.t
> +++ b/tests/test-debugcommands.t
> @@ -22,6 +22,11 @@
>        full      : 44 (100.00%)
>        deltas    :  0 ( 0.00%)
>
> +  chunks        :  1
> +      0x75 (u)  :  1 (100.00%)
> +  chunks size   : 44
> +      0x75 (u)  : 44 (100.00%)
> +
>    avg chain length  : 0
>    max chain length  : 0
>    compression ratio : 0
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


More information about the Mercurial-devel mailing list