[PATCH v3] debug: automate the process of truncating a damaged obsstore (issue5265)

Gregory Szorc gregory.szorc at gmail.com
Mon Jul 4 13:21:05 EDT 2016


On Mon, Jul 4, 2016 at 8:48 AM, Simon Farnsworth <simonfar at fb.com> wrote:

> # HG changeset patch
> # User Simon Farnsworth <simonfar at fb.com>
> # Date 1467646999 25200
> #      Mon Jul 04 08:43:19 2016 -0700
> # Node ID 31eea7188911ca17b36e69170dfcec07f4da37ae
> # Parent  fd93b15b5c30d16fd9c9eba61402d07fc4085db3
> debug: automate the process of truncating a damaged obsstore (issue5265)
>
> We occasionally see users who've had a system crash damage the obsstore
> file
> in their .hg/store directory; this makes all `hg` commands fail until we go
> in and remove the damaged section of the obsstore by hand.
>
> Automate the process we use when this happens, as a debug command because
> it
> loses the corrupted data. We only use it in rare circumstances when it's
> important to retrieve a user's work and apply it to a fresh clone.
>
> diff --git a/mercurial/commands.py b/mercurial/commands.py
> --- a/mercurial/commands.py
> +++ b/mercurial/commands.py
> @@ -15,6 +15,7 @@
>  import re
>  import shlex
>  import socket
> +import struct
>  import sys
>  import tempfile
>  import time
> @@ -3702,6 +3703,61 @@
>              displayer.show(repo[r], **props)
>          displayer.close()
>
> +@command('debugtruncatestore',
> +    [('', 'obsolete', None, _('truncate bad markers in obsstore'))],
> +    _('[OPTION]'))
> +def debugtruncatestore(ui, repo, **opts):
> +    """Fix up repository corruption by truncating damaged files
> +
> +    Most on-disk data structures are designed to be append-only. A failed
> write
> +    (e.g. due to an unexpected power failure) can leave the file
> corrupted.
> +
> +    This command attempts to recover from that situation by replacing the
> +    corrupted file with a version that only contains the valid records
> from the
> +    broken file. It is not guaranteed to remove all corrupt records - it
> will
> +    only remove corrupt records where normal use of the repo would result
> in a
> +    crash.
> +
> +    Corrupt files will be renamed with a .corrupt extension before the
> fixed
> +    version is written out, so that you can examine the corruption and/or
> undo
> +    this command.
> +
> +    You should normally use :hg:`recover` before resorting to this
> command.
> +    """
> +
> +    if 'obsolete' in opts:
> +        data = repo.svfs.tryread('obsstore')
> +        if data:
> +            # Slow algorithm - but this is an emergency debug operation
> +            version = None
> +            corrupt = False
> +            while version is None and len(data) > 0:
> +                try:
> +                    (version, markers) = obsolete._readmarkers(data)
> +                    # Force evaluation of all the markers - the pure
> +                    # implementation returns a generator which won't
> detonate
> +                    # until you evaluate the bad marker.
> +                    for marker in markers:
> +                        pass
> +                except (ValueError, struct.error, error.Abort):
> +                    corrupt = True
> +                    version = None
> +                    data = data[:-1]
> +                    continue
> +                break
> +            if corrupt:
> +                with repo.lock():
> +                    repo.svfs.rename('obsstore', 'obsstore.corrupt')
> +                    if len(data) > 0:
> +                        repo.svfs.write('obsstore', data)
> +                        ui.write(_('truncated damaged obsstore\n'))
> +                    else:
> +                        ui.write(_('deleted unreadable obsstore\n'))
> +            else:
> +                ui.write(_('no damage to obsstore\n'))
> +        else:
> +            ui.write(_('no obsstore\n'))
> +
>

Please forgive me if this is pedantic, but I think this code should live in
obsolete.py, where all the other code manipulating this file is located.
Generally speaking, I think @command functions should only 1) check
input/arguments, 2) call into library function(s), and 3) format output.
Otherwise we end up with a commands.py that is even larger (and it is
already grossly large IMO).


>  @command('debugwalk', walkopts, _('[OPTION]... [FILE]...'),
> inferrepo=True)
>  def debugwalk(ui, repo, *pats, **opts):
>      """show how files match on given patterns"""
> diff --git a/tests/test-completion.t b/tests/test-completion.t
> --- a/tests/test-completion.t
> +++ b/tests/test-completion.t
> @@ -109,6 +109,7 @@
>    debugsub
>    debugsuccessorssets
>    debugtemplate
> +  debugtruncatestore
>    debugwalk
>    debugwireargs
>
> @@ -274,6 +275,7 @@
>    debugsub: rev
>    debugsuccessorssets:
>    debugtemplate: rev, define
> +  debugtruncatestore: obsolete
>    debugwalk: include, exclude
>    debugwireargs: three, four, five, ssh, remotecmd, insecure
>    files: rev, print0, include, exclude, template, subrepos
> diff --git a/tests/test-debugcommands.t b/tests/test-debugcommands.t
> --- a/tests/test-debugcommands.t
> +++ b/tests/test-debugcommands.t
> @@ -126,3 +126,45 @@
>     debugstacktrace.py:7 *in * (glob)
>     debugstacktrace.py:6 *in g (glob)
>     */util.py:* in debugstacktrace (glob)
> +
> +Test corruption-fixing debugtruncatestore command
> +
> +  $ hg init corrupt-obsstore
> +  $ cd corrupt-obsstore
> +  $ cat >> .hg/hgrc << EOF
> +  > [experimental]
> +  > evolution = all
> +  > [extensions]
> +  > evolve =
> +  > EOF
> +  $ echo a > file
> +  $ hg commit -qAm file-a -d 1/1/2001
> +  $ hg debugobsolete
> +
> +  $ echo corrupt > .hg/store/obsstore
> +  $ hg debugobsolete 2> /dev/null
> +  [255]
> +  $ hg debugtruncatestore --obsolete
> +  deleted unreadable obsstore
> +  $ hg debugobsolete
> +
> +  $ echo bee > file
> +  $ hg commit -qAm file-b -d 1/1/2001
> +  $ echo b > file
> +  $ hg commit -qAm file-b -d 1/1/2001
> +  $ hg fold -m file-b -r '.^::.' -d 1/1/2001
> +  2 changesets folded
> +  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
> +
> +  $ hg debugobsolete
> +  b429646ef7b48810f0aeaceb257eb589ff2e7e07
> a7a6f057a0dda49d17b8ddb53d228494f939fed3 0 \(.*\) {'user': 'test'} (re)
> +  12af3ef7db84073f5b5802cb875af46c6fc8f420
> a7a6f057a0dda49d17b8ddb53d228494f939fed3 0 \(.*\) {'user': 'test'} (re)
> +  $ python -c 'print("CORRUPT" * 120)' >> .hg/store/obsstore
> +  $ hg debugobsolete 2> /dev/null
> +  [1]
> +  $ hg debugtruncatestore --obsolete
> +  truncated damaged obsstore
> +  $ hg debugobsolete
> +  b429646ef7b48810f0aeaceb257eb589ff2e7e07
> a7a6f057a0dda49d17b8ddb53d228494f939fed3 0 \(.*\) {'user': 'test'} (re)
> +  12af3ef7db84073f5b5802cb875af46c6fc8f420
> a7a6f057a0dda49d17b8ddb53d228494f939fed3 0 \(.*\) {'user': 'test'} (re)
> +  $ cd ..
> diff --git a/tests/test-help.t b/tests/test-help.t
> --- a/tests/test-help.t
> +++ b/tests/test-help.t
> @@ -917,6 +917,8 @@
>                   show set of successors for revision
>     debugtemplate
>                   parse and apply a template
> +   debugtruncatestore
> +                 Fix up repository corruption by truncating damaged files
>     debugwalk     show how files match on given patterns
>     debugwireargs
>                   (no help text available)
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.mercurial-scm.org/pipermail/mercurial-devel/attachments/20160704/3af9a99e/attachment.html>


More information about the Mercurial-devel mailing list