[PATCH 4 of 7 V2] transaction: add support for non-append files

Pierre-Yves David pierre-yves.david at ens-lyon.org
Mon Mar 31 19:45:02 CDT 2014



On 03/31/2014 04:19 PM, Durham Goode wrote:
> # HG changeset patch
> # User Durham Goode <durham at fb.com>
> # Date 1395699711 25200
> #      Mon Mar 24 15:21:51 2014 -0700
> # Node ID 498a1087dd60ec7234c4352e566abe977eb63332
> # Parent  f85f9ea96d16e180de3e38cfc468e53441f41743
> transaction: add support for non-append files
>
> This adds support for normal, non-append-only files in transactions.  For
> example, .hg/store/fncache and .hg/store/phaseroots should be written as part of
> the transaction, but are not append only files.
>
> This adds a journal.backupfiles along side the normal journal. This tracks which
> files have been backed up as part of the transaction.  transaction.addbackup()
> creates a backup of the file (using a hardlink), which is later used to recover
> in the event of the transaction failing.
>
> Using a seperate journal allows the repository to still be used by older
> versions of Mercurial. A future patch will use this functionality and add tests
> for it.

Reading this patches I've an hard to time understand if it would behave 
race free is two process are trying to rollback a transaction at the 
same time.


But the lock protection make it probably impossible.


> diff --git a/mercurial/transaction.py b/mercurial/transaction.py
> --- a/mercurial/transaction.py
> +++ b/mercurial/transaction.py
> @@ -12,8 +12,8 @@
>   # GNU General Public License version 2 or any later version.
>
>   from i18n import _
> -import errno
> -import error
> +import errno, os
> +import error, util
>
>   def active(func):
>       def _active(self, *args, **kwds):
> @@ -24,22 +24,35 @@
>       return _active
>
>   def _playback(journal, report, opener, entries, unlink=True):

A docstring explaining how the process happen would be great.


> +    backupfiles = []
>       for f, o, ignore in entries:
>           if o or not unlink:
> -            try:
> -                fp = opener(f, 'a')
> -                fp.truncate(o)
> -                fp.close()
> -            except IOError:
> -                report(_("failed to truncate %s\n") % f)
> -                raise
> +            if isinstance(o, int):
> +                try:
> +                    fp = opener(f, 'a')
> +                    fp.truncate(o)
> +                    fp.close()
> +                except IOError:
> +                    report(_("failed to truncate %s\n") % f)
> +                    raise
> +            else:
> +                fpath = opener.join(f)
> +                opath = opener.join(o)
> +                util.copyfile(opath, fpath)
> +                backupfiles.append(o)

Why do you use the same list entries for two different entry.

It looks like the transaction could have a fullfile entry with different 
content.

(Using isinstance is usually considered fragile and inelegant)



>           else:
>               try:
>                   opener.unlink(f)
>               except (IOError, OSError), inst:
>                   if inst.errno != errno.ENOENT:
>                       raise
> +
>       opener.unlink(journal)
> +    backuppath = "%s.backupfiles" % journal
> +    if opener.exists(backuppath):
> +        opener.unlink(backuppath)
> +    for f in backupfiles:
> +        opener.unlink(f)
>
>   class transaction(object):
>       def __init__(self, report, opener, journal, after=None, createmode=None,
> @@ -56,9 +69,12 @@
>           self.journal = journal
>           self._queue = []
>
> +        self.backupfilesjournal = "%s.backupfiles" % journal
>           self.file = opener.open(self.journal, "w")
> +        self.backupsfile = opener.open(self.backupfilesjournal, 'w')
>           if createmode is not None:
>               opener.chmod(self.journal, createmode & 0666)
> +            opener.chmod(self.backupfilesjournal, createmode & 0666)
>
>       def __del__(self):
>           if self.journal:
> @@ -71,11 +87,23 @@
>       @active
>       def endgroup(self):
>           q = self._queue.pop()
> -        d = ''.join(['%s\0%d\n' % (x[0], x[1]) for x in q])
>           self.entries.extend(q)
> +
> +        offsets = []
> +        backups = []
> +        for f, o, _ in q:
> +            if isinstance(o, int):
> +                offsets.append((f, o))
> +            else:
> +                backups.append((f, o))
> +        d = ''.join(['%s\0%d\n' % (f, o) for f, o in offsets])
>           self.file.write(d)
>           self.file.flush()
>
> +        d = ''.join(['%s\0%s\0' % (f, o) for f, o in backups])
> +        self.backupsfile.write(d)
> +        self.backupsfile.flush()
> +
>       @active
>       def add(self, file, offset, data=None):
>           if file in self.map:
> @@ -91,6 +119,27 @@
>           self.file.flush()
>
>       @active
> +    def addbackup(self, file):


A docstring would be nice


> +        if file in self.map:
> +            return
> +        backupfile = "journal.%s" % file
> +        if self.opener.exists(file):
> +            filepath = self.opener.join(file)
> +            backuppath = self.opener.join(backupfile)
> +            util.copyfiles(filepath, backuppath, hardlink=True)
> +        else:
> +            self.add(file, 0)
> +            return
> +
> +        if self._queue:
> +            self._queue[-1].append((file, backupfile))
> +            return
> +        self.entries.append((file, backupfile, None))
> +        self.map[file] = len(self.entries) - 1
> +        self.backupsfile.write("%s\0%s\0" % (file, backupfile))
> +        self.backupsfile.flush()
> +
> +    @active
>       def find(self, file):
>           if file in self.map:
>               return self.entries[self.map[file]]
> @@ -136,11 +185,16 @@
>           if self.count != 0:
>               return
>           self.file.close()
> -        self.entries = []
>           if self.after:
>               self.after()
>           if self.opener.isfile(self.journal):
>               self.opener.unlink(self.journal)
> +        if self.opener.isfile(self.backupfilesjournal):
> +            self.opener.unlink(self.backupfilesjournal)
> +            for f, o, _ in self.entries:
> +                if not isinstance(o, int):
> +                    self.opener.unlink(o)
> +        self.entries = []
>           self.journal = None
>
>       @active
> @@ -162,6 +216,7 @@
>               if not self.entries:
>                   if self.journal:
>                       self.opener.unlink(self.journal)
> +                    self.opener.unlink(self.backupfilesjournal)
>                   return
>
>               self.report(_("transaction abort!\n"))
> @@ -189,4 +244,14 @@
>           except ValueError:
>               report(_("couldn't read journal entry %r!\n") % l)
>
> +    backupjournal = "%s.backupfiles" % file
> +    if opener.exists(backupjournal):
> +        fp = opener.open(backupjournal)
> +        data = fp.read()
> +        if len(data) > 0:
> +            parts = data.split('\0')
> +            for i in xrange(0, len(parts), 2):
> +                f, o = parts[i:i + 1]
> +                entries.append((f, o, None))
> +
>       _playback(file, report, opener, entries)


Updating (or most probably creating :-() the dostring of this function 
with the on disk format would be great.



More information about the Mercurial-devel mailing list