[PATCH] unionrepo: read-only operations on a union of two localrepos

Brodie Rao brodie at sf.io
Fri Feb 8 09:22:31 CST 2013


On Fri, Feb 8, 2013 at 11:12 AM, Mads Kiilerich <mads at kiilerich.com> wrote:
> # HG changeset patch
> # User Mads Kiilerich <madski at unity3d.com>
> # Date 1358520849 -3600
> # Node ID 1f62f308ce833825eb8878af4b0c42a7eb1ea175
> # Parent  2fefd1170bf269e26bb304553009f38e0117c342
> unionrepo: read-only operations on a union of two localrepos
>
> unionrepo is just like bundlerepo without bundles.
>
> The implementation is very similar to bundlerepo, but I don't see any obvious
> way to generalize it.
>
> Some of the most obvious use cases for this would be log and diff across local
> repos, as a kind of preview of pulls, for instance:
>
>   $ hg -R union:repo1+repo2 heads
>   $ hg -R union:repo1+repo2 log -r REPO1REV -r REPO2REV
>   $ hg -R union:repo1+repo2 log -r '::REPO1REV-::REPO2REV'
>   $ hg -R union:repo1+repo2 log -r 'ancestor(REPO1REV,REPO2REV)'
>   $ hg -R union:repo1+repo2 diff -r REPO1REV -r REPO2REV
>
> This is going to be used in RhodeCode, and Bitbucket already uses something
> similar. Having a core implementation would be beneficial.

It might be useful to support a union of an arbitrary number of repos
rather than just two. It should be pretty easy to extend this to
support that.

> diff --git a/mercurial/hg.py b/mercurial/hg.py
> --- a/mercurial/hg.py
> +++ b/mercurial/hg.py
> @@ -9,8 +9,8 @@
>  from i18n import _
>  from lock import release
>  from node import hex, nullid
> -import localrepo, bundlerepo, httppeer, sshpeer, statichttprepo, bookmarks
> -import lock, util, extensions, error, node, scmutil, phases, url
> +import localrepo, bundlerepo, unionrepo, httppeer, sshpeer, statichttprepo
> +import bookmarks, lock, util, extensions, error, node, scmutil, phases, url
>  import cmdutil, discovery
>  import merge as mergemod
>  import verify as verifymod
> @@ -64,6 +64,7 @@
>
>  schemes = {
>      'bundle': bundlerepo,
> +    'union': unionrepo,
>      'file': _local,
>      'http': httppeer,
>      'https': httppeer,
> diff --git a/mercurial/unionrepo.py b/mercurial/unionrepo.py
> new file mode 100644
> --- /dev/null
> +++ b/mercurial/unionrepo.py
> @@ -0,0 +1,183 @@
> +# unionrepo.py - repository class for viewing union of repository changesets
> +#
> +# Derived from bundlerepo.py
> +# Copyright 2006, 2007 Benoit Boissinot <bboissin at gmail.com>
> +# Copyright 2013 Unity Technologies, Mads Kiilerich <madski at unity3d.com>
> +#
> +# This software may be used and distributed according to the terms of the
> +# GNU General Public License version 2 or any later version.
> +
> +"""Repository class for "in-memory pull" of one local repository to another,
> +allowing operations like diff and log with revsets.
> +"""
> +
> +from node import nullid
> +import util, mdiff, scmutil
> +import localrepo, changelog, manifest, filelog, revlog
> +
> +class unionrevlog(revlog.revlog):
> +    def __init__(self, opener, indexfile, revlog2, linkmapper):
> +        # How it works:
> +        # To retrieve a revision, we just need to know the node id so we can
> +        # look it up in revlog2.
> +        #
> +        # basemap is indexed with revisions coming from the second revlog.
> +        #
> +        # To differentiate a rev in the second revlog from a rev in the revlog,
> +        # we check revision against basemap.
> +        opener = scmutil.readonlyvfs(opener)
> +        revlog.revlog.__init__(self, opener, indexfile)
> +        self.revlog2 = revlog2
> +
> +        self.basemap = {} # mapping rev that is in revlog2 to ... nothing
> +        n = len(self)
> +        self.bundlerevs = set() # used by 'bundle()' revset expression
> +        for rev2 in self.revlog2:
> +            rev = self.revlog2.index[rev2]
> +            # rev numbers - in revlog2, very different from self.rev
> +            _start, _csize, _rsize, _base, linkrev, p1rev, p2rev, node = rev
> +
> +            if linkmapper is None: # link is to same revlog
> +                assert linkrev == rev2 # we never link back
> +                link = n
> +            else: # rev must be mapped from repo2 cl to unified cl by linkmapper
> +                link = linkmapper(linkrev)
> +
> +            if node in self.nodemap:
> +                # this happens for the common revlog revisions
> +                self.bundlerevs.add(self.nodemap[node])
> +                continue
> +
> +            p1node = self.revlog2.node(p1rev)
> +            p2node = self.revlog2.node(p2rev)
> +
> +            e = (None, None, None, None,
> +                 link, self.rev(p1node), self.rev(p2node), node)
> +            self.basemap[n] = None
> +            self.index.insert(-1, e)
> +            self.nodemap[node] = n
> +            self.bundlerevs.add(n)
> +            n += 1
> +
> +    def _chunk(self, rev):
> +        if rev not in self.basemap:
> +            return revlog.revlog._chunk(self, rev)
> +        return self.revlog2._chunk(self.node(rev))
> +
> +    def revdiff(self, rev1, rev2):
> +        """return or calculate a delta between two revisions"""
> +        if rev1 in self.basemap and rev2 in self.basemap:
> +            return self.revlog2.revdiff(
> +                self.revlog2.rev(self.node(rev1)),
> +                self.revlog2.rev(self.node(rev2)))
> +        elif rev1 not in self.basemap and rev2 not in self.basemap:
> +            return revlog.revlog.revdiff(self, rev1, rev2)
> +
> +        return mdiff.textdiff(self.revision(self.node(rev1)),
> +                              self.revision(self.node(rev2)))
> +
> +    def revision(self, nodeorrev):
> +        """return an uncompressed revision of a given node or revision
> +        number.
> +        """
> +        if isinstance(nodeorrev, int):
> +            rev = nodeorrev
> +            node = self.node(rev)
> +        else:
> +            node = nodeorrev
> +            rev = self.rev(node)
> +
> +        if node == nullid:
> +            return ""
> +
> +        if rev in self.basemap:
> +            text = self.revlog2.revision(node)
> +            self._cache = (node, rev, text)
> +        else:
> +            text = revlog.revlog.revision(self, rev)
> +            # already cached
> +        return text
> +
> +    def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
> +        raise NotImplementedError
> +    def addgroup(self, revs, linkmapper, transaction):
> +        raise NotImplementedError
> +    def strip(self, rev, minlink):
> +        raise NotImplementedError
> +    def checksize(self):
> +        raise NotImplementedError
> +
> +class unionchangelog(unionrevlog, changelog.changelog):
> +    def __init__(self, opener, opener2):
> +        changelog.changelog.__init__(self, opener)
> +        linkmapper = None
> +        changelog2 = changelog.changelog(opener2)
> +        unionrevlog.__init__(self, opener, self.indexfile, changelog2,
> +                             linkmapper)
> +
> +class unionmanifest(unionrevlog, manifest.manifest):
> +    def __init__(self, opener, opener2, linkmapper):
> +        manifest.manifest.__init__(self, opener)
> +        manifest2 = manifest.manifest(opener2)
> +        unionrevlog.__init__(self, opener, self.indexfile, manifest2,
> +                             linkmapper)
> +
> +class unionfilelog(unionrevlog, filelog.filelog):
> +    def __init__(self, opener, path, opener2, linkmapper, repo):
> +        filelog.filelog.__init__(self, opener, path)
> +        filelog2 = filelog.filelog(opener2, path)
> +        unionrevlog.__init__(self, opener, self.indexfile, filelog2,
> +                             linkmapper)
> +        self._repo = repo
> +
> +    def _file(self, f):
> +        self._repo.file(f)
> +
> +class unionpeer(localrepo.localpeer):
> +    def canpush(self):
> +        return False
> +
> +class unionrepository(localrepo.localrepository):
> +    def __init__(self, ui, path, path2):
> +        localrepo.localrepository.__init__(self, ui, path)
> +        self.ui.setconfig('phases', 'publish', False)
> +
> +        self._url = 'union:%s+%s' % (util.expandpath(path),
> +                                     util.expandpath(path2))
> +        self.repo2 = localrepo.localrepository(ui, path2)
> +
> +    @localrepo.unfilteredpropertycache
> +    def changelog(self):
> +        return unionchangelog(self.sopener, self.repo2.sopener)
> +
> +    def _clrev(self, rev2):
> +        """map from repo2 changelog rev to temporary rev in self.changelog"""
> +        node = self.repo2.changelog.node(rev2)
> +        return self.changelog.rev(node)
> +
> +    @localrepo.unfilteredpropertycache
> +    def manifest(self):
> +        return unionmanifest(self.sopener, self.repo2.sopener,
> +                             self._clrev)
> +
> +    def url(self):
> +        return self._url
> +
> +    def file(self, f):
> +        return unionfilelog(self.sopener, f, self.repo2.sopener,
> +                            self._clrev, self)
> +
> +    def close(self):
> +        self.repo2.close()
> +
> +    def cancopy(self):
> +        return False
> +
> +    def peer(self):
> +        return unionpeer(self)
> +
> +def instance(ui, path, create):
> +    u = util.url(path)
> +    assert u.scheme == 'union'
> +    repopath, repopath2 = u.path.split("+", 1)
> +    return unionrepository(ui, repopath, repopath2)

This should raise util.Abort() if create is True, since creating a
unionrepo wouldn't make much sense. bundlerepo does this same check
already.

On a related note, it'd be interesting to see what happens if you
clone a repo with a union: destination.

> diff --git a/tests/test-bundle-simple.t b/tests/test-bundle-simple.t
> new file mode 100644
> --- /dev/null
> +++ b/tests/test-bundle-simple.t
> @@ -0,0 +1,127 @@
> +# sed 's,bundle:repo1+repo2[.]hg,union:repo1+repo2,g' test-bundle-simple.t > test-union-simple.t
> +
> +  $ hg init repo1
> +  $ cd repo1
> +  $ touch repo1-0
> +  $ echo repo1-0 > f
> +  $ hg ci -Aqmrepo1-0
> +  $ touch repo1-1
> +  $ echo repo1-1 >> f
> +  $ hg ci -Aqmrepo1-1
> +  $ touch repo1-2
> +  $ echo repo1-2 >> f
> +  $ hg ci -Aqmrepo1-2
> +  $ hg log --template '{rev}:{node|short}  {desc|firstline}\n'
> +  2:68c0685446a3  repo1-2
> +  1:8a58db72e69d  repo1-1
> +  0:f093fec0529b  repo1-0
> +  $ tip1=`hg id -q`
> +  $ cd ..
> +
> +  $ hg clone -q repo1 --rev 0 repo2
> +  $ cd repo2
> +  $ touch repo2-1
> +  $ sed '1irepo2-1 at top' f > f.tmp
> +  $ mv f.tmp f
> +  $ hg ci -Aqmrepo2-1
> +  $ touch repo2-2
> +  $ hg pull -q ../repo1 -r 1
> +  $ hg merge -q
> +  $ hg ci -Aqmrepo2-2-merge
> +  $ touch repo2-3
> +  $ echo repo2-3 >> f
> +  $ hg ci -mrepo2-3
> +  $ hg log --template '{rev}:{node|short}  {desc|firstline}\n'
> +  4:2f0d178c469c  repo2-3
> +  3:9e6fb3e0b9da  repo2-2-merge
> +  2:8a58db72e69d  repo1-1
> +  1:c337dba826e7  repo2-1
> +  0:f093fec0529b  repo1-0
> +  $ cd ..
> +
> +  $ hg -R repo2 bundle --all repo2.hg
> +  5 changesets found
> +
> +  $ hg -R bundle:repo1+repo2.hg log --template '{rev}:{node|short}  {desc|firstline}\n'
> +  5:2f0d178c469c  repo2-3
> +  4:9e6fb3e0b9da  repo2-2-merge
> +  3:c337dba826e7  repo2-1
> +  2:68c0685446a3  repo1-2
> +  1:8a58db72e69d  repo1-1
> +  0:f093fec0529b  repo1-0
> +
> +  $ hg -R bundle:repo1+repo2.hg mani -r $tip1
> +  f
> +  repo1-0
> +  repo1-1
> +  repo1-2
> +  $ hg -R bundle:repo1+repo2.hg mani -r 4
> +  f
> +  repo1-0
> +  repo1-1
> +  repo2-1
> +  repo2-2
> +
> +  $ hg -R repo1 cat repo1/f -r2
> +  repo1-0
> +  repo1-1
> +  repo1-2
> +
> +  $ hg -R bundle:repo1+repo2.hg cat -r$tip1 repo1/f
> +  repo1-0
> +  repo1-1
> +  repo1-2
> +
> +  $ hg -R bundle:repo1+repo2.hg cat -r4 $TESTTMP/repo1/f
> +  repo2-1 at top
> +  repo1-0
> +  repo1-1
> +
> +  $ hg -R bundle:repo1+repo2.hg diff -r$tip1 -rtip
> +  diff -r 68c0685446a3 -r 2f0d178c469c f
> +  --- a/f      Thu Jan 01 00:00:00 1970 +0000
> +  +++ b/f      Thu Jan 01 00:00:00 1970 +0000
> +  @@ -1,3 +1,4 @@
> +  +repo2-1 at top
> +   repo1-0
> +   repo1-1
> +  -repo1-2
> +  +repo2-3
> +
> +  $ hg -R bundle:repo1+repo2.hg heads --template '{rev}:{node|short}  {desc|firstline}\n'
> +  5:2f0d178c469c  repo2-3
> +  2:68c0685446a3  repo1-2
> +  $ hg -R bundle:repo1+repo2.hg id -r "ancestor($tip1, 5)"
> +  8a58db72e69d
> +
> +  $ hg -R bundle:repo1+repo2.hg annotate $TESTTMP/repo1/f -r tip
> +  3: repo2-1 at top
> +  0: repo1-0
> +  1: repo1-1
> +  5: repo2-3
> +
> +  $ hg clone -U bundle:repo1+repo2.hg repo3
> +  requesting all changes
> +  adding changesets
> +  adding manifests
> +  adding file changes
> +  added 6 changesets with 11 changes to 6 files (+1 heads)
> +
> +  $ hg -R repo3 verify
> +  checking changesets
> +  checking manifests
> +  crosschecking files in changesets and manifests
> +  checking files
> +  6 files, 6 changesets, 11 total revisions
> +
> +  $ hg -R repo3 heads --template '{rev}:{node|short}  {desc|firstline}\n'
> +  5:2f0d178c469c  repo2-3
> +  2:68c0685446a3  repo1-2
> +
> +  $ hg -R repo3 log --template '{rev}:{node|short}  {desc|firstline}\n'
> +  5:2f0d178c469c  repo2-3
> +  4:9e6fb3e0b9da  repo2-2-merge
> +  3:c337dba826e7  repo2-1
> +  2:68c0685446a3  repo1-2
> +  1:8a58db72e69d  repo1-1
> +  0:f093fec0529b  repo1-0
> diff --git a/tests/test-union-simple.t b/tests/test-union-simple.t
> new file mode 100644
> --- /dev/null
> +++ b/tests/test-union-simple.t
> @@ -0,0 +1,127 @@
> +# sed 's,bundle:repo1+repo2[.]hg,union:repo1+repo2,g' test-bundle-simple.t > test-union-simple.t
> +
> +  $ hg init repo1
> +  $ cd repo1
> +  $ touch repo1-0
> +  $ echo repo1-0 > f
> +  $ hg ci -Aqmrepo1-0
> +  $ touch repo1-1
> +  $ echo repo1-1 >> f
> +  $ hg ci -Aqmrepo1-1
> +  $ touch repo1-2
> +  $ echo repo1-2 >> f
> +  $ hg ci -Aqmrepo1-2
> +  $ hg log --template '{rev}:{node|short}  {desc|firstline}\n'
> +  2:68c0685446a3  repo1-2
> +  1:8a58db72e69d  repo1-1
> +  0:f093fec0529b  repo1-0
> +  $ tip1=`hg id -q`
> +  $ cd ..
> +
> +  $ hg clone -q repo1 --rev 0 repo2
> +  $ cd repo2
> +  $ touch repo2-1
> +  $ sed '1irepo2-1 at top' f > f.tmp
> +  $ mv f.tmp f
> +  $ hg ci -Aqmrepo2-1
> +  $ touch repo2-2
> +  $ hg pull -q ../repo1 -r 1
> +  $ hg merge -q
> +  $ hg ci -Aqmrepo2-2-merge
> +  $ touch repo2-3
> +  $ echo repo2-3 >> f
> +  $ hg ci -mrepo2-3
> +  $ hg log --template '{rev}:{node|short}  {desc|firstline}\n'
> +  4:2f0d178c469c  repo2-3
> +  3:9e6fb3e0b9da  repo2-2-merge
> +  2:8a58db72e69d  repo1-1
> +  1:c337dba826e7  repo2-1
> +  0:f093fec0529b  repo1-0
> +  $ cd ..
> +
> +  $ hg -R repo2 bundle --all repo2.hg
> +  5 changesets found
> +
> +  $ hg -R union:repo1+repo2 log --template '{rev}:{node|short}  {desc|firstline}\n'
> +  5:2f0d178c469c  repo2-3
> +  4:9e6fb3e0b9da  repo2-2-merge
> +  3:c337dba826e7  repo2-1
> +  2:68c0685446a3  repo1-2
> +  1:8a58db72e69d  repo1-1
> +  0:f093fec0529b  repo1-0
> +
> +  $ hg -R union:repo1+repo2 mani -r $tip1
> +  f
> +  repo1-0
> +  repo1-1
> +  repo1-2
> +  $ hg -R union:repo1+repo2 mani -r 4
> +  f
> +  repo1-0
> +  repo1-1
> +  repo2-1
> +  repo2-2
> +
> +  $ hg -R repo1 cat repo1/f -r2
> +  repo1-0
> +  repo1-1
> +  repo1-2
> +
> +  $ hg -R union:repo1+repo2 cat -r$tip1 repo1/f
> +  repo1-0
> +  repo1-1
> +  repo1-2
> +
> +  $ hg -R union:repo1+repo2 cat -r4 $TESTTMP/repo1/f
> +  repo2-1 at top
> +  repo1-0
> +  repo1-1
> +
> +  $ hg -R union:repo1+repo2 diff -r$tip1 -rtip
> +  diff -r 68c0685446a3 -r 2f0d178c469c f
> +  --- a/f      Thu Jan 01 00:00:00 1970 +0000
> +  +++ b/f      Thu Jan 01 00:00:00 1970 +0000
> +  @@ -1,3 +1,4 @@
> +  +repo2-1 at top
> +   repo1-0
> +   repo1-1
> +  -repo1-2
> +  +repo2-3
> +
> +  $ hg -R union:repo1+repo2 heads --template '{rev}:{node|short}  {desc|firstline}\n'
> +  5:2f0d178c469c  repo2-3
> +  2:68c0685446a3  repo1-2
> +  $ hg -R union:repo1+repo2 id -r "ancestor($tip1, 5)"
> +  8a58db72e69d
> +
> +  $ hg -R union:repo1+repo2 annotate $TESTTMP/repo1/f -r tip
> +  3: repo2-1 at top
> +  0: repo1-0
> +  1: repo1-1
> +  5: repo2-3
> +
> +  $ hg clone -U union:repo1+repo2 repo3
> +  requesting all changes
> +  adding changesets
> +  adding manifests
> +  adding file changes
> +  added 6 changesets with 11 changes to 6 files (+1 heads)
> +
> +  $ hg -R repo3 verify
> +  checking changesets
> +  checking manifests
> +  crosschecking files in changesets and manifests
> +  checking files
> +  6 files, 6 changesets, 11 total revisions
> +
> +  $ hg -R repo3 heads --template '{rev}:{node|short}  {desc|firstline}\n'
> +  5:2f0d178c469c  repo2-3
> +  2:68c0685446a3  repo1-2
> +
> +  $ hg -R repo3 log --template '{rev}:{node|short}  {desc|firstline}\n'
> +  5:2f0d178c469c  repo2-3
> +  4:9e6fb3e0b9da  repo2-2-merge
> +  3:c337dba826e7  repo2-1
> +  2:68c0685446a3  repo1-2
> +  1:8a58db72e69d  repo1-1
> +  0:f093fec0529b  repo1-0
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel


More information about the Mercurial-devel mailing list