[PATCH 4 of 4] localrepo: modify changegroupsubset to collect shallow manifest and file nodes

Vishakh H vsh426 at gmail.com
Tue Aug 10 08:08:56 CDT 2010


# HG changeset patch
# User Vishakh H <vsh426 at gmail.com>
# Date 1281445635 -19800
# Node ID 19f9e58d79b28419ea8c6bb1bfa7d66161e371a1
# Parent  fad9bbac7eea83282c1d807213702dc048fe3d34
localrepo: modify changegroupsubset to collect shallow manifest and file nodes

When shallowroot is passed to changegroupsubset, we compute its descendants.
Knowing this subgraph, we can work out which manifest and filelog nodes need
to be in the changegroup. For most changesets, nodes are collected when the
changeset is a descendant of the shallowroot. If a changeset introduces file
nodes from outside the shallow tree (as is the case for the shallowroot
itself, or for a merge with a node that is not a descendant of the
shallowroot), we have to read the complete manifest to determine the file
nodes.

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1295,7 +1295,7 @@
             bases = [nullid]
         msng_cl_lst, bases, heads = cl.nodesbetween(bases, heads)
 
-        if extranodes is None:
+        if extranodes is None and shallowroot is None:
             # can we go through the fast path ?
             heads.sort()
             allheads = self.heads()
@@ -1318,6 +1318,42 @@
         # Nor do we know which filenodes are missing.
         msng_filenode_set = {}
 
+        # nodes needed in shallow clone
+        sh_cl = set()
+        sh_filenode = {}
+        if shallowroot is not None:
+            shallowrev = cl.rev(shallowroot)
+            # get all changelog nodes present in shallow clone
+            shtree = set([-1, shallowrev])
+            shtree.update(cl.descendants(shallowrev))
+
+            def updatenodes(revlist, filenodes, add):
+            # read full manifest when changeset introduces filenodes from
+            # outside the shallow tree
+                for r in revlist:
+                    p = cl.parentrevs(r)
+                    if (p[0] not in shtree or
+                        (p[1] != nullrev and p[1] not in shtree)):
+                        m = mnfst.read(cl.read(cl.node(r))[0])
+                        clnode = cl.node(r)
+                        for f in m:
+                            add(filenodes, f, m[f], clnode)
+
+            if shallowrev in commonrevs:
+                # remote has shallowroot and some descendants
+                commonrevs.intersection_update(shtree)
+                # find filenodes that exist in remote
+                updatenodes(commonrevs, sh_filenode,
+                            lambda nodeset, fn, fnode, cn:
+                            nodeset.setdefault(fn, set()).add(fnode))
+            sh_cl = shtree.difference(commonrevs)
+            # find filenodes needed in remote
+            updatenodes(sh_cl, msng_filenode_set,
+                        lambda nodeset, fn, fnode, clnode:
+                        nodeset.setdefault(fn, {}).setdefault(fnode, clnode))
+            shtree.clear()
+            sh_cl = set(map(cl.node, sh_cl))
+
         junk = mnfst.index[len(mnfst) - 1] # Get around a bug in lazyindex
         junk = None
 
@@ -1373,8 +1409,10 @@
         # node that the recipient of the changegroup will already have, we can
         # also assume the recipient will have all the parents.  This function
         # prunes them from the set of missing nodes.
-        def prune(revlog, missingnodes):
+        def prune(revlog, missingnodes, shnodes=None):
             hasset = set()
+            if shnodes:
+                hasset.update(shnodes)
             # If a 'missing' filenode thinks it belongs to a changenode we
             # assume the recipient must have, then the recipient must have
             # that filenode.
@@ -1384,8 +1422,9 @@
                     hasset.add(n)
             for n in hasset:
                 missingnodes.pop(n, None)
-            for r in revlog.ancestors(*[revlog.rev(n) for n in hasset]):
-                missingnodes.pop(revlog.node(r), None)
+            if not shallowroot:
+                for r in revlog.ancestors(*[revlog.rev(n) for n in hasset]):
+                    missingnodes.pop(revlog.node(r), None)
 
         # Add the nodes that were explicitly requested.
         def add_extra_nodes(name, nodes):
@@ -1396,12 +1435,29 @@
                 if node not in nodes:
                     nodes[node] = linknode
 
+        def checkparent(node, revlog, shnode):
+            if not shallowroot:
+                return -1
+            frev = revlog.parentrevs(revlog.rev(node))[0]
+            lrev = revlog.linkrev(frev)
+            if lrev not in commonrevs:
+                if revlog.node(frev) in shnode:
+                    return -2
+                else:
+                    return frev
+            else:
+                return -2
+
         # Now that we have all theses utility functions to help out and
         # logically divide up the task, generate the group.
         def gengroup():
             # The set of changed files starts empty.
             changedfiles = set()
-            collect = changegroup.collector(cl, msng_mnfst_set, changedfiles)
+            # update with files needed for shallow clone
+            if shallowroot:
+                changedfiles.update(msng_filenode_set)
+            collect = changegroup.collector(cl, msng_mnfst_set,
+                                            changedfiles, sh_cl)
 
             # Create a changenode group generator that will call our functions
             # back to lookup the owning changenode and collect information.
@@ -1416,11 +1472,13 @@
             msng_mnfst_lst = msng_mnfst_set.keys()
             # Sort the manifestnodes by revision number.
             msng_mnfst_lst.sort(key=mnfst.rev)
+            missingrev = checkparent(msng_mnfst_lst[0], mnfst, [])
             # Create a generator for the manifestnodes that calls our lookup
             # and data collection functions back.
             group = mnfst.group(msng_mnfst_lst,
                                 lambda mnode: msng_mnfst_set[mnode],
-                                filenode_collector(changedfiles))
+                                filenode_collector(changedfiles),
+                                missingrev=missingrev)
             for cnt, chnk in enumerate(group):
                 yield chnk
                 self.ui.progress(_('bundling manifests'), cnt, unit=_('chunks'))
@@ -1430,6 +1488,7 @@
             # them.
             msng_mnfst_lst = None
             msng_mnfst_set.clear()
+            sh_cl.clear()
 
             if extranodes:
                 for fname in extranodes:
@@ -1446,7 +1505,7 @@
                 # Toss out the filenodes that the recipient isn't really
                 # missing.
                 missingfnodes = msng_filenode_set.pop(fname, {})
-                prune(filerevlog, missingfnodes)
+                prune(filerevlog, missingfnodes, sh_filenode.get(fname, None))
                 add_extra_nodes(fname, missingfnodes)
                 # If any filenodes are left, generate the group for them,
                 # otherwise don't bother.
@@ -1456,11 +1515,14 @@
                     # Sort the filenodes by their revision # (topological order)
                     nodeiter = list(missingfnodes)
                     nodeiter.sort(key=filerevlog.rev)
+                    missingrev = checkparent(nodeiter[0], filerevlog,
+                                                 sh_filenode.pop(fname, []))
                     # Create a group generator and only pass in a changenode
                     # lookup function as we need to collect no information
                     # from filenodes.
                     group = filerevlog.group(nodeiter,
-                                             lambda fnode: missingfnodes[fnode])
+                                             lambda fnode: missingfnodes[fnode],
+                                             missingrev=missingrev)
                     for chnk in group:
                         self.ui.progress(
                             _('bundling files'), cnt, item=fname, unit=_('chunks'))


More information about the Mercurial-devel mailing list