[PATCH 4 of 4] localrepo: modify changegroupsubset to collect shallow manifest and file nodes

Vishakh H vsh426 at gmail.com
Thu Aug 12 10:39:50 CDT 2010


# HG changeset patch
# User Vishakh H <vsh426 at gmail.com>
# Date 1281627412 -19800
# Node ID 88eedff17e5bbefb0140fbc9c2ed0a77484158cc
# Parent  fcbe54b1378310b03f39d5a23f9cc6b069660f7b
localrepo: modify changegroupsubset to collect shallow manifest and file nodes

When shallowroot is passed to changegroupsubset, we calculate its descendants.
Knowing this subgraph, we can figure out which manifest and filelog nodes
need to be in the changegroup. For most changesets, collection happens whenever
the changeset node is a descendant of the shallowroot. If a changeset introduces
file nodes from outside that tree (as is the case for the shallowroot itself, or
for a merge with a node that is not a descendant of the shallowroot), we have to
read the complete manifest to determine the file nodes.

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1295,7 +1295,7 @@
             bases = [nullid]
         msng_cl_lst, bases, heads = cl.nodesbetween(bases, heads)
 
-        if extranodes is None:
+        if extranodes is None and shallowroot is None:
             # can we go through the fast path ?
             heads.sort()
             allheads = self.heads()
@@ -1318,6 +1318,42 @@
         # Nor do we know which filenodes are missing.
         msng_filenode_set = {}
 
+        # nodes needed in shallow clone
+        shallowclnodes = set()
+        shallowfilenodes = {}
+        if shallowroot is not None:
+            shallowrev = cl.rev(shallowroot)
+            # get all changelog nodes present in shallow clone
+            shtree = set([shallowrev])
+            shtree.update(cl.descendants(shallowrev))
+
+            def updatenodes(revlist, add):
+            # read full manifest when changeset introduces filenodes from
+            # outside the shallow tree
+                for r in revlist:
+                    p = cl.parentrevs(r)
+                    if (p[0] not in shtree or
+                        (p[1] != nullrev and p[1] not in shtree)):
+                        m = mnfst.read(cl.read(cl.node(r))[0])
+                        clnode = cl.node(r)
+                        for f in m:
+                            add(f, m[f], clnode)
+
+            if shallowrev in commonrevs:
+                # remote has shallowroot and some descendants
+                commonrevs.intersection_update(shtree)
+                # find filenodes that exist in remote
+                updatenodes(commonrevs,
+                            lambda fn, fnode, clnode:
+                            shallowfilenodes.setdefault(fn, set()).add(fnode))
+            shallowclnodes = shtree.difference(commonrevs)
+            # find filenodes needed in remote
+            updatenodes(shallowclnodes,
+                lambda fn, fnode, clnode:
+                msng_filenode_set.setdefault(fn, {}).setdefault(fnode, clnode))
+            shtree.clear()
+            shallowclnodes = set(map(cl.node, shallowclnodes))
+
         junk = mnfst.index[len(mnfst) - 1] # Get around a bug in lazyindex
         junk = None
 
@@ -1373,8 +1409,10 @@
         # node that the recipient of the changegroup will already have, we can
         # also assume the recipient will have all the parents.  This function
         # prunes them from the set of missing nodes.
-        def prune(revlog, missingnodes):
+        def prune(revlog, missingnodes, shnodes=None):
             hasset = set()
+            if shnodes:
+                hasset.update(shnodes)
             # If a 'missing' filenode thinks it belongs to a changenode we
             # assume the recipient must have, then the recipient must have
             # that filenode.
@@ -1384,8 +1422,9 @@
                     hasset.add(n)
             for n in hasset:
                 missingnodes.pop(n, None)
-            for r in revlog.ancestors(*[revlog.rev(n) for n in hasset]):
-                missingnodes.pop(revlog.node(r), None)
+            if not shallowroot:
+                for r in revlog.ancestors(*[revlog.rev(n) for n in hasset]):
+                    missingnodes.pop(revlog.node(r), None)
 
         # Add the nodes that were explicitly requested.
         def add_extra_nodes(name, nodes):
@@ -1396,12 +1435,32 @@
                 if node not in nodes:
                     nodes[node] = linknode
 
+        def checkparent(node, revlog, shnodes):
+            frev = revlog.parentrevs(revlog.rev(node))[0]
+            if not shallowroot:
+                if frev == nullrev:
+                    return True
+                else:
+                    return False
+            lrev = revlog.linkrev(frev)
+            if lrev not in commonrevs:
+                if revlog.node(frev) in shnodes:
+                    return False
+                else:
+                    return True
+            else:
+                return False
+
         # Now that we have all theses utility functions to help out and
         # logically divide up the task, generate the group.
         def gengroup():
             # The set of changed files starts empty.
             changedfiles = set()
-            collect = changegroup.collector(cl, msng_mnfst_set, changedfiles)
+            # update with files needed for shallow clone
+            if shallowroot:
+                changedfiles.update(msng_filenode_set)
+            collect = changegroup.collector(cl, msng_mnfst_set,
+                                            changedfiles, shallowclnodes)
 
             # Create a changenode group generator that will call our functions
             # back to lookup the owning changenode and collect information.
@@ -1410,17 +1469,20 @@
                 yield chnk
                 self.ui.progress(_('bundling changes'), cnt, unit=_('chunks'))
             self.ui.progress(_('bundling changes'), None)
+            shallowclnodes.clear()
 
             prune(mnfst, msng_mnfst_set)
             add_extra_nodes(1, msng_mnfst_set)
             msng_mnfst_lst = msng_mnfst_set.keys()
             # Sort the manifestnodes by revision number.
             msng_mnfst_lst.sort(key=mnfst.rev)
+            fullrev = checkparent(msng_mnfst_lst[0], mnfst, [])
             # Create a generator for the manifestnodes that calls our lookup
             # and data collection functions back.
             group = mnfst.group(msng_mnfst_lst,
                                 lambda mnode: msng_mnfst_set[mnode],
-                                filenode_collector(changedfiles))
+                                filenode_collector(changedfiles),
+                                fullrev=fullrev)
             for cnt, chnk in enumerate(group):
                 yield chnk
                 self.ui.progress(_('bundling manifests'), cnt, unit=_('chunks'))
@@ -1446,7 +1508,7 @@
                 # Toss out the filenodes that the recipient isn't really
                 # missing.
                 missingfnodes = msng_filenode_set.pop(fname, {})
-                prune(filerevlog, missingfnodes)
+                prune(filerevlog, missingfnodes, shallowfilenodes.get(fname, None))
                 add_extra_nodes(fname, missingfnodes)
                 # If any filenodes are left, generate the group for them,
                 # otherwise don't bother.
@@ -1456,11 +1518,14 @@
                     # Sort the filenodes by their revision # (topological order)
                     nodeiter = list(missingfnodes)
                     nodeiter.sort(key=filerevlog.rev)
+                    fullrev = checkparent(nodeiter[0], filerevlog,
+                                                 shallowfilenodes.pop(fname, []))
                     # Create a group generator and only pass in a changenode
                     # lookup function as we need to collect no information
                     # from filenodes.
                     group = filerevlog.group(nodeiter,
-                                             lambda fnode: missingfnodes[fnode])
+                                             lambda fnode: missingfnodes[fnode],
+                                             fullrev=fullrev)
                     for chnk in group:
                         self.ui.progress(
                             _('bundling files'), cnt, item=fname, unit=_('chunks'))


More information about the Mercurial-devel mailing list