[PATCH 4 of 4] localrepo: modify changegroupsubset to collect shallow manifest and file nodes

Vishakh H vsh426 at gmail.com
Fri Aug 13 09:12:47 CDT 2010


# HG changeset patch
# User Vishakh H <vsh426 at gmail.com>
# Date 1281708749 -19800
# Node ID e57cea7ba13448ef95b8b6489d94d75fea46db69
# Parent  37e29e978a9c4596525383970e2e7ee910cdfac7
localrepo: modify changegroupsubset to collect shallow manifest and file nodes

When shallowroot is passed to changegroupsubset, we compute its descendants.
Knowing this subgraph, we can work out which manifest and filelog nodes need
to be in the changegroup. For most changesets, the nodes are collected simply
because the changeset is a descendant of the shallowroot. However, if a
changeset introduces file nodes from outside the tree -- as happens for the
shallowroot itself, or for a merge with a node that is not a descendant of the
shallowroot -- we have to read the complete manifest to determine the file
nodes.

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1263,7 +1263,8 @@
             for node in nodes:
                 self.ui.debug("%s\n" % hex(node))
 
-    def changegroupsubset(self, bases, heads, source, extranodes=None):
+    def changegroupsubset(self, bases, heads, source, extranodes=None,
+                          shallowroot=None):
         """Compute a changegroup consisting of all the nodes that are
         descendents of any of the bases and ancestors of any of the heads.
         Return a chunkbuffer object whose read() method will return
@@ -1282,6 +1283,9 @@
         values are lists of (node, linknode) tuples, where node is a wanted
         node and linknode is the changelog node that should be transmitted as
         the linkrev.
+
+        shallowroot is the root of the shallow clone for which the
+        changegroup is generated.
         """
 
         # Set up some initial variables
@@ -1295,7 +1299,7 @@
             bases = [nullid]
         msng_cl_lst, bases, heads = cl.nodesbetween(bases, heads)
 
-        if extranodes is None:
+        if extranodes is None and shallowroot is None:
             # can we go through the fast path ?
             heads.sort()
             allheads = self.heads()
@@ -1318,6 +1322,42 @@
         # Nor do we know which filenodes are missing.
         msng_filenode_set = {}
 
+        # nodes needed in shallow clone
+        shallowclnodes = set()
+        shallowfilenodes = {}
+        if shallowroot is not None:
+            shallowrev = cl.rev(shallowroot)
+            # get all changelog nodes present in shallow clone
+            shtree = set([shallowrev])
+            shtree.update(cl.descendants(shallowrev))
+
+            def updatenodes(revlist, add):
+                # read full manifest when changeset introduces filenodes
+                # from outside the shallow tree
+                for r in revlist:
+                    p = cl.parentrevs(r)
+                    if (p[0] not in shtree or
+                        (p[1] != nullrev and p[1] not in shtree)):
+                        m = mnfst.read(cl.read(cl.node(r))[0])
+                        clnode = cl.node(r)
+                        for f in m:
+                            add(f, m[f], clnode)
+
+            if shallowrev in commonrevs:
+                # remote has shallowroot and some descendants
+                commonrevs.intersection_update(shtree)
+                # find filenodes that exist in remote
+                updatenodes(commonrevs,
+                            lambda fn, fnode, clnode:
+                            shallowfilenodes.setdefault(fn, set()).add(fnode))
+            shallowclnodes = shtree.difference(commonrevs)
+            # find filenodes needed in remote
+            updatenodes(shallowclnodes,
+                lambda fn, fnode, clnode:
+                msng_filenode_set.setdefault(fn, {}).setdefault(fnode, clnode))
+            shtree.clear()
+            shallowclnodes = set(map(cl.node, shallowclnodes))
+
         junk = mnfst.index[len(mnfst) - 1] # Get around a bug in lazyindex
         junk = None
 
@@ -1373,8 +1413,10 @@
         # node that the recipient of the changegroup will already have, we can
         # also assume the recipient will have all the parents.  This function
         # prunes them from the set of missing nodes.
-        def prune(revlog, missingnodes):
+        def prune(revlog, missingnodes, shnodes=None):
             hasset = set()
+            if shnodes:
+                hasset.update(shnodes)
             # If a 'missing' filenode thinks it belongs to a changenode we
             # assume the recipient must have, then the recipient must have
             # that filenode.
@@ -1384,8 +1426,9 @@
                     hasset.add(n)
             for n in hasset:
                 missingnodes.pop(n, None)
-            for r in revlog.ancestors(*[revlog.rev(n) for n in hasset]):
-                missingnodes.pop(revlog.node(r), None)
+            if not shallowroot:
+                for r in revlog.ancestors(*[revlog.rev(n) for n in hasset]):
+                    missingnodes.pop(revlog.node(r), None)
 
         # Add the nodes that were explicitly requested.
         def add_extra_nodes(name, nodes):
@@ -1396,12 +1439,29 @@
                 if node not in nodes:
                     nodes[node] = linknode
 
+        def checkparent(nodelist, revlog, shnodes):
+            if not shallowroot or len(nodelist) == 0:
+                    return False
+            frev = revlog.parentrevs(revlog.rev(nodelist[0]))[0]
+            lrev = revlog.linkrev(frev)
+            if lrev not in commonrevs:
+                if revlog.node(frev) in shnodes:
+                    return False
+                else:
+                    return True
+            else:
+                return False
+
         # Now that we have all theses utility functions to help out and
         # logically divide up the task, generate the group.
         def gengroup():
             # The set of changed files starts empty.
             changedfiles = set()
-            collect = changegroup.collector(cl, msng_mnfst_set, changedfiles)
+            # update with files needed for shallow clone
+            if shallowroot:
+                changedfiles.update(msng_filenode_set)
+            collect = changegroup.collector(cl, msng_mnfst_set,
+                                            changedfiles, shallowclnodes)
 
             # Create a changenode group generator that will call our functions
             # back to lookup the owning changenode and collect information.
@@ -1410,17 +1470,20 @@
                 yield chnk
                 self.ui.progress(_('bundling changes'), cnt, unit=_('chunks'))
             self.ui.progress(_('bundling changes'), None)
+            shallowclnodes.clear()
 
             prune(mnfst, msng_mnfst_set)
             add_extra_nodes(1, msng_mnfst_set)
             msng_mnfst_lst = msng_mnfst_set.keys()
             # Sort the manifestnodes by revision number.
             msng_mnfst_lst.sort(key=mnfst.rev)
+            fullrev = checkparent(msng_mnfst_lst, mnfst, [])
             # Create a generator for the manifestnodes that calls our lookup
             # and data collection functions back.
             group = mnfst.group(msng_mnfst_lst,
                                 lambda mnode: msng_mnfst_set[mnode],
-                                filenode_collector(changedfiles))
+                                filenode_collector(changedfiles),
+                                fullrev=fullrev)
             for cnt, chnk in enumerate(group):
                 yield chnk
                 self.ui.progress(_('bundling manifests'), cnt, unit=_('chunks'))
@@ -1446,7 +1509,7 @@
                 # Toss out the filenodes that the recipient isn't really
                 # missing.
                 missingfnodes = msng_filenode_set.pop(fname, {})
-                prune(filerevlog, missingfnodes)
+                prune(filerevlog, missingfnodes, shallowfilenodes.get(fname, None))
                 add_extra_nodes(fname, missingfnodes)
                 # If any filenodes are left, generate the group for them,
                 # otherwise don't bother.
@@ -1456,11 +1519,14 @@
                     # Sort the filenodes by their revision # (topological order)
                     nodeiter = list(missingfnodes)
                     nodeiter.sort(key=filerevlog.rev)
+                    fullrev = checkparent(nodeiter, filerevlog,
+                                                 shallowfilenodes.pop(fname, []))
                     # Create a group generator and only pass in a changenode
                     # lookup function as we need to collect no information
                     # from filenodes.
                     group = filerevlog.group(nodeiter,
-                                             lambda fnode: missingfnodes[fnode])
+                                             lambda fnode: missingfnodes[fnode],
+                                             fullrev=fullrev)
                     for chnk in group:
                         self.ui.progress(
                             _('bundling files'), cnt, item=fname, unit=_('chunks'))


More information about the Mercurial-devel mailing list