[PATCH 11 of 14] localrepo: changegroupsubset collects shallow changeset, manifest and file nodes

Peter Arrenbrecht peter.arrenbrecht at gmail.com
Fri Jul 30 01:49:53 CDT 2010


On Fri, Jul 16, 2010 at 9:15 AM, Vishakh H <vsh426 at gmail.com> wrote:
> # HG changeset patch
> # User Vishakh H <vsh426 at gmail.com>
> # Date 1279263210 -19800
> # Node ID 3ba8f61d3793dbe0aad3fe8c7ab6e29fd7f53a2e
> # Parent  2a420893bd473f2d187089e314a86f8ccfe5979f
> localrepo: changegroupsubset collects shallow changeset, manifest and file nodes
>
> when shallowroot is passed to changegroupsubset, we calculate its descendants.
> knowing the subgraph, we can figure out which nodes of manifest and filelog need to
> be in the changegroup.

I haven't had time to step through this in detail, alas, so I might
come back with more comments after my vacation.

> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
> --- a/mercurial/localrepo.py
> +++ b/mercurial/localrepo.py
> @@ -1312,7 +1312,7 @@
>             bases = [nullid]
>         msng_cl_lst, bases, heads = cl.nodesbetween(bases, heads)
>
> -        if extranodes is None:
> +        if extranodes is None and shallowroot is None:
>             # can we go through the fast path ?
>             heads.sort()
>             allheads = self.heads()
> @@ -1356,6 +1356,30 @@
>         msng_mnfst_set = {}
>         # Nor do we know which filenodes are missing.
>         msng_filenode_set = {}
> +        # nodes needed in shallow clone
> +        sh_cl = set()
> +        sh_mnfst = set()
> +        sh_filenode = {}
> +        if shallowroot is not None:
> +            # get all changelog nodes present in shallow clone
> +            sh_cl.add(shallowroot)
> +            sh_cl.update(map(cl.node, cl.descendants(cl.rev(shallowroot))))
> +            # corresponding manifest and file nodes
> +            for shnode in sh_cl:
> +                shmnfstnode = cl.read(shnode)[0]
> +                sh_mnfst.add(shmnfstnode)
> +                m = mnfst.read(shmnfstnode)

This means you're reading every manifest in the shallow scope, which
can be expensive. Is this really necessary? IIRC, in my earlier work I
only did this for the specific manifests that could cause trouble.

> +                for f in m:
> +                    sh_filenode.setdefault(f, set()).add(m[f])
> +            if shallowroot in has_cl_set:
> +                # remote has only shallowroot and descendants
> +                has_cl_set.intersection_update(sh_cl)
> +            else:
> +                shallowmnfst = cl.read(shallowroot)[0]
> +                m = mnfst.read(shallowmnfst)
> +                for f in m:
> +                    ndset = msng_filenode_set.setdefault(f, {})
> +                    ndset.setdefault(m[f], shallowroot)

I guess here you're handling filerevs that come from earlier revisions
but are still needed in the initial shallow pull.

>         junk = mnfst.index[len(mnfst) - 1] # Get around a bug in lazyindex
>         junk = None
> @@ -1459,7 +1483,12 @@
>         def gengroup():
>             # The set of changed files starts empty.
>             changedfiles = {}
> -            collect = changegroup.collector(cl, msng_mnfst_set, changedfiles)
> +            #update with files in shallow root
> +            if shallowroot and shallowroot not in has_cl_set:
> +                for fn in sh_filenode:
> +                    changedfiles.setdefault(fn,fn)

...and here too. But I fail to see where you handle the case of
filerevs that may be absent on the target, as I noted in my initial
response.

> +            collect = changegroup.collector(cl, msng_mnfst_set,
> +                                            changedfiles, sh_cl)
>
>             # Create a changenode group generator that will call our functions
>             # back to lookup the owning changenode and collect information.
> @@ -1491,7 +1520,8 @@
>             # Create a generator for the manifestnodes that calls our lookup
>             # and data collection functions back.
>             group = mnfst.group(msng_mnfst_lst, lookup_manifest_link,
> -                                filenode_collector(changedfiles))
> +                                filenode_collector(changedfiles),
> +                                shallownodes=sh_mnfst)
>             cnt = 0
>             for chnk in group:
>                 yield chnk
> @@ -1503,6 +1533,7 @@
>             # them.
>             msng_mnfst_lst = None
>             msng_mnfst_set.clear()
> +            sh_mnfst.clear()
>
>             if extranodes:
>                 for fname in extranodes:
> @@ -1531,11 +1562,13 @@
>                     yield fname
>                     # Sort the filenodes by their revision #
>                     msng_filenode_lst.sort(key=filerevlog.rev)
> +                    sh_nodes = sh_filenode.get(fname, [])
>                     # Create a group generator and only pass in a changenode
>                     # lookup function as we need to collect no information
>                     # from filenodes.
>                     group = filerevlog.group(msng_filenode_lst,
> -                                             lookup_filenode_link_func(fname))
> +                                             lookup_filenode_link_func(fname),
> +                                             shallownodes=sh_nodes)
>                     for chnk in group:
>                         self.ui.progress(
>                             _('bundling files'), cnt, item=fname, unit=_('chunks'))
> @@ -1544,6 +1577,8 @@
>                 if fname in msng_filenode_set:
>                     # Don't need this anymore, toss it to free memory.
>                     del msng_filenode_set[fname]
> +                if shallowroot and fname in sh_filenode:
> +                    del sh_filenode[fname]
>             # Signal that no more groups are left.
>             yield changegroup.closechunk()
>             self.ui.progress(_('bundling files'), None)

-parren


More information about the Mercurial-devel mailing list