[patch 4/7] Add an optional map of specific revisions for changegroup to include

Chris Mason mason at suse.com
Thu Sep 8 21:22:59 CDT 2005


On Mon, 22 Aug 2005 16:38:24 -0400
Chris Mason <mason at suse.com> wrote:

> On Mon, 22 Aug 2005 13:14:27 -0700
>  
> > > revmap['filename'][changeset id] = filerev
> > > 
> > > revmap['filename'] is passed to the revlog.group() method so the
> > > proper file revisions can be found for a given list of changesets.
> > 
> > Can we make a wrapper around newer that generates this map and get
> > rid of the conditional code?

Here is an almost entirely untested new version.  It gets rid of the 
conditional code by calculating the revmap for all callers.  There are 
performance implications here: it will use more RAM and more CPU, because 
the manifest needs to be read for each changeset to calculate the revmap.  

I do sort the list of revisions, so we're at least reading deltas in the 
optimal order. I haven't measured it or tested it very carefully yet, 
because I first wanted to see whether the general idea was acceptable.

For callers, the default semantics of repo.changegroup have not changed.  
But if you call it like this:

chgrp = repo.changegroup(revlist, newer=0)

then self.newer() is not called, and changegroup assumes revlist is a 
complete list of all the revisions to include.

-chris

# HG changeset patch
# User mason at suse.com
Allow repo.changegroup to take a specific list of revisions to include

The default logic in changegroup simply finds all revisions newer than
each of the basenodes passed in.  When trying to create a changegroup
with only specific revs, changegroup needs to know exactly which
revisions to include.

Also, it is possible for a specific file revision to be linked to more
than one changeset.  The default logic might not include some file
revisions because they are actually linked into an older changeset.

This patch calculates a revision map in changegroup.  The revision map is a
dict keyed by filename; each value is a dict mapping a changeset id to the
corresponding file revision:

revmap['filename'][changeset id] = filerev

revmap['filename'] is passed to the revlog.group() method so the proper
file revisions can be found for a given list of changesets.

Index: crew/mercurial/localrepo.py
===================================================================
--- crew.orig/mercurial/localrepo.py	2005-09-08 21:18:56.000000000 -0400
+++ crew/mercurial/localrepo.py	2005-09-08 21:49:25.000000000 -0400
@@ -892,34 +892,65 @@ class localrepository:
         cg = self.changegroup(update)
         return remote.addchangegroup(cg)
 
-    def changegroup(self, basenodes):
+    def changegroup(self, basenodes, newer=1):
         genread = util.chunkbuffer
 
+        # build a dict mapping filenames, changeset ids and file revision (sha1)
+        # revs is an array of changeset sha ids you want to include
+        # the end result looks like this:
+        # revmap[filename][changeset sha] = file sha
+        #
+        # The manifest is given a special magic name: '.hg/00manifest'
+        def changemap(revs):
+            revmap = {}
+            # performance is horrible if we don't walk the changeset and 
+            # manifest in revision order.  Create a sorted list of sha1 
+            # sorted by rev number
+            ids = map(self.changelog.rev, revs)
+            ids.sort()
+            list = map(self.changelog.node, ids)
+            for n in list:
+                if n is nullid:
+                    continue
+                c = self.changelog.read(n)
+                pp = self.changelog.parents(n)
+                mm = self.manifest.read(c[0])
+                revmap.setdefault('.hg/00manifest', {})[n] = c[0]
+                for f in c[3]:
+                    if f in mm:
+                        revmap.setdefault(f, {})[n] = mm[f]
+            return revmap
+
         def gengroup():
-            nodes = self.newer(basenodes)
+            if newer:
+                nodes = self.newer(basenodes)
+            else:
+                nodes = basenodes
+            revmap = changemap(nodes)
 
             # construct the link map
-            linkmap = {}
+            chmap = {}
             for n in nodes:
-                linkmap[self.changelog.rev(n)] = n
+                if n != nullid:
+                    chmap[n] = n
+
+            # the changegroup is changesets + manifests + all file revs
+            for y in self.changelog.group(chmap): yield y
+
+            # remove the manifest from the revmap so it doesn't show up
+            # in the loop below.
+            if '.hg/00manifest' in revmap:
+                m = revmap.pop('.hg/00manifest')
+                for y in self.manifest.group(m): yield y
 
             # construct a list of all changed files
-            changed = {}
-            for n in nodes:
-                c = self.changelog.read(n)
-                for f in c[3]:
-                    changed[f] = 1
-            changed = changed.keys()
+            changed = revmap.keys()
             changed.sort()
 
-            # the changegroup is changesets + manifests + all file revs
-            revs = [ self.changelog.rev(n) for n in nodes ]
-
-            for y in self.changelog.group(linkmap): yield y
-            for y in self.manifest.group(linkmap): yield y
             for f in changed:
+                m = revmap[f]
                 yield struct.pack(">l", len(f) + 4) + f
-                g = self.file(f).group(linkmap)
+                g = self.file(f).group(m)
                 for y in g:
                     yield y
 
Index: crew/mercurial/revlog.py
===================================================================
--- crew.orig/mercurial/revlog.py	2005-09-08 21:18:56.000000000 -0400
+++ crew/mercurial/revlog.py	2005-09-08 21:54:52.000000000 -0400
@@ -469,7 +469,7 @@ class revlog:
             elif lx > ly:
                 lx = x.next()
 
-    def group(self, linkmap):
+    def group(self, map):
         """calculate a delta group
 
         Given a list of changeset revs, return a set of deltas and
@@ -477,15 +477,31 @@ class revlog:
         parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
         have this parent as it has all history before these
         changesets. parent is parent[0]
+
+        map is a dict used to explicitly list which revisions will be
+        included in the group:
+
+        map[changeset sha] = sha
         """
         revs = []
         needed = {}
+        rev2ch = {}
 
-        # find file nodes/revs that match changeset revs
-        for i in xrange(0, self.count()):
-            if self.index[i][3] in linkmap:
-                revs.append(i)
-                needed[i] = 1
+        # from the revision map we construct a list of rev numbers we have 
+        # to include.
+        #
+        # a mapping of revnumber -> changeset sha1 is saved so that we can 
+        # yield the changeset later on.
+        for c in map:
+            r = map[c]
+            n = self.rev(r)
+            # some changesets in the map can point to the same revision number
+            # so we have to check if we've already seen this one.
+            if n not in needed:
+                needed[n] = 1
+                revs.append(n)
+                rev2ch[r] = c
+        revs.sort()
 
         # if we don't have any revisions touched by these changesets, bail
         if not revs:
@@ -574,7 +590,7 @@ class revlog:
                 d = chunks[b]
 
             p = self.parents(n)
-            meta = n + p[0] + p[1] + linkmap[self.linkrev(n)]
+            meta = n + p[0] + p[1] + rev2ch[n]
             l = struct.pack(">l", len(meta) + len(d) + 4)
             yield l
             yield meta



More information about the Mercurial mailing list