[PATCH] subrepo: do not push "clean" subrepos when the parent repo is pushed

Angel Ezquerra angel.ezquerra at gmail.com
Wed Feb 13 18:06:18 CST 2013


# HG changeset patch
# User Angel Ezquerra <angel.ezquerra at gmail.com>
# Date 1360795816 -3600
# Node ID 26276460d54aecdeb107c82c4e3f2ca7c0c6a8b3
# Parent  55b9b294b7544a6a144f627f71f4b770907d5a98
subrepo: do not push "clean" subrepos when the parent repo is pushed

A clean subrepo is defined as one that has not had its dirstate, bookmarks or
phases modified.

This patch works by adding a "clean" method to subrepos. In the case of
mercurial subrepos, this method calculates a "stamp" (i.e. a set of file hashes)
of the repository state at the time of push with a similar "stamp" that was
stored on a file when the subrepo was cloned or pushed to a given remote target.
If the stamps match the subrepo has no changes that must be pushed to the target
repository and thus the push can be skipped.

Note that we calculate the stamp file by calculating hashes for several key
repository files, such as the dirstate, the bookmarks file and the phaseroots
file. This means that our "clean" detection is not perfect, in the sense that
if the working directory has been updated to a different revision we will
assume that the subrepo is not clean. However, if we update to another revision
and back to the original revision the clean() method will correctly detec the
subrepo as being clean.

Also note that a subrepo being "clean" is not the opposite of it being "dirty".
A subrepo is dirty if it updated to a different revision that the one that is
pointed to by the subrepo parent or if its working directory is not clean. This
is a different concept.

diff --git a/mercurial/subrepo.py b/mercurial/subrepo.py
--- a/mercurial/subrepo.py
+++ b/mercurial/subrepo.py
@@ -300,6 +300,16 @@
 
 class abstractsubrepo(object):
 
+    def clean(self):
+        """
+        returns true if the repository has not changed since it was last
+        cloned or pulled.
+        Note that this is very different and definitely not the opposite
+        of the repository being "dirty", which is related to having changes
+        on the working directory or the current revision.
+        """
+        return False
+
     def dirty(self, ignoreupdate=False):
         """returns true if the dirstate of the subrepo is dirty or does not
         match current stored state. If ignoreupdate is true, only check
@@ -426,6 +436,73 @@
         self._repo.ui.setconfig('ui', '_usedassubrepo', 'True')
         self._initrepo(r, state[0], create)
 
+    def clean(self, path):
+        """
+        returns true if the repository has not changed since it was last
+        cloned or pulled.
+        Note that this is very different and definitely not the opposite
+        of the repository being "dirty", which is related to having changes
+        on the working directory or the current revision.
+        """
+        return self._calcrepostamp(path) == self._readrepostamp(path)
+
+    def _getfilestamp(self, filename):
+        data = ''
+        if os.path.exists(filename):
+            fd = open(filename)
+            data = fd.read()
+            fd.close()
+        return util.sha1(data).hexdigest()
+
+    def _calcrepostamp(self, remotepath):
+        '''calculate a unique "stamp" for the current repository state
+
+        This method is used to to detect when there are changes that may
+        require a push to a given remote path.'''
+        filelist = ('dirstate', 'bookmarks', 'store/phaseroots')
+        stamp = ['# %s\n' % remotepath]
+        lock = self._repo.lock()
+        try:
+            for relname in filelist:
+                absname = os.path.normpath(self._repo.join(relname))
+                stamp.append('%s = %s\n' % (absname, self._getfilestamp(absname)))
+        finally:
+            lock.release()
+        return stamp
+
+    def _getstampfilename(self, remotepath):
+        '''get a unique filename for the remote repo stamp'''
+        fname = util.sha1(remotepath).hexdigest()
+        return self._repo.join(os.path.join('stamps', fname))
+
+    def _readrepostamp(self, remotepath):
+        '''read an existing remote repository stamp'''
+        stampfile = self._getstampfilename(remotepath)
+        if not os.path.exists(stampfile):
+            return ''
+        fd = open(stampfile, 'r')
+        stamp = fd.readlines()
+        fd.close()
+        return stamp
+
+    def _updaterepostamp(self, remotepath):
+        '''
+        Calc the current repo stamp saving it into a remote repo stamp file
+        Each remote repo requires its own stamp file, because a subrepo may
+        be clean versus a given remote repo, but not versus another.
+        '''
+        # save it to the clean file
+        # We should lock the repo
+        stampfile = self._getstampfilename(remotepath)
+        # [FIXME] should lock the repo? it is already locked by _calcrepostamp
+        stamp = self._calcrepostamp(remotepath)
+        stampdir = self._repo.join('stamps')
+        if not os.path.exists(stampdir):
+            util.makedir(stampdir, True)
+        fd = open(stampfile, 'w')
+        fd.writelines(stamp)
+        fd.close()
+
     @annotatesubrepoerror
     def _initrepo(self, parentrepo, source, create):
         self._repo._subparent = parentrepo
@@ -544,12 +621,17 @@
                                          update=False)
                 self._repo = cloned.local()
                 self._initrepo(parentrepo, source, create=True)
+                self._updaterepostamp(srcurl)
             else:
                 self._repo.ui.status(_('pulling subrepo %s from %s\n')
                                      % (subrelpath(self), srcurl))
+                cleansub = self.clean(srcurl)
                 self._repo.pull(other)
                 bookmarks.updatefromremote(self._repo.ui, self._repo, other,
                                            srcurl)
+                if cleansub:
+                    # keep the repo clean after pull
+                    self._updaterepostamp(srcurl)
 
     @annotatesubrepoerror
     def get(self, state, overwrite=False):
@@ -557,6 +639,9 @@
         source, revision, kind = state
         self._repo.ui.debug("getting subrepo %s\n" % self._path)
         hg.updaterepo(self._repo, revision, overwrite)
+        srcurl = _abssource(self._repo)
+        if self.clean(srcurl):
+            self._updaterepostamp(srcurl)
 
     @annotatesubrepoerror
     def merge(self, state):
@@ -599,10 +684,20 @@
                 return False
 
         dsturl = _abssource(self._repo, True)
+        if not force:
+            if self.clean(dsturl):
+                self._repo.ui.status(
+                    _('no changes made to subrepo %s since last push to %s\n')
+                    % (subrelpath(self), dsturl))
+                return None
         self._repo.ui.status(_('pushing subrepo %s to %s\n') %
             (subrelpath(self), dsturl))
         other = hg.peer(self._repo, {'ssh': ssh}, dsturl)
-        return self._repo.push(other, force, newbranch=newbranch)
+        res = self._repo.push(other, force, newbranch=newbranch)
+
+        # the repo is now clean
+        self._updaterepostamp(dsturl)
+        return res
 
     @annotatesubrepoerror
     def outgoing(self, ui, dest, opts):
-------------- next part --------------
A non-text attachment was scrubbed...
Name: hg-2.4.patch
Type: text/x-patch
Size: 7200 bytes
Desc: not available
URL: <http://selenic.com/pipermail/mercurial-devel/attachments/20130214/ac4f42a9/attachment.bin>


More information about the Mercurial-devel mailing list