[PATCH 2 of 4] splicemap: improve error handling when source is hg (issue2084)

Ben Goswami bengoswami at fb.com
Tue Apr 30 15:13:18 CDT 2013


# HG changeset patch
# User Ben Goswami <bengoswami at fb.com>
# Date 1366915826 25200
#      Thu Apr 25 11:50:26 2013 -0700
# Node ID 9bf28065bccfbd1574b9ca62d8b7859bda8bc4d9
# Parent  e00f1814391cc4d44c5007b2874cd7c39901c817
splicemap: improve error handling when source is hg (issue2084)

1. Introduced 2 levels of error handling for splicemap files
   a. Check the splicemap file for rules which are same across different
      types of source repos.  This is done through enhancing parsesplicemap
      function
   b. Check revision string formats.  Each repo may have their own format.
      This is done usign checkrevformat function
   c. Implemented the above two for hg

diff --git a/hgext/convert/common.py b/hgext/convert/common.py
--- a/hgext/convert/common.py
+++ b/hgext/convert/common.py
@@ -5,7 +5,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-import base64, errno, subprocess, os, datetime
+import base64, errno, subprocess, os, datetime, re
 import cPickle as pickle
 from mercurial import util
 from mercurial.i18n import _
@@ -63,6 +63,15 @@
 
         self.encoding = 'utf-8'
 
+    def checkhexformat(self, revstr):
+        """ fails if revstr is not a 40 byte hex. mercurial and git both uses
+            such format for their rivision numbering
+        """
+        matchobj = re.match(r'[0-9a-fA-F]{40,40}$', revstr)
+        if matchobj is None:
+            raise util.Abort(_('splicemap entry %s is not a valid revision'
+                               ' identifier') % revstr)
+
     def before(self):
         pass
 
@@ -164,6 +173,13 @@
         """
         return {}
 
+    def checkrevformat(self, revstr):
+        """revstr is a string that describes a revision in the given
+           source control system.  Return true if revstr has correct
+           format.
+        """
+        return True
+
 class converter_sink(object):
     """Conversion sink (target) interface"""
 
diff --git a/hgext/convert/convcmd.py b/hgext/convert/convcmd.py
--- a/hgext/convert/convcmd.py
+++ b/hgext/convert/convcmd.py
@@ -121,9 +121,17 @@
         self.splicemap = self.parsesplicemap(opts.get('splicemap'))
         self.branchmap = mapfile(ui, opts.get('branchmap'))
 
+    def parsesplicemap(self, path):
+        """ check and validate the splicemap format and
+            return a child/parents dictionary.
+            Format checking has two parts.
+            1. generic format which is same across all source types
+            2. specific format checking which may be different for
+               different source type.  This logic is implemented in
+               checkrevformat function in source files like
+               hg.py, subversion.py etc.
+        """
 
-    def parsesplicemap(self, path):
-        """Parse a splicemap, return a child/parents dictionary."""
         if not path:
             return {}
         m = {}
@@ -136,18 +144,28 @@
                     continue
                 try:
                     child, parents = line.split(' ', 1)
+                    self.source.checkrevformat(child)
                     parents = parents.replace(',', ' ').split()
+                    # check if number of parents are upto 2 max
+                    if (len(parents) > 2):
+                        raise util.Abort(_('syntax error in %s(%d): child '\
+                                            'parent1[,parent2] expected') \
+                                            % (path, i + 1))
+                    for parent in parents:
+                        self.source.checkrevformat(parent)
                 except ValueError:
-                    raise util.Abort(_('syntax error in %s(%d): child parent1'
-                                       '[,parent2] expected') % (path, i + 1))
+                    raise util.Abort(_('syntax error in %s(%d): child '\
+                                        'parent1[,parent2] expected') \
+                                        % (path, i + 1))
                 pp = []
                 for p in parents:
                     if p not in pp:
                         pp.append(p)
                 m[child] = pp
-        except IOError, e:
-            if e.errno != errno.ENOENT:
-                raise
+         # if file does not exist or error reading, exit
+        except IOError:
+            raise util.Abort(_('splicemap file not found or error reading %s:')
+                               % path)
         return m
 
 
diff --git a/hgext/convert/hg.py b/hgext/convert/hg.py
--- a/hgext/convert/hg.py
+++ b/hgext/convert/hg.py
@@ -397,3 +397,7 @@
 
     def getbookmarks(self):
         return bookmarks.listbookmarks(self.repo)
+
+    def checkrevformat(self, revstr):
+        """ Mercurial, rivision string is a 40 byte hex """
+        self.checkhexformat(revstr)
diff --git a/tests/test-convert-splicemap.t b/tests/test-convert-splicemap.t
--- a/tests/test-convert-splicemap.t
+++ b/tests/test-convert-splicemap.t
@@ -37,6 +37,8 @@
   $ hg ci -Am addaandd
   adding a
   adding d
+  $ INVALIDID1=afd12345af
+  $ INVALIDID2=28173x36ddd1e67bf7098d541130558ef5534a86
   $ CHILDID1=`hg id --debug -i`
   $ echo d >> d
   $ hg ci -Am changed
@@ -53,7 +55,7 @@
   o  0:527cdedf31fb "addaandd" files: a d
   
 
-test invalid splicemap
+test invalid splicemap1
 
   $ cat > splicemap <<EOF
   > $CHILDID2
@@ -62,6 +64,24 @@
   abort: syntax error in splicemap(1): child parent1[,parent2] expected
   [255]
 
+test invalid splicemap2
+
+  $ cat > splicemap <<EOF
+  > $CHILDID2 $PARENTID1, $PARENTID2, $PARENTID2
+  > EOF
+  $ hg convert --splicemap splicemap repo2 repo1
+  abort: syntax error in splicemap(1): child parent1[,parent2] expected
+  [255]
+
+test invalid splicemap3
+
+  $ cat > splicemap <<EOF
+  > $INVALIDID1 $INVALIDID2
+  > EOF
+  $ hg convert --splicemap splicemap repo2 repo1
+  abort: splicemap entry afd12345af is not a valid revision identifier
+  [255]
+
 splice repo2 on repo1
 
   $ cat > splicemap <<EOF


More information about the Mercurial-devel mailing list