[PATCH] convert: Built-in cvsps for hg cvs import

Frank Kingswood frank at kingswood-consulting.co.uk
Sat Apr 19 10:56:54 CDT 2008


# HG changeset patch
# User Frank Kingswood <frank at kingswood-consulting.co.uk>
# Date 1208620367 -3600
# Node ID eb33dd1611138734d3127fc62c2112b04c168d5c
# Parent  090f2145df7db6a638c53cee74a7fbd2fa6b9619
convert: Built-in cvsps for hg cvs import.

This patch adds a built-in cvsps replacement.
To use it, set "cvsps = builtin" in the [convert] section of .hgrc.
The cvsps.* options shown below are optional.

        [convert]
        cvsps = builtin
        cvsps.fuzz = cvs changeset datestamp fuzz
        cvsps.mergeto = some regular expression
        cvsps.mergefrom = some regular expression

The built-in cvsps runs "cvs rlog" on the repository (it does not talk
to the CVS server directly; it runs the cvs executable), sorts the log
entries, and merges entries that have an identical log message, author
and branch name, and whose dates fall within the fuzz window
(cvsps.fuzz, 60 seconds by default), into a single changeset.

A log cache file is written to ~/.hg.cvsps/, which is used to avoid
re-reading all of the CVS log when the conversion is run again.

This builtin cvsps code has been found to work successfully in cases
where the traditional external cvsps program generates incorrect
changesets.

The builtin cvsps code can merge CVS branches based on magic tags in
the CVS log message. When a log message matches the cvsps.mergefrom
regular expression, cvsps sets a second parent on the changeset to
indicate a merge from the named branch.
When a log message matches the cvsps.mergeto regular expression, cvsps
inserts a dummy changeset (with no members, so no changes) that merges
the current branch into the named branch (which must exist).
The cvsps.mergefrom pattern defaults to {{mergefrombranch ([-\w]+)}},
and the cvsps.mergeto pattern defaults to {{mergetobranch ([-\w]+)}}.

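For convenience, the cvsps.py script can also be run as a standalone
replacement for cvsps, as long as the mercurial modules are on the
PYTHONPATH. When run standalone, cvsps.py accepts the cvsps options
-b, -p, -r, --root, -u, -v, -x and -z. The options -A, --cvs-direct
and -q are accepted for compatibility but ignored, and the additional
option --parents shows the parent changesets.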

diff -r 090f2145df7d -r eb33dd161113 hgext/convert/cvs.py
--- a/hgext/convert/cvs.py	Fri Apr 18 18:07:34 2008 +0200
+++ b/hgext/convert/cvs.py	Sat Apr 19 16:52:47 2008 +0100
@@ -3,8 +3,10 @@
 import os, locale, re, socket
 from cStringIO import StringIO
 from mercurial import util
+from mercurial.i18n import _
 
 from common import NoRepo, commit, converter_source, checktool
+from cvsps import cvsps_create_log,cvsps_create_changeset
 
 class convert_cvs(converter_source):
     def __init__(self, ui, path, rev=None):
@@ -14,10 +16,13 @@
         if not os.path.exists(cvs):
             raise NoRepo("%s does not look like a CVS checkout" % path)
 
+        checktool('cvs')
         self.cmd = ui.config('convert', 'cvsps', 'cvsps -A -u --cvs-direct -q')
         cvspsexe = self.cmd.split(None, 1)[0]
-        for tool in (cvspsexe, 'cvs'):
-            checktool(tool)
+        self.builtin = cvspsexe=='builtin'
+
+        if not self.builtin:
+            checktool(cvspsexe)
 
         self.changeset = {}
         self.files = {}
@@ -28,10 +33,11 @@
         self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
         self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
         self.encoding = locale.getpreferredencoding()
-        self._parse()
+
+        self._parse(ui)
         self._connect()
 
-    def _parse(self):
+    def _parse(self,ui):
         if self.changeset:
             return
 
@@ -56,80 +62,114 @@
             id = None
             state = 0
             filerevids = {}
-            for l in util.popen(cmd):
-                if state == 0: # header
-                    if l.startswith("PatchSet"):
-                        id = l[9:-2]
-                        if maxrev and int(id) > maxrev:
-                            # ignore everything
-                            state = 3
-                    elif l.startswith("Date"):
-                        date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
-                        date = util.datestr(date)
-                    elif l.startswith("Branch"):
-                        branch = l[8:-1]
-                        self.parent[id] = self.lastbranch.get(branch, 'bad')
-                        self.lastbranch[branch] = id
-                    elif l.startswith("Ancestor branch"):
-                        ancestor = l[17:-1]
-                        # figure out the parent later
-                        self.parent[id] = self.lastbranch[ancestor]
-                    elif l.startswith("Author"):
-                        author = self.recode(l[8:-1])
-                    elif l.startswith("Tag:") or l.startswith("Tags:"):
-                        t = l[l.index(':')+1:]
-                        t = [ut.strip() for ut in t.split(',')]
-                        if (len(t) > 1) or (t[0] and (t[0] != "(none)")):
-                            self.tags.update(dict.fromkeys(t, id))
-                    elif l.startswith("Log:"):
-                        # switch to gathering log
-                        state = 1
-                        log = ""
-                elif state == 1: # log
-                    if l == "Members: \n":
-                        # switch to gathering members
-                        files = {}
-                        oldrevs = []
-                        log = self.recode(log[:-1])
-                        state = 2
-                    else:
-                        # gather log
-                        log += l
-                elif state == 2: # members
-                    if l == "\n": # start of next entry
-                        state = 0
-                        p = [self.parent[id]]
-                        if id == "1":
-                            p = []
-                        if branch == "HEAD":
-                            branch = ""
-                        if branch:
-                            latest = None
-                            # the last changeset that contains a base
-                            # file is our parent
-                            for r in oldrevs:
-                                latest = max(filerevids.get(r, None), latest)
-                            if latest:
-                                p = [latest]
 
-                        # add current commit to set
-                        c = commit(author=author, date=date, parents=p,
-                                   desc=log, branch=branch)
-                        self.changeset[id] = c
-                        self.files[id] = files
-                    else:
-                        colon = l.rfind(':')
-                        file = l[1:colon]
-                        rev = l[colon+1:-2]
-                        oldrev, rev = rev.split("->")
-                        files[file] = rev
+            if self.builtin:
+                # builtin cvsps code
+                ui.status(_('using builtin cvsps\n'))
 
-                        # save some information for identifying branch points
-                        oldrevs.append("%s:%s" % (oldrev, file))
-                        filerevids["%s:%s" % (rev, file)] = id
-                elif state == 3:
-                    # swallow all input
-                    continue
+                db=cvsps_create_log(ui,cache='update')
+                db=cvsps_create_changeset(ui,db,
+                      fuzz=int(ui.config('convert','cvsps.fuzz',60)),
+                      mergeto=ui.config('convert','cvsps.mergeto',None),
+                      mergefrom=ui.config('convert','cvsps.mergefrom',None))
+
+                for cs in db:
+                    if maxrev and cs.Id>maxrev:
+                        break
+                    id = str(cs.Id)
+                    cs.Author = self.recode(cs.Author)
+                    self.lastbranch[cs.Branch] = id
+                    cs.Comment = self.recode(cs.Comment)
+                    date = util.datestr(cs.Date)
+                    self.tags.update(dict.fromkeys(cs.Tags,id))
+
+                    files = {}
+                    for f in cs.Entries:
+                        files[f.File]="%s%s"%('.'.join([str(x) for x in f.Revision]),
+                                              ['','(DEAD)'][f.Dead])
+
+                    # add current commit to set
+                    c=commit(author=cs.Author,date=date,
+                             parents=[str(p.Id) for p in cs.Parents],
+                             desc=cs.Comment,branch=cs.Branch or '')
+                    self.changeset[id]=c
+                    self.files[id]=files
+            else:
+                # external cvsps
+                for l in util.popen(cmd):
+                    if state == 0: # header
+                        if l.startswith("PatchSet"):
+                            id = l[9:-2]
+                            if maxrev and int(id) > maxrev:
+                                # ignore everything
+                                state = 3
+                        elif l.startswith("Date"):
+                            date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
+                            date = util.datestr(date)
+                        elif l.startswith("Branch"):
+                            branch = l[8:-1]
+                            self.parent[id] = self.lastbranch.get(branch, 'bad')
+                            self.lastbranch[branch] = id
+                        elif l.startswith("Ancestor branch"):
+                            ancestor = l[17:-1]
+                            # figure out the parent later
+                            self.parent[id] = self.lastbranch[ancestor]
+                        elif l.startswith("Author"):
+                            author = self.recode(l[8:-1])
+                        elif l.startswith("Tag:") or l.startswith("Tags:"):
+                            t = l[l.index(':')+1:]
+                            t = [ut.strip() for ut in t.split(',')]
+                            if (len(t) > 1) or (t[0] and (t[0] != "(none)")):
+                                self.tags.update(dict.fromkeys(t, id))
+                        elif l.startswith("Log:"):
+                            # switch to gathering log
+                            state = 1
+                            log = ""
+                    elif state == 1: # log
+                        if l == "Members: \n":
+                            # switch to gathering members
+                            files = {}
+                            oldrevs = []
+                            log = self.recode(log[:-1])
+                            state = 2
+                        else:
+                            # gather log
+                            log += l
+                    elif state == 2: # members
+                        if l == "\n": # start of next entry
+                            state = 0
+                            p = [self.parent[id]]
+                            if id == "1":
+                                p = []
+                            if branch == "HEAD":
+                                branch = ""
+                            if branch:
+                                latest = None
+                                # the last changeset that contains a base
+                                # file is our parent
+                                for r in oldrevs:
+                                    latest = max(filerevids.get(r, None), latest)
+                                if latest:
+                                    p = [latest]
+
+                            # add current commit to set
+                            c = commit(author=author, date=date, parents=p,
+                                       desc=log, branch=branch)
+                            self.changeset[id] = c
+                            self.files[id] = files
+                        else:
+                            colon = l.rfind(':')
+                            file = l[1:colon]
+                            rev = l[colon+1:-2]
+                            oldrev, rev = rev.split("->")
+                            files[file] = rev
+
+                            # save some information for identifying branch points
+                            oldrevs.append("%s:%s" % (oldrev, file))
+                            filerevids["%s:%s" % (rev, file)] = id
+                    elif state == 3:
+                        # swallow all input
+                        continue
 
             self.heads = self.lastbranch.values()
         finally:
diff -r 090f2145df7d -r eb33dd161113 hgext/convert/cvsps.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/convert/cvsps.py	Sat Apr 19 16:52:47 2008 +0100
@@ -0,0 +1,637 @@
+#!/usr/bin/env python
+#
+# Mercurial built-in replacement for cvsps.
+#
+# Copyright 2008, Frank Kingswood <frank at kingswood-consulting.co.uk>
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+import os
+import re
+import sys
+import cPickle as pickle
+from mercurial import util
+from mercurial.i18n import _
+
+def listsort(list,key):
+   "helper to sort by key in Python 2.3"
+   try:
+      list.sort(key=key)
+   except TypeError:
+      list.sort(lambda l,r:cmp(key(l),key(r)))
+
+class cvsps_log_entry:
+   __module__="cvsps"
+   '''Class cvsps_log_entry has the following attributes:
+      .Author    - author name as CVS knows it
+      .Branch    - name of branch this revision is on
+      .Branches  - revision tuple of branches starting at this revision
+      .Comment   - commit message
+      .Date      - the commit date as a (time,tz) tuple
+      .Dead      - true if file revision is dead
+      .File      - Name of file
+      .Lines     - a tuple (+lines,-lines) or None
+      .Parent    - Previous revision of this entry
+      .RCS       - name of file as returned from CVS
+      .Revision  - revision number as tuple
+      .Tags      - list of tags on the file
+   '''
+   def __init__(self,**entries):
+      self.__dict__.update(entries)
+
+class cvsps_log_error(Exception):
+   pass
+
+def cvsps_create_log(ui,directory=None,root=None,rlog=True,cache=None):
+   '''Collect the CVS rlog'''
+
+   # reusing strings typically saves about 40% of memory
+   _scache={}
+   def scache(s):
+      "return a shared version of a string"
+      try:
+         return _scache[s]
+      except:
+         _scache[s]=s
+      return s
+
+   ui.status(_('collecting CVS rlog\n'))
+
+   log=[]      # list of cvsps_log_entry objects containing the CVS state
+
+   # patterns to match in CVS (r)log output, by state of use
+   re_00=re.compile('RCS file: (.+)$')
+   re_01=re.compile('cvs \\[r?log aborted\\]: (.+)$')
+   re_02=re.compile('cvs (r?log|server): (.+)\n$')
+   re_03=re.compile("(Cannot access.+CVSROOT)|(can't create temporary directory.+)$")
+   re_10=re.compile('Working file: (.+)$')
+   re_20=re.compile('symbolic names:')
+   re_30=re.compile('\t(.+): ([\\d.]+)$')
+   re_31=re.compile('----------------------------$')
+   re_32=re.compile('=============================================================================$')
+   re_50=re.compile('revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
+   re_60=re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?')
+   re_70=re.compile('branches: (.+);$')
+
+   prefix=''   # leading path to strip off what we get from CVS
+
+   if directory is None:
+      # Current working directory
+
+      # Get the real directory in the repository
+      try:
+         prefix=directory=file('CVS/Repository').read().strip()
+      except IOError:
+         raise cvsps_log_error('Not a CVS sandbox')
+
+      if not prefix.endswith('/'):
+         prefix+='/'
+
+      # Use the Root file in the sandbox, if it exists
+      try:
+         root=file('CVS/Root').read().strip()
+      except IOError:
+         pass
+
+   if not root:
+      root=os.environ.get('CVSROOT',None)
+
+   # read log cache if one exists
+   oldlog=[]
+   date=None
+
+   if cache:
+      cachefile=os.path.expanduser('~/.hg.cvsps')
+      if not os.path.exists(cachefile):
+         os.mkdir(cachefile)
+      cachefile=os.path.join(cachefile,'-'.join(re.findall(r'\w+','%s-%s'%(root or "",directory)))+'.cache')
+
+   if cache=='update':
+      try:
+         oldlog=file(cachefile)
+         ui.debug(_('reading cvs log cache %s\n')%cachefile)
+         oldlog=pickle.load(oldlog)
+      except:
+         pass
+
+      if oldlog:
+         date=oldlog[-1].Date    # last commit date as a (time,tz) tuple
+         date=util.datestr(date,'%Y/%m/%d %H:%M:%S')
+
+   # build the CVS commandline
+   cmd=['cvs','-q']
+   if root:
+      cmd.append('-d%s'%root)
+      p=root.split(':')[-1]
+      if not p.endswith('/'):
+         p+='/'
+      prefix=p+prefix
+   cmd.append(['log','rlog'][rlog])
+   if date:
+      cmd.append('-d')
+      cmd.append('>%s'%date)
+   cmd.append(directory)
+
+   # state machine begins here
+   tags={}     # dictionary of revisions on current file with their tags
+   state=0
+   store=False # set when a new record can be appended
+
+   cmd=[util.shellquote(arg) for arg in cmd]
+
+   for line in util.popen(' '.join(cmd)):
+      if line.endswith('\n'):
+         line=line[:-1]
+      #ui.status('state=%d line=%r\n'%(state,line))
+
+      if state==0:
+         match=re_00.match(line)
+         if match:
+            rcs=match.group(1)
+            tags={}
+            if rlog:
+               filename=rcs[:-2]
+               if filename.startswith(prefix):
+                  filename=filename[len(prefix):]
+               if filename.startswith('/'):
+                  filename=filename[1:]
+               filename=filename.replace('/Attic/','/')
+               state=2
+               continue
+            state=1
+            continue
+         match=re_01.match(line)
+         if match:
+            raise Exception(match.group(1))
+         match=re_02.match(line)
+         if match:
+            raise Exception(match.group(2))
+         if re_03.match(line):
+            raise Exception(line)
+
+      elif state==1:
+         match=re_10.match(line)
+         assert match,_('RCS file must always be followed by Working file')
+         filename=match.group(1)
+         state=2
+
+      elif state==2:
+         if re_20.match(line):
+            state=3
+
+      elif state==3:
+         match=re_30.match(line)
+         if match:
+            rev=[int(x) for x in match.group(2).split('.')]
+
+            # Convert magic branch number to an odd-numbered one
+            revn=len(rev)
+            if revn>3 and (revn%2)==0 and rev[-2]==0:
+               rev=rev[:-2]+rev[-1:]
+            rev=tuple(rev)
+
+            if rev not in tags:
+               tags[rev]=[]
+            tags[rev].append(match.group(1))
+
+         elif re_31.match(line):
+            state=5
+         elif re_32.match(line):
+            state=0
+
+      elif state==4:
+         if re_31.match(line):
+            state=5
+         else:
+            assert not re_32.match(line),_('Must have at least some revisions')
+
+      elif state==5:
+         match=re_50.match(line)
+         assert match,_('expected revision number')
+         e=cvsps_log_entry(RCS=scache(rcs),File=scache(filename),Revision=tuple([int(x) for x in match.group(1).split('.')]),Branches=[],Parent=None)
+         state=6
+
+      elif state==6:
+         match=re_60.match(line)
+         assert match,_('revision must be followed by date line')
+         d=match.group(1)
+         if d[2]=='/':
+            # Y2K
+            d='19'+d
+
+         if len(d.split())!=3:
+            d=d+' UTC'
+         e.Date=util.parsedate(d,['%y/%m/%d %H:%M:%S','%Y/%m/%d %H:%M:%S','%Y-%m-%d %H:%M:%S'])
+         e.Author=scache(match.group(2))
+         e.Dead=match.group(3).lower()=='dead'
+
+         if match.group(5):
+            if match.group(6):
+               e.Lines=(int(match.group(5)),int(match.group(6)))
+            else:
+               e.Lines=(int(match.group(5)),0)
+         elif match.group(6):
+            e.Lines=(0,int(match.group(6)))
+         else:
+            e.Lines=None
+         e.Comment=[]
+         state=7
+
+      elif state==7:
+         m=re_70.match(line)
+         if m:
+            e.Branches=[tuple([int(y) for y in x.strip().split('.')]) for x in m.group(1).split(';')]
+            state=8
+         elif re_31.match(line):
+            state=5
+            store=True
+         elif re_32.match(line):
+            state=0
+            store=True
+         else:
+            e.Comment.append(line)
+
+      elif state==8:
+         if re_31.match(line):
+            state=5
+            store=True
+         elif re_32.match(line):
+            state=0
+            store=True
+         else:
+            e.Comment.append(line)
+
+      if store:
+         store=False
+         e.Tags=[scache(x) for x in tags.get(e.Revision,[])]
+         e.Tags.sort()
+         e.Comment=scache('\n'.join(e.Comment))
+
+         revn=len(e.Revision)
+         if revn>3 and (revn%2)==0:
+            e.Branch=tags.get(e.Revision[:-1],[None])[0]
+         else:
+            e.Branch=None
+
+         log.append(e)
+
+         if len(log)%100==0:
+            ui.status(util.ellipsis('%d %s'%(len(log),e.File),80)+'\n')
+
+   listsort(log,key=lambda x:(x.RCS,x.Revision))
+
+   # find parent revisions
+   versions={}
+   for e in log:
+      branch=e.Revision[:-1]
+
+      p=versions.get((e.RCS,branch),None)
+      if p is None:
+         p=e.Revision[:-2]
+      e.Parent=p
+      versions[(e.RCS,branch)]=e.Revision
+
+   # update the log cache
+   if cache:
+      if log:
+         # join up the old and new logs
+         listsort(log,key=lambda x:x.Date)
+
+         if oldlog and oldlog[-1].Date>=log[0].Date:
+            raise cvsps_log_error('Log cache overlaps with new log entries, re-run without cache.')
+
+         log=oldlog+log
+
+         # write the new cachefile
+         ui.debug(_('writing cvs log cache %s\n')%cachefile)
+         pickle.dump(log,file(cachefile,'w'))
+      else:
+         log=oldlog
+
+   ui.status(_('%d log entries\n')%len(log))
+
+   return log
+
+
+class cvsps_changeset:
+   '''Class cvsps_changeset has the following attributes:
+      .Author    - author name as CVS knows it
+      .Branch    - name of branch this changeset is on, or None
+      .Comment   - commit message
+      .Date      - the commit date as a (time,tz) tuple
+      .Entries   - list of cvsps_log_entry objects in this changeset
+      .Parent    - list of one or two parent changesets
+      .Tags      - list of tags on this changeset
+   '''
+   def __init__(self,**entries):
+      self.__dict__.update(entries)
+
+
+def cvsps_create_changeset(ui,log,fuzz=60,mergefrom=None,mergeto=None):
+   '''Convert log into changesets.'''
+
+   ui.status(_('creating changesets\n'))
+
+   # Merge changesets
+
+   listsort(log,key=lambda x:(x.Comment,x.Author,x.Branch,x.Date))
+
+   changeset=[]
+   files={}
+   c=None
+   for i,e in enumerate(log):
+
+      # Check if log entry belongs to the current changeset or not.
+      if not (c and
+              e.Comment==c.Comment and
+              e.Author==c.Author and
+              e.Branch==c.Branch and
+              (c.Date[0]+c.Date[1])<=(e.Date[0]+e.Date[1])<=(c.Date[0]+c.Date[1])+fuzz and
+              e.File not in files):
+         c=cvsps_changeset(Comment=e.Comment,Author=e.Author,
+                           Branch=e.Branch,Date=e.Date,Entries=[])
+         changeset.append(c)
+         files={}
+         if len(changeset)%100==0:
+            ui.status(util.ellipsis('%d %s'%(len(changeset),repr(e.Comment)[1:-1]),80)+'\n')
+
+      e.Changeset=c
+      c.Entries.append(e)
+      files[e.File]=True
+      c.Date=e.Date       # changeset date is date of latest commit in it
+
+   # Sort files in each changeset
+
+   for c in changeset:
+      def pathcompare(l,r):
+         'Mimic cvsps sorting order'
+         l=l.split('/')
+         r=r.split('/')
+         nl=len(l)
+         nr=len(r)
+         n=min(nl,nr)
+         for i in range(n):
+            if i+1==nl and nl<nr:
+               return -1
+            elif i+1==nr and nl>nr:
+               return +1
+            elif l[i]<r[i]:
+               return -1
+            elif l[i]>r[i]:
+               return +1
+         return 0
+      def entitycompare(l,r):
+         return pathcompare(l.File,r.File)
+
+      c.Entries.sort(entitycompare)
+
+   # Sort changesets by date
+
+   def cscmp(l,r):
+      d=sum(l.Date)-sum(r.Date)
+      if d:
+         return d
+
+      # detect vendor branches and initial commits on a branch
+      le={}
+      for e in l.Entries:
+         le[e.RCS]=e.Revision
+      re={}
+      for e in r.Entries:
+         re[e.RCS]=e.Revision
+
+      d=0
+      for e in l.Entries:
+         if re.get(e.RCS,None)==e.Parent:
+            assert not d
+            d=1
+            break
+
+      for e in r.Entries:
+         if le.get(e.RCS,None)==e.Parent:
+            assert not d
+            d=-1
+            break
+
+      return d
+
+   changeset.sort(cscmp)
+
+   # Collect tags
+
+   globaltags={}
+   for c in changeset:
+      tags={}
+      for e in c.Entries:
+         for tag in e.Tags:
+            # remember which is the latest changeset to have this tag
+            globaltags[tag]=c
+
+   for c in changeset:
+      tags={}
+      for e in c.Entries:
+         for tag in e.Tags:
+            tags[tag]=True
+      # remember tags only if this is the latest changeset to have it
+      tagnames=[tag for tag in tags if globaltags[tag] is c]
+      tagnames.sort()
+      c.Tags=tagnames
+
+   # Find parent changesets, handle {{mergetobranch BRANCHNAME}}
+   # by inserting dummy changesets with two parents, and handle
+   # {{mergefrombranch BRANCHNAME}} by setting two parents.
+
+   versions={}
+   for i,c in enumerate(changeset):
+      for f in c.Entries:
+         versions[(f.RCS,f.Revision)]=i
+
+   if mergeto is None:
+      mergeto=r'{{mergetobranch ([-\w]+)}}'
+   if mergeto:
+      mergeto=re.compile(mergeto)
+
+   if mergefrom is None:
+      mergefrom=r'{{mergefrombranch ([-\w]+)}}'
+   if mergefrom:
+      mergefrom=re.compile(mergefrom)
+
+   branches={}
+   n=len(changeset)
+   i=0
+   while i<n:
+      c=changeset[i]
+
+      p=None
+      if c.Branch in branches:
+         p=branches[c.Branch]
+      else:
+         for f in c.Entries:
+            p=max(p,versions.get((f.RCS,f.Parent),None))
+
+      c.Parents=[]
+      if p is not None:
+         c.Parents.append(changeset[p])
+
+      if mergefrom:
+         m=mergefrom.search(c.Comment)
+         if m:
+            m=m.group(1)
+            if m=='HEAD':
+               m=None
+            if m in branches and c.Branch!=m:
+               c.Parents.append(changeset[branches[m]])
+
+      if mergeto:
+         m=mergeto.search(c.Comment)
+         if m:
+            try:
+               m=m.group(1)
+               if m=='HEAD':
+                  m=None
+            except:
+               m=None   # if no group found then merge to HEAD
+            if m in branches and c.Branch!=m:
+               # insert empty changeset for merge
+               cc=cvsps_changeset(Author=c.Author,Branch=m,Date=c.Date,
+                     Comment='convert-repo: CVS merge from branch %s'%c.Branch,
+                     Entries=[],Tags=[],Parents=[changeset[branches[m]],c])
+               changeset.insert(i+1,cc)
+               branches[m]=i+1
+
+               # adjust our loop counters now we have inserted a new entry
+               n+=1
+               i+=2
+               continue
+
+      branches[c.Branch]=i
+      i+=1
+
+   # Number changesets
+
+   for i,c in enumerate(changeset):
+      c.Id=i+1
+
+   ui.status(_('%d changeset entries\n')%len(changeset))
+
+   return changeset
+
+
+def main():
+   '''Main program to mimic cvsps.'''
+   from optparse import OptionParser,SUPPRESS_HELP
+
+   op=OptionParser(usage='%prog [-bpruvxz] path',
+                   description='Read CVS rlog for current directory or named '
+                               'path in repository, and convert the log to changesets '
+                               'based on matching commit log entries and dates.')
+
+   # Options that are ignored for compatibility with cvsps-2.1
+   op.add_option('-A',dest='Ignore',action='store_true',help=SUPPRESS_HELP)
+   op.add_option('--cvs-direct',dest='Ignore',action='store_true',help=SUPPRESS_HELP)
+   op.add_option('-q',dest='Ignore',action='store_true',help=SUPPRESS_HELP)
+
+   # Main options shared with cvsps-2.1
+   op.add_option('-b',dest='Branches',action='append',default=[],
+                 help='Only return changes on specified branches')
+   op.add_option('-p',dest='Prefix',action='store',default='',
+                 help='Prefix to remove from file names')
+   op.add_option('-r',dest='Revisions',action='append',default=[],
+                 help='Only return changes after or between specified tags')
+   op.add_option('-u',dest='Cache',action='store_const',const='update',
+                 help="Update cvs log cache")
+   op.add_option('-v',dest='Verbose',action='store_true',
+                 help='Be verbose')
+   op.add_option('-x',dest='Cache',action='store_const',const='write',
+                 help="Create new cvs log cache")
+   op.add_option('-z',dest='Fuzz',action='store',type='int',default=60,
+                 help='Set commit time fuzz',metavar='seconds')
+   op.add_option('--root',dest='Root',action='store',
+                 help='Specify cvsroot',metavar='cvsroot')
+
+   # Options specific to this version
+   op.add_option('--parents',dest='Parents',action='store_true',
+                 help='Show parent changesets')
+
+   options,args=op.parse_args()
+
+   # Create a ui object for printing progress messages
+   class UI:
+      def __init__(self,verbose):
+         if not verbose:
+            self.status=self.debug
+      def status(self,msg):
+         sys.stderr.write(msg)
+      def debug(self,msg):
+         pass
+   ui=UI(options.Verbose)
+
+   try:
+      if args:
+         log=[]
+         for d in args:
+            log+=cvsps_create_log(ui,d,root=options.Root,cache=options.Cache)
+      else:
+         log=cvsps_create_log(ui,root=options.Root,cache=options.Cache)
+   except cvsps_log_error,e:
+      print e
+      return
+
+   changeset=cvsps_create_changeset(ui,log,options.Fuzz)
+   del log
+
+   # Print changesets (optionally filtered)
+
+   off=len(options.Revisions)
+   for cs in changeset:
+
+      # limit by branches
+      if options.Branches and (cs.Branch or 'HEAD') not in options.Branches:
+         continue
+
+      if not off:
+         # Note: trailing spaces on several lines here are needed to have
+         #       bug-for-bug compatibility with cvsps.
+         print '---------------------'
+         print 'PatchSet %d '%cs.Id
+         print 'Date: %s'%util.datestr(cs.Date,'%Y/%m/%d %H:%M:%S')
+         print 'Author: %s'%cs.Author
+         print 'Branch: %s'%(cs.Branch or 'HEAD')
+         print 'Tag%s: %s '%(['','s'][len(cs.Tags)>1],
+                             ','.join(cs.Tags) or '(none)')
+         if options.Parents and cs.Parents:
+            if len(cs.Parents)>1:
+               print 'Parents: %s'%(','.join([str(p.Id) for p in cs.Parents]))
+            else:
+               print 'Parent: %d'%cs.Parents[0].Id
+
+         print 'Log:'
+         print cs.Comment
+         print
+         print 'Members: '
+         for f in cs.Entries:
+            fn=f.File
+            if fn.startswith(options.Prefix):
+               fn=fn[len(options.Prefix):]
+            print '\t%s:%s->%s%s '%(fn,'.'.join([str(x) for x in f.Parent]) or 'INITIAL',
+                                    '.'.join([str(x) for x in f.Revision]),['','(DEAD)'][f.Dead])
+         print
+
+      # have we seen the start tag?
+      if options.Revisions and off:
+         if options.Revisions[0]==str(cs.Id) or \
+            options.Revisions[0] in cs.Tags:
+            off=False
+
+      # see if we reached the end tag
+      if len(options.Revisions)>1 and not off:
+         if options.Revisions[1]==str(cs.Id) or \
+            options.Revisions[1] in cs.Tags:
+            break
+
+
+if __name__=='__main__':
+   main()
+
+# EOF cvsps.py


More information about the Mercurial-devel mailing list