Script to convert a subversion repo to mercurial

Luc Saillard luc at saillard.org
Fri Sep 23 09:59:30 CDT 2005


Hi,
 I wrote a small script to convert some of my repositories managed by
Subversion to Mercurial. I know that tailor can do the same, but I only want
to convert the data once. Thanks to Matt for building Mercurial as a library, so I
can call Mercurial directly to apply commits.
The script processes a full svn dump backup and creates the Mercurial repo.

Luc
-------------- next part --------------
#!/usr/bin/python
#
# Convert a Subversion repository to Mercurial.
#
# Status: Preview
# Version: 0.0.1
# Copyright: Luc Saillard <luc at saillard.org>
# Requires: python>=2.3
# Quick Use: svn2hg -d repo-trunk.hg repo-trunk.svndump
#
# NOTE: This little script only converts one directory at a time.
# So if you have this folder tree
#
# /
# |- trunk
# |- tags
# |  |- 0.1.0
# |  |- 0.2.0
# |- branches
# |  |- experimental
# |  |- toto
#
# You need to filter the dump to keep only one branch:
#  svndumpfilter include trunk < repo.svndump > repo-trunk.svndump
#  svn2hg -d repo-trunk.hg -v repo-trunk.svndump
#
#

import sys, re, os, md5, shutil, time
from stat import *
from optparse import OptionParser
from mercurial import hg,ui

# `options` holds the parsed command-line options.  It is assigned in the
# `__main__` section of this file and read as a module global by the
# functions below (options.outputdir, options.verbose).  A `global`
# statement at module level has no effect; it is kept as a marker only.
global options

#
# Initialize a new mercurial repository
# @return an hg.repo object
#         None if an error occurs
#
def create_hg_repo(outputdir):
  """Create outputdir and initialise a Mercurial repository inside it.

  Returns the object produced by hg.repository(), or None (after a
  warning on the Mercurial ui) when outputdir already exists as a
  directory.
  """
  hgui = ui.ui()

  # Refuse to reuse an existing directory.
  if os.path.isdir(outputdir):
    hgui.warn("abort: %s already exist.\n" % outputdir)
    return None

  # Either nothing is there yet, or something that is not a directory is
  # in the way; in the latter case os.mkdir raises OSError to the caller.
  os.mkdir(outputdir)
  return hg.repository(hgui, outputdir, create=1)

#
# Strip the first n path of the directory
# TODO: for now we just strip the first folder
#
def strip_path(path):
  """Drop the leading component of a '/'-separated path.

  The remaining components are joined again with os.sep; a path made of a
  single component yields the empty string.
  """
  components = path.split('/')
  del components[0]   # split() always returns at least one element
  return os.sep.join(components)

#
# Convert a svn format(rfc) date to UnixTime
#
def svn_date_to_hg(date):
  """Convert a Subversion UTC timestamp to Unix time.

  date is expected to look like "2005-06-30T15:39:42.562728Z".  The
  fractional part is optional and is discarded.  Only the UTC designator
  "Z" is understood; other timezones are not supported.

  Returns the number of seconds since the epoch as a decimal string, or
  None (after printing an error) when date does not match that layout.
  """
  # Imported here because the module-level imports do not include calendar.
  import calendar

  m = re.search(r"^(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)(?:\.\d+)?Z", date)
  if not m:
    print("error: Bad date format (%s)" % date)
    return None

  # year, month, day, hours, minutes, seconds
  fields = [int(group) for group in m.groups()]

  # timegm() interprets the fields as UTC, so the result does not depend on
  # the timezone settings of the machine running the conversion.
  return str(calendar.timegm(tuple(fields) + (0, 0, 0)))


#
# Create a directory in the svn repository
# Note: with hg we can't add a directory, we need to wait to have a file in it
# A possible workaround is to create a dummy file .keepme if the directory is
# still empty before commit.
#
def svn_mkdir(hdrs):

  path = strip_path(hdrs['Node-path'])
  # By default remove trunk
  if path == "":
    print "debug: Not creating directory:", hdrs['Node-path']
    return
  # Remove the first directory from the path
  os.mkdir(options.outputdir + os.sep + path)

#
# Add a new file to the repository, or change the content of an existing one.
# Only full dumps are supported, not incremental ones.
#
def svn_change_file(hdrs):

  path = strip_path(hdrs['Node-path'])

  # Get offset of the content of the file
  proplen = textlen = 0
  if hdrs.has_key('Prop-content-length'):
    proplen = int(hdrs['Prop-content-length'])
  if hdrs.has_key('Text-content-length'):
    textlen = int(hdrs['Text-content-length'])

  filename = options.outputdir + os.sep + path

  # Sometime the file is copied, from an old file
  # HACK: what to do when a copyfrom-path and content-length!=0
  if hdrs.has_key('Node-copyfrom-path') and textlen==0:
    oldpath = strip_path(hdrs['Node-copyfrom-path'])
    oldfilename = options.outputdir + os.sep + oldpath
    shutil.copyfile(oldfilename, filename)

  else:
    # Write file
    f = open(filename, 'w')
    if textlen>0:
      f.write(hdrs['data'][proplen:textlen+proplen])
    f.close()

  if hdrs.has_key('props'):
    if hdrs['props'].has_key('svn:executable'):
      if hdrs['props']['svn:executable'] == '*':
	os.chmod(filename,0755)
      else:
	os.chmod(filename,0644)

  # Verify the file
  cksum = md5.new(open(filename).read()).hexdigest()
  if hdrs.has_key('Text-content-md5'):
    if hdrs['Text-content-md5'] != cksum:
      print "ERROR: bad md5sum for file %s" % path
      print "md5 of the file:", cksum
      print "md5 in subversion:", hdrs['Text-content-md5']
      sys.exit(2)
  elif not hdrs.has_key('Node-copyfrom-path'):
    print "warning: Missing md5sum for file:", path

#
# delete a file in the repository
#
def svn_delete_file(hdrs):
  """Remove from the working copy the file named by a deleted node.

  Only hdrs['Node-path'] is read.  The removal is done with os.unlink, so
  OSError propagates to the caller when the path cannot be unlinked (for
  instance when it is missing or is a directory).
  """
  os.unlink(options.outputdir + os.sep + strip_path(hdrs['Node-path']))


#
# Read a block from the file descriptor f.
# @return hash of all headers
#
# A block consists of RFC 822 style headers.
# All the headers of a block are collected into one hash.
#
rfc822_line_re = re.compile("^([\w-]+): (.*)$");
def read_svn_block(f):

  hdrs = {}
  s = f.readline()
  while s:
    if s == "\n":
      if len(hdrs)>0: # if we have found at least an header
        break	# End of the header
    else:
      m = rfc822_line_re.search(s)
      if m:
	hdrs[m.group(1)] = m.group(2)
      else:
	print "Bad line: ", s
	break
    s = f.readline()

  if hdrs.has_key('Content-length'):
    hdrs['data'] = f.read(int(hdrs['Content-length']))

  if options.verbose>3:
    for k in hdrs:
    	print "%s: [[[%s]]]" % (k, hdrs[k])
  return hdrs


#
# Convert a string of properties into a hash
# properties is like this:
#    K 7
#    svn:log
#    V 16
#    initial version
#    K 10
#    svn:author
#    V 3
#    luc
#    K 8
#    svn:date
#    V 27
#    2004-04-27T09:17:28.907785Z
#    PROPS-END
#
def _prop_parse_error(line):
  """Report a malformed property section and exit with status 1."""
  print("Bug while parsing properties data %s" % line)
  print("=================================")
  sys.exit(1)


def parse_prop_content(data):
  """Parse the property section of a dump record into a dictionary.

  data starts with the property section: a sequence of records
      "K <length>", newline, <key>, newline
      "V <length>", newline, <value>, newline
  closed by a line "PROPS-END".  Anything after that line (such as the
  text of a file) is ignored.

  Keys and values are cut out using the announced lengths rather than line
  by line, so a value may span several lines or contain text that looks
  like a "K n" / "V n" header without confusing the parser.

  Returns the {key: value} dictionary, or None when data ends before
  "PROPS-END" is seen.  Exits the program with status 1 when the section
  is malformed.
  """
  props = {}
  key = None          # key waiting for its value
  pos = 0
  end = len(data)

  while pos < end:
    eol = data.find("\n", pos)
    if eol < 0:
      eol = end
    line = data[pos:eol]

    if line == "PROPS-END":
      return props

    m = re.match(r"([KV])\s+(\d+)$", line)
    if not m:
      _prop_parse_error(line)

    start = eol + 1
    stop = start + int(m.group(2))
    if stop > end:
      # The announced length runs past the end of the data.
      _prop_parse_error(line)
    payload = data[start:stop]

    if m.group(1) == "K":
      key = payload
    else:
      if key is None:
        # A value that is not preceded by its key.
        _prop_parse_error(line)
      props[key] = payload
      key = None

    # Skip the newline that terminates the payload.
    pos = stop + 1

  # Ran out of data without meeting PROPS-END.
  return None

#
# Perform the action for this node.
# Call mercurial when needed
#
def do_action_for_node_path(repo, hdrs):

  proplen = 0
  if hdrs.has_key('Prop-content-length'):
    proplen = int(hdrs['Prop-content-length'])
    hdrs['props'] = parse_prop_content(hdrs['data'])

  if hdrs.has_key('Text-content-length'):
    textlen = int(hdrs['Text-content-length'])
    if (textlen + proplen) != int(hdrs['Content-length']):
      print "Text-content-length+Prop-content-length != Content-length for revision", current_revision
      sys.exit(1)

  if hdrs['Node-action'] == "add":
    if hdrs['Node-kind'] == "dir":
      svn_mkdir(hdrs)
    elif hdrs['Node-kind'] == "file":
      svn_change_file(hdrs)
      repo.add([strip_path(hdrs['Node-path'])])
    else:
      print "(add operation) Unknown Node-kind:", hdrs['Node-kind']
      sys.exit(1)
  elif hdrs['Node-action'] == "change":
    if hdrs['Node-kind'] == "file":
      svn_change_file(hdrs)
    else:
      print "(change operation) Unknown Node-kind:", hdrs['Node-kind']
      sys.exit(1)
  elif hdrs['Node-action'] == "delete":
    svn_delete_file(hdrs)
    repo.remove([strip_path(hdrs['Node-path'])])
  
  else:
    print "Unknown Node-action:", hdrs['Node-action']
    sys.exit(1)

#
# Main routine to do the conversion
#
def conv(config, file):

  svndumpversion = 0
  uuid = ""
  repo = None

  f = open(file, 'r')
  s = f.readline()
  m = re.search("^SVN-fs-dump-format-version: (\d+)$", s)
  if m:
    svndumpversion = int(m.group(1))
    if svndumpversion != 2:
      print "Unsupported subversion dump version: %d" % svndumpversion
      return -1
  else:
    print "This file is not a subversion dump"
    return -1

  repo = create_hg_repo(options.outputdir)
  if repo is None:
    return -2

  # Suck an empty line
  s = f.readline()

  current_revision = -1
  props = None
  while 1:

    hdrs = read_svn_block(f)

    if hdrs.has_key('UUID'):
	if options.verbose>0:
	  print "UUID key:", hdrs['UUID']
	  print ""

    elif hdrs.has_key('Revision-number'):
	# Commit the last revision
	if current_revision>1:
	  message = "No commit log"
	  user = None
	  date = None
	  if props:
	    if props.has_key('svn:log'):
	      message = props['svn:log']
	    if props.has_key('svn:author'):
	      user = props['svn:author']
	    date = svn_date_to_hg(props['svn:date'])
	  repo.commit(text=message, user=user, date=date)

	if options.verbose>0:
	  print "Revision-number:", hdrs['Revision-number']

	if hdrs.has_key('Prop-content-length'):
	  proplen = hdrs['Prop-content-length']
	  props = parse_prop_content(hdrs['data'])
	  if options.verbose>2:
	    for k in props:
	      print k,"=", props[k]
	    print ""
	else:
	  props = None
	current_revision = int(hdrs['Revision-number'])

    elif hdrs.has_key('Node-path'):
	if options.verbose>1:
	  print "Node-path:", hdrs['Node-path']
	  print "Node-action:", hdrs['Node-action']
	do_action_for_node_path(repo, hdrs)

    else:
    	# End of the file
	break

  
  f.close()

if __name__ == "__main__":
    # Command-line entry point: parse the options into the module-level
    # `options` read by the functions above, then run the conversion.
    parser = OptionParser()
    # default=0 so that the "options.verbose > N" tests compare integers
    # even when -v is not given.
    parser.add_option("-v", action="count", dest="verbose", default=0, help="More verbose")
    # NOTE(review): -r is parsed, but nothing in this file reads
    # options.revision.
    parser.add_option("-r", type="int", dest="revision", help="Start conversion from revision N")
    parser.add_option("-d", type="string", dest="outputdir", help="Output directory")
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("incorrect number of arguments")
    if options.outputdir is None:
        parser.error("Please specify an directory to create the new repository")
    # conv() returns a negative code on failure and None on success; turn a
    # failure into a non-zero exit status.
    if conv(options, args[0]):
        sys.exit(1)



More information about the Mercurial mailing list