Script to convert a subversion repo to mercurial
Luc Saillard
luc at saillard.org
Fri Sep 23 09:59:30 CDT 2005
Hi,
I wrote a small script to convert some of my repositories managed by
subversion to mercurial. I know that tailor can do the same, but I only want
to convert the data once. Thanks to Matt for building mercurial as a library,
so I can call mercurial directly to apply commits.
The script processes a full svn dump backup and creates the mercurial repo.
Luc
-------------- next part --------------
#!/usr/bin/python
#
# Convert a Subversion repository to Mercurial.
#
# Status: Preview
# Version: 0.0.1
# Copyright: Luc Saillard <luc at saillard.org>
# Requires: python>=2.3
# Quick Use: svn2hg -d repo-trunk.hg repo-trunk.svndump
#
# NOTE: This little script only converts one directory at a time.
# So if you have this folder tree
#
# /
# |- trunk
# |- tags
# | |- 0.1.0
# | |- 0.2.0
# |- branches
# | |- experimental
# | |- toto
#
# You need to filter the dump to keep only one branch
# svndumpfilter include trunk < repo.svndump > repo-trunk.svndump
# svn2hg -d repo-trunk.hg -v repo-trunk.svndump
#
#
import sys, re, os, md5, shutil, time
from stat import *
from optparse import OptionParser
from mercurial import hg,ui
global options
#
# Initialize a new mercurial repository
# @return an hg.repo object
# NONE if an error occur
#
def create_hg_repo(outputdir):
    """Create a new Mercurial repository in a directory that does not exist yet.

    @param outputdir  path of the directory to create for the repository
    @return the object returned by hg.repository(), or None (after a
            warning on the Mercurial ui) when outputdir already exists
    """
    u = ui.ui()
    # Any existing path aborts the conversion.  Previously an existing
    # non-directory made os.mkdir() fail inside the try block, the OSError
    # handler then called os.mkdir() a second time, and the script ended
    # with an uncaught exception instead of the "abort" message.
    if os.path.exists(outputdir):
        u.warn("abort: %s already exist.\n" % outputdir)
        return None
    os.mkdir(outputdir)
    return hg.repository(u, outputdir, create=1)
#
# Strip the first n path of the directory
# TODO: for now we just strip the first folder
#
def strip_path(path):
    """Drop the first component of a '/'-separated repository path.

    The remaining components are joined again with the platform
    separator (os.sep); a path with a single component yields "".
    """
    components = path.split('/')
    del components[:1]
    return os.sep.join(components)
#
# Convert a svn format(rfc) date to UnixTime
#
def svn_date_to_hg(date):
    """Convert a Subversion timestamp to seconds since the Unix epoch.

    The expected input looks like "2005-06-30T15:39:42.562728Z"; the
    fractional seconds are optional and ignored.

    @param date  timestamp string, e.g. the value of the svn:date property
    @return the epoch time as a decimal string, or None (after printing
            an error) when the string does not match the expected format
    """
    import calendar  # local import: only this function needs it

    # FIXME: only the "Z" (UTC) designator is supported
    m = re.match(r"(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)(?:\.\d+)?Z", date)
    if not m:
        print("error: Bad date format (%s)" % date)
        return None
    fields = [int(group) for group in m.groups()]
    # timegm() interprets the time tuple as UTC, so the result does not
    # depend on the timezone settings of the machine running the script
    # (the former mktime() - time.timezone arithmetic did).
    return str(calendar.timegm(tuple(fields) + (0, 0, 0)))
#
# Create a directory in the svn repository
# Note: with hg we can't add a directory, we need to wait until there is a
# file in it. One option would be to create a dummy .keepme file if the
# directory is still empty before commit.
#
def svn_mkdir(hdrs):
path = strip_path(hdrs['Node-path'])
# By default remove trunk
if path == "":
print "debug: Not creating directory:", hdrs['Node-path']
return
# Remove the first directory from the path
os.mkdir(options.outputdir + os.sep + path)
#
# Add a new file to the repository, or change the content of an existing one.
# Only full backups are supported, not incremental backups.
#
def svn_change_file(hdrs):
    """Create or update a file in the output directory from a node record.

    The file text is taken from hdrs['data'], right after the property
    section.  A node carrying 'Node-copyfrom-path' and no text is created
    by copying the already converted source file.

    @param hdrs  headers of the node record; 'props' (when present) is
                 consulted for svn:executable
    Exits the program with status 2 when the resulting file does not match
    the 'Text-content-md5' header.
    """
    path = strip_path(hdrs['Node-path'])
    filename = options.outputdir + os.sep + path

    # Offsets of the file text inside hdrs['data']
    proplen = int(hdrs.get('Prop-content-length', 0))
    has_text = 'Text-content-length' in hdrs
    textlen = 0
    if has_text:
        textlen = int(hdrs['Text-content-length'])

    wrote_text = False
    if 'Node-copyfrom-path' in hdrs and textlen == 0:
        # HACK: what to do when a copyfrom-path and content-length!=0
        oldpath = strip_path(hdrs['Node-copyfrom-path'])
        shutil.copyfile(options.outputdir + os.sep + oldpath, filename)
    elif has_text or not os.path.exists(filename):
        # Binary mode: the content is written exactly as found in the dump.
        f = open(filename, 'wb')
        try:
            f.write(hdrs['data'][proplen:proplen + textlen])
        finally:
            f.close()
        wrote_text = True
    # Otherwise the record has no text section and the file already exists
    # (a property-only change): the current content is kept.  Re-opening
    # the file for writing here used to truncate it to zero bytes.

    props = hdrs.get('props')
    if props and 'svn:executable' in props:
        # NOTE(review): the indentation of the original was lost; the
        # "else" is assumed to pair with the innermost test - confirm.
        if props['svn:executable'] == '*':
            # rwxr-xr-x
            os.chmod(filename, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)
        else:
            # rw-r--r--
            os.chmod(filename, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)

    # Verify the file
    if 'Text-content-md5' in hdrs:
        f = open(filename, 'rb')
        try:
            cksum = md5.new(f.read()).hexdigest()
        finally:
            f.close()
        if hdrs['Text-content-md5'] != cksum:
            print("ERROR: bad md5sum for file %s" % path)
            print("md5 of the file: %s" % cksum)
            print("md5 in subversion: %s" % hdrs['Text-content-md5'])
            sys.exit(2)
    elif wrote_text and 'Node-copyfrom-path' not in hdrs:
        print("warning: Missing md5sum for file: %s" % path)
#
# delete a file in the repository
#
def svn_delete_file(hdrs):
    """Remove the file named by a node record from the output directory.

    @param hdrs  headers of the node record; 'Node-path' is read
    """
    target = options.outputdir + os.sep + strip_path(hdrs['Node-path'])
    os.remove(target)
#
# Read a block from the file descriptor f.
# @return hash of all headers
#
# Block is rfc-822 style headers
# We create a hash for each headers.
#
rfc822_line_re = re.compile(r"^([\w-]+): (.*)$")


def read_svn_block(f):
    """Read one record (headers plus optional content) from a dump file.

    Headers are "Name: value" lines terminated by a blank line; blank
    lines in front of the first header are skipped.  When a
    'Content-length' header is present, that many characters are read
    from f and stored under the 'data' key.

    @param f  open file object positioned at the start of a record
    @return dict mapping header names to their string values (plus
            'data'); empty when the end of the file has been reached
    Exits the program with status 1 when the content section is shorter
    than announced by 'Content-length'.
    """
    hdrs = {}
    while True:
        line = f.readline()
        if not line:
            break  # end of file
        if line == "\n":
            if hdrs:
                break  # end of the header section
            continue  # still in front of the first header
        m = rfc822_line_re.search(line)
        if not m:
            print("Bad line:  %s" % line)
            break
        hdrs[m.group(1)] = m.group(2)

    if 'Content-length' in hdrs:
        expected = int(hdrs['Content-length'])
        data = f.read(expected)
        # A short read means the dump is truncated; continuing would only
        # produce incomplete properties or file contents further down.
        if len(data) != expected:
            print("Truncated dump: expected %d bytes of content, got %d"
                  % (expected, len(data)))
            sys.exit(1)
        hdrs['data'] = data

    if options.verbose > 3:
        for k in hdrs:
            print("%s: [[[%s]]]" % (k, hdrs[k]))
    return hdrs
#
# Convert a string of properties into a hash
# properties is like this:
# K 7
# svn:log
# V 16
# initial version
# K 10
# svn:author
# V 3
# luc
# K 8
# svn:date
# V 27
# 2004-04-27T09:17:28.907785Z
# PROPS-END
#
def parse_prop_content(data):
    """Parse a property section of a dump record into a dictionary.

    The section is a sequence of "K <len>" / "V <len>" header lines, each
    followed by exactly <len> characters of payload and a newline, and is
    closed by a "PROPS-END" line.  Payloads are extracted by their declared
    length, so a value may span several lines and may itself contain lines
    that look like "K 7", "V 3" or "PROPS-END".

    @param data  string starting with the property section; anything
                 after "PROPS-END" (e.g. the file text) is ignored
    @return dict mapping property names to values; the properties read so
            far are returned when the data ends without "PROPS-END"
    Exits the program with status 1 on a malformed header line.
    """
    props = {}
    key = None
    pos = 0
    size = len(data)
    while pos < size:
        eol = data.find("\n", pos)
        if eol < 0:
            eol = size
        line = data[pos:eol]
        if line == "PROPS-END":
            return props
        if line.strip() == "":
            # tolerate stray blank lines between entries
            pos = eol + 1
            continue

        fields = line.split()
        well_formed = (len(fields) == 2 and fields[0] in ("K", "V")
                       and fields[1].isdigit())
        # A value must be preceded by its key.
        if not well_formed or (fields[0] == "V" and key is None):
            print("Bug while parsing properties data %s" % line)
            print("=================================")
            sys.exit(1)

        length = int(fields[1])
        start = eol + 1
        payload = data[start:start + length]
        if fields[0] == "K":
            key = payload
        else:
            props[key] = payload
            key = None
        # skip the payload and the newline that terminates it
        pos = start + length + 1
    return props
#
# Perform the action for this node.
# Call mercurial when needed
#
def do_action_for_node_path(repo, hdrs):
    """Apply one node record to the working directory and to the repository.

    Parses the property section into hdrs['props'] when there is one,
    checks the announced lengths, then dispatches on 'Node-action':
    "add" creates a directory or a file (files are also passed to
    repo.add), "change" rewrites a file, "delete" removes a file and
    passes it to repo.remove.  A "change" on a directory is ignored.

    @param repo  repository object providing add() and remove()
    @param hdrs  headers of the node record as built by read_svn_block()
    Exits the program with status 1 on inconsistent lengths or on an
    unknown action / node kind.
    """
    node_path = hdrs['Node-path']

    proplen = 0
    if 'Prop-content-length' in hdrs:
        proplen = int(hdrs['Prop-content-length'])
        hdrs['props'] = parse_prop_content(hdrs['data'])
    if 'Text-content-length' in hdrs:
        textlen = int(hdrs['Text-content-length'])
        if textlen + proplen != int(hdrs['Content-length']):
            # The node path is reported here: the revision number is not
            # available in this function (the former message referenced an
            # undefined name and raised NameError instead of printing).
            print("Text-content-length+Prop-content-length != Content-length for node %s"
                  % node_path)
            sys.exit(1)

    action = hdrs['Node-action']
    kind = hdrs.get('Node-kind')
    if action == "add":
        if kind == "dir":
            svn_mkdir(hdrs)
        elif kind == "file":
            svn_change_file(hdrs)
            repo.add([strip_path(node_path)])
        else:
            print("(add operation) Unknown Node-kind: %s" % kind)
            sys.exit(1)
    elif action == "change":
        if kind == "file":
            svn_change_file(hdrs)
        elif kind == "dir":
            # Only the properties of a directory can change and nothing in
            # this script stores them, so there is nothing to do.
            pass
        else:
            print("(change operation) Unknown Node-kind: %s" % kind)
            sys.exit(1)
    elif action == "delete":
        svn_delete_file(hdrs)
        repo.remove([strip_path(node_path)])
    else:
        print("Unknown Node-action: %s" % action)
        sys.exit(1)
#
# Main routine to do the conversion
#
def _commit_revision(repo, props):
    """Commit the pending changes using the properties of an svn revision."""
    message = "No commit log"
    user = None
    date = None
    if props:
        if 'svn:log' in props:
            message = props['svn:log']
        if 'svn:author' in props:
            user = props['svn:author']
        if 'svn:date' in props:
            date = svn_date_to_hg(props['svn:date'])
    repo.commit(text=message, user=user, date=date)


def conv(config, file):
    """Convert a Subversion dump file into a new Mercurial repository.

    The repository is created in options.outputdir.  Node records are
    applied to the working directory as they are read; the changes of a
    revision are committed when the next revision record (or the end of
    the dump) is reached.

    @param config  not used; the settings are read from the module-level
                   "options" object
    @param file    path of the dump file (format version 2)
    @return -1 when the file is not a supported dump, -2 when the
            repository could not be created, None on success
    """
    f = open(file, 'r')
    # try/finally so that the dump is closed on the early returns as well
    try:
        m = re.search(r"^SVN-fs-dump-format-version: (\d+)$", f.readline())
        if not m:
            print("This file is not a subversion dump")
            return -1
        svndumpversion = int(m.group(1))
        if svndumpversion != 2:
            print("Unsupported subversion dump version: %d" % svndumpversion)
            return -1

        repo = create_hg_repo(options.outputdir)
        if repo is None:
            return -2

        # Suck an empty line
        f.readline()

        current_revision = -1
        props = None
        while True:
            hdrs = read_svn_block(f)
            if 'UUID' in hdrs:
                if options.verbose > 0:
                    print("UUID key: %s" % hdrs['UUID'])
                    print("")
            elif 'Revision-number' in hdrs:
                # Commit the previous revision.  Revision 0 is skipped;
                # revision 1 is committed on its own (it used to be merged
                # into the commit of revision 2).
                if current_revision > 0:
                    _commit_revision(repo, props)
                if options.verbose > 0:
                    print("Revision-number: %s" % hdrs['Revision-number'])
                if 'Prop-content-length' in hdrs:
                    props = parse_prop_content(hdrs['data'])
                    if options.verbose > 2:
                        for k in props:
                            print("%s = %s" % (k, props[k]))
                        print("")
                else:
                    props = None
                current_revision = int(hdrs['Revision-number'])
            elif 'Node-path' in hdrs:
                if options.verbose > 1:
                    print("Node-path: %s" % hdrs['Node-path'])
                    print("Node-action: %s" % hdrs['Node-action'])
                do_action_for_node_path(repo, hdrs)
            else:
                # End of the file
                break

        # The last revision has no following revision record to trigger
        # its commit, so it is committed here.
        if current_revision > 0:
            _commit_revision(repo, props)
    finally:
        f.close()
if __name__ == "__main__":
    # Command line: svn2hg -d <outputdir> [-v ...] <dumpfile>
    parser = OptionParser()
    # default=0 so that "options.verbose > N" always compares two integers
    # (without it the attribute is None when -v is not given)
    parser.add_option("-v", action="count", dest="verbose", default=0, help="More verbose")
    parser.add_option("-r", type="int", dest="revision", help="Start conversion from revision N")
    parser.add_option("-d", type="string", dest="outputdir", help="Output directory")
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("incorrect number of arguments")
    if options.outputdir is None:
        parser.error("Please specify an directory to create the new repository")
    # conv() signals a failure with a negative return value; turn it into a
    # non-zero exit status so that callers of the script can detect it.
    if conv(options, args[0]):
        sys.exit(1)
More information about the Mercurial
mailing list