mirror of
https://github.com/frej/fast-export.git
synced 2025-11-02 01:05:48 +01:00
Initial import
This is the initial import of 'hg2git' being a converter which feeds a hg repository into git-fast-import(1).
This commit is contained in:
230
hg2git.py
Normal file
230
hg2git.py
Normal file
@@ -0,0 +1,230 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright (c) 2007 Rocco Rutte <pdmef@gmx.net>
|
||||
# License: GPLv2
|
||||
|
||||
"""hg2git.py - A mercurial-to-git filter for git-fast-import(1)
|
||||
Usage: hg2git.py <hg repo url> <marks file> <heads file> <tip file>
|
||||
"""
|
||||
|
||||
from mercurial import repo,hg,cmdutil,util,ui,revlog
|
||||
from tempfile import mkstemp
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
|
||||
# silly regex to see if user field has email address
|
||||
user_re=re.compile('[^<]+ <[^>]+>$')
|
||||
# git branch for hg's default 'HEAD' branch
|
||||
cfg_master='master'
|
||||
# insert 'checkpoint' command after this many commits
|
||||
cfg_checkpoint_count=1000
|
||||
|
||||
def usage(ret):
|
||||
sys.stderr.write(__doc__)
|
||||
return ret
|
||||
|
||||
def setup_repo(url):
|
||||
myui=ui.ui()
|
||||
return myui,hg.repository(myui,url)
|
||||
|
||||
def get_changeset(ui,repo,revision):
|
||||
def get_branch(name):
|
||||
if name=='HEAD':
|
||||
name=cfg_master
|
||||
return name
|
||||
def fixup_user(user):
|
||||
if user_re.match(user)==None:
|
||||
if '@' not in user:
|
||||
return user+' <none@none>'
|
||||
return user+' <'+user+'>'
|
||||
return user
|
||||
node=repo.lookup(revision)
|
||||
(manifest,user,(time,timezone),files,desc,extra)=repo.changelog.read(node)
|
||||
tz="%+03d%02d" % (-timezone / 3600, ((-timezone % 3600) / 60))
|
||||
branch=get_branch(extra.get('branch','master'))
|
||||
return (manifest,fixup_user(user),(time,tz),files,desc,branch,extra)
|
||||
|
||||
def gitmode(x):
|
||||
return x and '100755' or '100644'
|
||||
|
||||
def wr(msg=''):
|
||||
print msg
|
||||
#map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
|
||||
|
||||
def checkpoint(count):
|
||||
count=count+1
|
||||
if count%cfg_checkpoint_count==0:
|
||||
sys.stderr.write("Checkpoint after %d commits\n" % count)
|
||||
wr('checkpoint')
|
||||
wr()
|
||||
return count
|
||||
|
||||
def get_parent_mark(parent,marks):
|
||||
p=marks.get(str(parent),None)
|
||||
if p==None:
|
||||
# if we didn't see parent previously, assume we saw it in this run
|
||||
p=':%d' % (parent+1)
|
||||
return p
|
||||
|
||||
def export_commit(ui,repo,revision,marks,heads,last,max,count):
|
||||
sys.stderr.write('Exporting revision %d (tip %d) as [:%d]\n' % (revision,max,revision+1))
|
||||
|
||||
(_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision)
|
||||
parents=repo.changelog.parentrevs(revision)
|
||||
|
||||
# we need this later to write out tags
|
||||
marks[str(revision)]=':%d'%(revision+1)
|
||||
|
||||
wr('commit refs/heads/%s' % branch)
|
||||
wr('mark :%d' % (revision+1))
|
||||
wr('committer %s %d %s' % (user,time,timezone))
|
||||
wr('data %d' % (len(desc)+1)) # wtf?
|
||||
wr(desc)
|
||||
wr()
|
||||
|
||||
src=heads.get(branch,'')
|
||||
link=''
|
||||
if src!='':
|
||||
# if we have a cached head, this is an incremental import: initialize it
|
||||
# and kill reference so we won't init it again
|
||||
wr('from %s' % src)
|
||||
heads[branch]=''
|
||||
elif not heads.has_key(branch) and revision>0:
|
||||
# newly created branch and not the first one: connect to parent
|
||||
tmp=get_parent_mark(parents[0],marks)
|
||||
wr('from %s' % tmp)
|
||||
sys.stderr.write('Link new branch [%s] to parent [%s]\n' %
|
||||
(branch,tmp))
|
||||
link=tmp # avoid making a merge commit for branch fork
|
||||
|
||||
if parents:
|
||||
l=last.get(branch,revision)
|
||||
for p in parents:
|
||||
# 1) as this commit implicitely is the child of the most recent
|
||||
# commit of this branch, ignore this parent
|
||||
# 2) ignore nonexistent parents
|
||||
# 3) merge otherwise
|
||||
if p==l or p==revision or p<0:
|
||||
continue
|
||||
tmp=get_parent_mark(p,marks)
|
||||
# if we fork off a branch, don't merge via 'merge' as we have
|
||||
# 'from' already above
|
||||
if tmp==link:
|
||||
continue
|
||||
sys.stderr.write('Merging branch [%s] with parent [%s] from [r%d]\n' %
|
||||
(branch,tmp,p))
|
||||
wr('merge %s' % tmp)
|
||||
|
||||
last[branch]=revision
|
||||
heads[branch]=''
|
||||
|
||||
ctx=repo.changectx(str(revision))
|
||||
man=ctx.manifest()
|
||||
|
||||
wr('deleteall')
|
||||
|
||||
for f in man.keys():
|
||||
fctx=ctx.filectx(f)
|
||||
d=fctx.data()
|
||||
wr('M %s inline %s' % (gitmode(man.execf(f)),f))
|
||||
wr('data %d' % len(d)) # had some trouble with size()
|
||||
wr(d)
|
||||
|
||||
wr()
|
||||
return checkpoint(count)
|
||||
|
||||
def export_tags(ui,repo,cache,count):
|
||||
l=repo.tagslist()
|
||||
for tag,node in l:
|
||||
if tag=='tip':
|
||||
continue
|
||||
rev=repo.changelog.rev(node)
|
||||
ref=cache.get(str(rev),None)
|
||||
if ref==None:
|
||||
sys.stderr.write('Failed to find reference for creating tag'
|
||||
' %s at r%d\n' % (tag,rev))
|
||||
continue
|
||||
(_,user,(time,timezone),_,desc,branch,_)=get_changeset(ui,repo,rev)
|
||||
sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag,rev,ref))
|
||||
wr('tag %s' % tag)
|
||||
wr('from %s' % ref)
|
||||
wr('tagger %s %d %s' % (user,time,timezone))
|
||||
msg='hg2git created tag %s for hg revision %d on branch %s on (summary):\n\t%s' % (tag,
|
||||
rev,branch,desc.split('\n')[0])
|
||||
wr('data %d' % (len(msg)+1))
|
||||
wr(msg)
|
||||
wr()
|
||||
count=checkpoint(count)
|
||||
return count
|
||||
|
||||
def load_cache(filename):
|
||||
cache={}
|
||||
if not os.path.exists(filename):
|
||||
return cache
|
||||
f=open(filename,'r')
|
||||
l=0
|
||||
for line in f.readlines():
|
||||
l+=1
|
||||
fields=line.split(' ')
|
||||
if fields==None or not len(fields)==2 or fields[0][0]!=':':
|
||||
sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
|
||||
continue
|
||||
# put key:value in cache, key without ^:
|
||||
cache[fields[0][1:]]=fields[1].split('\n')[0]
|
||||
f.close()
|
||||
return cache
|
||||
|
||||
def save_cache(filename,cache):
|
||||
f=open(filename,'w+')
|
||||
map(lambda x: f.write(':%s %s\n' % (str(x),str(cache.get(x)))),cache.keys())
|
||||
f.close()
|
||||
|
||||
def verify_heads(ui,repo,cache):
|
||||
def getsha1(branch):
|
||||
f=open(os.getenv('GIT_DIR','/dev/null')+'/refs/heads/'+branch)
|
||||
sha1=f.readlines()[0].split('\n')[0]
|
||||
f.close()
|
||||
return sha1
|
||||
|
||||
for b in cache.keys():
|
||||
sys.stderr.write('Verifying branch [%s]\n' % b)
|
||||
sha1=getsha1(b)
|
||||
c=cache.get(b)
|
||||
if sha1!=c:
|
||||
sys.stderr.write('Warning: Branch [%s] modified outside hg2git:'
|
||||
'\n%s (repo) != %s (cache)\n' % (b,sha1,c))
|
||||
return True
|
||||
|
||||
if __name__=='__main__':
|
||||
if len(sys.argv)!=6: sys.exit(usage(1))
|
||||
repourl,m,marksfile,headsfile,tipfile=sys.argv[1:]
|
||||
_max=int(m)
|
||||
|
||||
marks_cache=load_cache(marksfile)
|
||||
heads_cache=load_cache(headsfile)
|
||||
state_cache=load_cache(tipfile)
|
||||
|
||||
ui,repo=setup_repo(repourl)
|
||||
|
||||
if not verify_heads(ui,repo,heads_cache):
|
||||
sys.exit(1)
|
||||
|
||||
tip=repo.changelog.count()
|
||||
|
||||
min=int(state_cache.get('tip',0))
|
||||
max=_max
|
||||
if _max<0:
|
||||
max=tip
|
||||
|
||||
c=int(state_cache.get('count',0))
|
||||
last={}
|
||||
for rev in range(min,max):
|
||||
c=export_commit(ui,repo,rev,marks_cache,heads_cache,last,tip,c)
|
||||
|
||||
c=export_tags(ui,repo,marks_cache,c)
|
||||
|
||||
state_cache['tip']=max
|
||||
state_cache['count']=c
|
||||
state_cache['repo']=repourl
|
||||
save_cache(tipfile,state_cache)
|
||||
Reference in New Issue
Block a user