| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  | #!/usr/bin/env python | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-11-25 11:25:22 +01:00
										 |  |  | # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others. | 
					
						
							| 
									
										
										
										
											2007-03-14 10:29:24 +00:00
										 |  |  | # License: MIT <http://www.opensource.org/licenses/mit-license.php> | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-08-07 01:35:09 +02:00
										 |  |  | from mercurial import node | 
					
						
							| 
									
										
										
										
											2008-09-18 21:10:16 +02:00
										 |  |  | from hg2git import setup_repo,fixup_user,get_branch,get_changeset | 
					
						
							| 
									
										
										
										
											2008-09-19 18:31:53 +02:00
										 |  |  | from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name | 
					
						
							| 
									
										
										
										
											2007-03-12 07:33:40 +00:00
										 |  |  | from optparse import OptionParser | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  | import re | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | import os | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-09-30 07:24:16 +01:00
# Module-level setup: runs once at import time, on Windows only.
if sys.platform == "win32":
  # On Windows, sys.stdout is initially opened in text mode, which means that
  # when a LF (\n) character is written to sys.stdout, it will be converted
  # into CRLF (\r\n).  That makes git blow up, so use this platform-specific
  # code to change the mode of sys.stdout to binary.
  import msvcrt
  msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-09 12:07:08 +00:00
# Regex matching a whole "Signed-off-by: <who>" line of a log message
# (capitalisation of "off" and "by" may vary); group 1 is the text after
# the colon and space.
sob_re=re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
# insert 'checkpoint' command after this many commits or none at all if 0
cfg_checkpoint_count=0
# write some progress message every this many file contents written
cfg_export_boundary=1000
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-08-17 14:11:53 +02:00
										 |  |  | def gitmode(flags): | 
					
						
							|  |  |  |   return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644' | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def wr(msg=''): | 
					
						
							| 
									
										
										
										
											2012-03-21 23:16:11 +00:00
										 |  |  |   if msg: | 
					
						
							|  |  |  |     sys.stdout.write(msg) | 
					
						
							|  |  |  |   sys.stdout.write('\n') | 
					
						
							| 
									
										
										
										
											2007-03-13 16:43:20 +00:00
										 |  |  |   #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n')) | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def checkpoint(count): | 
					
						
							|  |  |  |   count=count+1 | 
					
						
							| 
									
										
										
										
											2007-03-08 09:37:23 +00:00
										 |  |  |   if cfg_checkpoint_count>0 and count%cfg_checkpoint_count==0: | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |     sys.stderr.write("Checkpoint after %d commits\n" % count) | 
					
						
							|  |  |  |     wr('checkpoint') | 
					
						
							|  |  |  |     wr() | 
					
						
							|  |  |  |   return count | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-09-20 10:55:24 +01:00
										 |  |  | def revnum_to_revref(rev, old_marks): | 
					
						
							|  |  |  |   """Convert an hg revnum to a git-fast-import rev reference (an SHA1
 | 
					
						
							|  |  |  |   or a mark)"""
 | 
					
						
							|  |  |  |   return old_marks.get(rev) or ':%d' % (rev+1) | 
					
						
							| 
									
										
										
										
											2007-03-07 01:52:58 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-10-25 15:21:46 +02:00
										 |  |  | def file_mismatch(f1,f2): | 
					
						
							| 
									
										
										
										
											2007-03-07 01:52:58 +00:00
										 |  |  |   """See if two revisions of a file are not equal.""" | 
					
						
							|  |  |  |   return node.hex(f1)!=node.hex(f2) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-10-25 15:21:46 +02:00
										 |  |  | def split_dict(dleft,dright,l=[],c=[],r=[],match=file_mismatch): | 
					
						
							| 
									
										
										
										
											2007-03-07 11:38:56 +00:00
										 |  |  |   """Loop over our repository and find all changed and missing files.""" | 
					
						
							| 
									
										
										
										
											2007-03-07 01:52:58 +00:00
										 |  |  |   for left in dleft.keys(): | 
					
						
							|  |  |  |     right=dright.get(left,None) | 
					
						
							| 
									
										
										
										
											2007-03-07 11:38:56 +00:00
										 |  |  |     if right==None: | 
					
						
							|  |  |  |       # we have the file but our parent hasn't: add to left set | 
					
						
							| 
									
										
										
										
											2007-03-07 01:52:58 +00:00
										 |  |  |       l.append(left) | 
					
						
							| 
									
										
										
										
											2007-10-25 15:21:46 +02:00
										 |  |  |     elif match(dleft[left],right): | 
					
						
							| 
									
										
										
										
											2007-03-07 11:38:56 +00:00
										 |  |  |       # we have it but checksums mismatch: add to center set | 
					
						
							|  |  |  |       c.append(left) | 
					
						
							| 
									
										
										
										
											2007-03-07 01:52:58 +00:00
										 |  |  |   for right in dright.keys(): | 
					
						
							|  |  |  |     left=dleft.get(right,None) | 
					
						
							|  |  |  |     if left==None: | 
					
						
							| 
									
										
										
										
											2007-03-07 11:38:56 +00:00
										 |  |  |       # if parent has file but we don't: add to right set | 
					
						
							| 
									
										
										
										
											2007-03-07 01:52:58 +00:00
										 |  |  |       r.append(right) | 
					
						
							| 
									
										
										
										
											2007-03-07 11:38:56 +00:00
										 |  |  |     # change is already handled when comparing child against parent | 
					
						
							|  |  |  |   return l,c,r | 
					
						
							| 
									
										
										
										
											2007-03-07 01:52:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def get_filechanges(repo,revision,parents,mleft): | 
					
						
							|  |  |  |   """Given some repository and revision, find all changed/deleted files.""" | 
					
						
							| 
									
										
										
										
											2007-03-07 11:38:56 +00:00
										 |  |  |   l,c,r=[],[],[] | 
					
						
							| 
									
										
										
										
											2007-03-07 01:52:58 +00:00
										 |  |  |   for p in parents: | 
					
						
							|  |  |  |     if p<0: continue | 
					
						
							|  |  |  |     mright=repo.changectx(p).manifest() | 
					
						
							| 
									
										
										
										
											2007-10-25 15:21:46 +02:00
										 |  |  |     l,c,r=split_dict(mleft,mright,l,c,r) | 
					
						
							|  |  |  |   l.sort() | 
					
						
							|  |  |  |   c.sort() | 
					
						
							|  |  |  |   r.sort() | 
					
						
							| 
									
										
										
										
											2007-03-07 11:38:56 +00:00
										 |  |  |   return l,c,r | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-09 12:07:08 +00:00
										 |  |  | def get_author(logmessage,committer,authors): | 
					
						
							|  |  |  |   """As git distincts between author and committer of a patch, try to
 | 
					
						
							|  |  |  |   extract author by detecting Signed-off-by lines. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   This walks from the end of the log message towards the top skipping | 
					
						
							|  |  |  |   empty lines. Upon the first non-empty line, it walks all Signed-off-by | 
					
						
							|  |  |  |   lines upwards to find the first one. For that (if found), it extracts | 
					
						
							|  |  |  |   authorship information the usual way (authors table, cleaning, etc.) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   If no Signed-off-by line is found, this defaults to the committer. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   This may sound stupid (and it somehow is), but in log messages we | 
					
						
							|  |  |  |   accidentially may have lines in the middle starting with | 
					
						
							|  |  |  |   "Signed-off-by: foo" and thus matching our detection regex. Prevent | 
					
						
							|  |  |  |   that."""
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   loglines=logmessage.split('\n') | 
					
						
							|  |  |  |   i=len(loglines) | 
					
						
							|  |  |  |   # from tail walk to top skipping empty lines | 
					
						
							|  |  |  |   while i>=0: | 
					
						
							|  |  |  |     i-=1 | 
					
						
							|  |  |  |     if len(loglines[i].strip())==0: continue | 
					
						
							|  |  |  |     break | 
					
						
							|  |  |  |   if i>=0: | 
					
						
							|  |  |  |     # walk further upwards to find first sob line, store in 'first' | 
					
						
							|  |  |  |     first=None | 
					
						
							|  |  |  |     while i>=0: | 
					
						
							|  |  |  |       m=sob_re.match(loglines[i]) | 
					
						
							|  |  |  |       if m==None: break | 
					
						
							|  |  |  |       first=m | 
					
						
							|  |  |  |       i-=1 | 
					
						
							|  |  |  |     # if the last non-empty line matches our Signed-Off-by regex: extract username | 
					
						
							|  |  |  |     if first!=None: | 
					
						
							|  |  |  |       r=fixup_user(first.group(1),authors) | 
					
						
							|  |  |  |       return r | 
					
						
							|  |  |  |   return committer | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-13 10:59:22 +00:00
										 |  |  | def export_file_contents(ctx,manifest,files): | 
					
						
							|  |  |  |   count=0 | 
					
						
							|  |  |  |   max=len(files) | 
					
						
							|  |  |  |   for file in files: | 
					
						
							| 
									
										
										
										
											2008-12-11 09:05:05 -05:00
										 |  |  |     # Skip .hgtags files. They only get us in trouble. | 
					
						
							|  |  |  |     if file == ".hgtags": | 
					
						
							|  |  |  |       sys.stderr.write('Skip %s\n' % (file)) | 
					
						
							|  |  |  |       continue | 
					
						
							| 
									
										
										
										
											2007-10-25 15:21:46 +02:00
										 |  |  |     d=ctx.filectx(file).data() | 
					
						
							| 
									
										
										
										
											2008-08-17 14:11:53 +02:00
										 |  |  |     wr('M %s inline %s' % (gitmode(manifest.flags(file)),file)) | 
					
						
							| 
									
										
										
										
											2007-03-13 10:59:22 +00:00
										 |  |  |     wr('data %d' % len(d)) # had some trouble with size() | 
					
						
							|  |  |  |     wr(d) | 
					
						
							|  |  |  |     count+=1 | 
					
						
							|  |  |  |     if count%cfg_export_boundary==0: | 
					
						
							|  |  |  |       sys.stderr.write('Exported %d/%d files\n' % (count,max)) | 
					
						
							|  |  |  |   if max>cfg_export_boundary: | 
					
						
							|  |  |  |     sys.stderr.write('Exported %d/%d files\n' % (count,max)) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-10-26 16:06:40 +02:00
										 |  |  | def sanitize_name(name,what="branch"): | 
					
						
							|  |  |  |   """Sanitize input roughly according to git-check-ref-format(1)""" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   def dot(name): | 
					
						
							|  |  |  |     if name[0] == '.': return '_'+name[1:] | 
					
						
							|  |  |  |     return name | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   n=name | 
					
						
							| 
									
										
										
											
												hg-fast-export.py: sanitize tildes (~) in branch names
In git-check-ref-format (1), there is the following rule for refnames:
	3. It cannot have ASCII control character (i.e. bytes
	   whose values are lower than \040, or \177 DEL), space,
	   tilde ~, caret ^, colon :, question-mark ?, asterisk *,
	   or open bracket [ anywhere;
and indeed, this rule is enforced by "git fast-import". hg-fast-export
already checked for all of the visible characters listed except for ~
and converted them to underscores. For some reason the tilde was
forgotten. This patch makes good on the omission.
Note that control characters are still left alone.
Signed-off-by: Jonathan Nieder <jrnieder@uchicago.edu>
Signed-off-by: Rocco Rutte <pdmef@gmx.net>
											
										 
											2008-06-02 13:23:48 -05:00
										 |  |  |   p=re.compile('([[ ~^:?*]|\.\.)') | 
					
						
							| 
									
										
										
										
											2007-10-26 16:06:40 +02:00
										 |  |  |   n=p.sub('_', n) | 
					
						
							| 
									
										
										
										
											2012-05-16 23:44:00 +02:00
										 |  |  |   if n[-1] in ('/', '.'): n=n[:-1]+'_' | 
					
						
							| 
									
										
										
										
											2007-10-26 16:06:40 +02:00
										 |  |  |   n='/'.join(map(dot,n.split('/'))) | 
					
						
							|  |  |  |   p=re.compile('_+') | 
					
						
							|  |  |  |   n=p.sub('_', n) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if n!=name: | 
					
						
							|  |  |  |     sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n)) | 
					
						
							|  |  |  |   return n | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-09-20 10:55:24 +01:00
										 |  |  | def export_commit(ui,repo,revision,old_marks,max,count,authors,sob,brmap): | 
					
						
							| 
									
										
										
										
											2007-10-26 17:11:57 +02:00
										 |  |  |   def get_branchname(name): | 
					
						
							|  |  |  |     if brmap.has_key(name): | 
					
						
							|  |  |  |       return brmap[name] | 
					
						
							|  |  |  |     n=sanitize_name(name) | 
					
						
							|  |  |  |     brmap[name]=n | 
					
						
							|  |  |  |     return n | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-14 10:02:15 +00:00
										 |  |  |   (revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors) | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-10-26 17:11:57 +02:00
										 |  |  |   branch=get_branchname(branch) | 
					
						
							| 
									
										
										
										
											2007-10-26 16:06:40 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-12-05 23:13:57 +00:00
										 |  |  |   parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if len(parents)==0 and revision != 0: | 
					
						
							|  |  |  |     wr('reset refs/heads/%s' % branch) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |   wr('commit refs/heads/%s' % branch) | 
					
						
							|  |  |  |   wr('mark :%d' % (revision+1)) | 
					
						
							| 
									
										
										
										
											2007-03-12 08:00:18 +00:00
										 |  |  |   if sob: | 
					
						
							|  |  |  |     wr('author %s %d %s' % (get_author(desc,user,authors),time,timezone)) | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |   wr('committer %s %d %s' % (user,time,timezone)) | 
					
						
							|  |  |  |   wr('data %d' % (len(desc)+1)) # wtf? | 
					
						
							|  |  |  |   wr(desc) | 
					
						
							|  |  |  |   wr() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-09-20 10:55:24 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |   # Sort the parents based on revision ids so that we always get the | 
					
						
							|  |  |  |   # same resulting git repo, no matter how the revisions were | 
					
						
							|  |  |  |   # numbered. | 
					
						
							|  |  |  |   parents.sort(key=repo.changelog.node, reverse=True) | 
					
						
							| 
									
										
										
										
											2007-03-06 19:47:51 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |   ctx=repo.changectx(str(revision)) | 
					
						
							|  |  |  |   man=ctx.manifest() | 
					
						
							| 
									
										
										
										
											2007-03-14 10:02:15 +00:00
										 |  |  |   added,changed,removed,type=[],[],[],'' | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-09-20 10:55:24 +01:00
										 |  |  |   if len(parents) == 0: | 
					
						
							| 
									
										
										
										
											2007-03-13 10:59:22 +00:00
										 |  |  |     # first revision: feed in full manifest | 
					
						
							| 
									
										
										
										
											2007-03-14 10:02:15 +00:00
										 |  |  |     added=man.keys() | 
					
						
							| 
									
										
										
										
											2007-10-25 15:21:46 +02:00
										 |  |  |     added.sort() | 
					
						
							| 
									
										
										
										
											2007-03-14 10:02:15 +00:00
										 |  |  |     type='full' | 
					
						
							|  |  |  |   else: | 
					
						
							| 
									
										
										
										
											2010-09-20 10:55:24 +01:00
										 |  |  |     wr('from %s' % revnum_to_revref(parents[0], old_marks)) | 
					
						
							|  |  |  |     if len(parents) == 1: | 
					
						
							|  |  |  |       # later non-merge revision: feed in changed manifest | 
					
						
							|  |  |  |       # if we have exactly one parent, just take the changes from the | 
					
						
							|  |  |  |       # manifest without expensively comparing checksums | 
					
						
							|  |  |  |       f=repo.status(repo.lookup(parents[0]),revnode)[:3] | 
					
						
							|  |  |  |       added,changed,removed=f[1],f[0],f[2] | 
					
						
							|  |  |  |       type='simple delta' | 
					
						
							|  |  |  |     else: # a merge with two parents | 
					
						
							|  |  |  |       wr('merge %s' % revnum_to_revref(parents[1], old_marks)) | 
					
						
							|  |  |  |       # later merge revision: feed in changed manifest | 
					
						
							|  |  |  |       # for many files comparing checksums is expensive so only do it for | 
					
						
							|  |  |  |       # merges where we really need it due to hg's revlog logic | 
					
						
							|  |  |  |       added,changed,removed=get_filechanges(repo,revision,parents,man) | 
					
						
							|  |  |  |       type='thorough delta' | 
					
						
							| 
									
										
										
										
											2007-03-14 10:02:15 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-10-22 10:06:58 +02:00
										 |  |  |   sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' % | 
					
						
							|  |  |  |       (branch,type,revision+1,max,len(added),len(changed),len(removed))) | 
					
						
							| 
									
										
										
										
											2007-03-14 10:02:15 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   map(lambda r: wr('D %s' % r),removed) | 
					
						
							| 
									
										
										
										
											2007-10-25 15:21:46 +02:00
										 |  |  |   export_file_contents(ctx,man,added) | 
					
						
							|  |  |  |   export_file_contents(ctx,man,changed) | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |   wr() | 
					
						
							| 
									
										
										
										
											2007-03-14 10:02:15 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |   return checkpoint(count) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-09-20 10:55:24 +01:00
										 |  |  | def export_tags(ui,repo,old_marks,mapping_cache,count,authors): | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |   l=repo.tagslist() | 
					
						
							|  |  |  |   for tag,node in l: | 
					
						
							| 
									
										
										
										
											2007-10-26 16:06:40 +02:00
										 |  |  |     tag=sanitize_name(tag,"tag") | 
					
						
							| 
									
										
										
										
											2007-03-07 11:33:03 +00:00
										 |  |  |     # ignore latest revision | 
					
						
							|  |  |  |     if tag=='tip': continue | 
					
						
							| 
									
										
										
										
											2008-12-11 09:05:05 -05:00
										 |  |  |     # ignore tags to nodes that are missing (ie, 'in the future') | 
					
						
							|  |  |  |     if node.encode('hex_codec') not in mapping_cache: | 
					
						
							|  |  |  |       sys.stderr.write('Tag %s refers to unseen node %s\n' % (tag, node.encode('hex_codec'))) | 
					
						
							|  |  |  |       continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     rev=int(mapping_cache[node.encode('hex_codec')]) | 
					
						
							| 
									
										
										
										
											2007-03-07 11:33:03 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-09-20 10:55:24 +01:00
										 |  |  |     ref=revnum_to_revref(rev, old_marks) | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |     if ref==None: | 
					
						
							| 
									
										
										
										
											2007-03-07 11:33:03 +00:00
										 |  |  |       sys.stderr.write('Failed to find reference for creating tag' | 
					
						
							|  |  |  |           ' %s at r%d\n' % (tag,rev)) | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |       continue | 
					
						
							|  |  |  |     sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag,rev,ref)) | 
					
						
							| 
									
										
										
										
											2007-03-14 08:34:18 +00:00
										 |  |  |     wr('reset refs/tags/%s' % tag) | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |     wr('from %s' % ref) | 
					
						
							|  |  |  |     wr() | 
					
						
							|  |  |  |     count=checkpoint(count) | 
					
						
							|  |  |  |   return count | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-12 08:54:30 +00:00
										 |  |  | def load_authors(filename): | 
					
						
							|  |  |  |   cache={} | 
					
						
							|  |  |  |   if not os.path.exists(filename): | 
					
						
							|  |  |  |     return cache | 
					
						
							|  |  |  |   f=open(filename,'r') | 
					
						
							|  |  |  |   l=0 | 
					
						
							| 
									
										
										
										
											2007-03-12 10:26:46 +00:00
										 |  |  |   lre=re.compile('^([^=]+)[ ]*=[ ]*(.+)$') | 
					
						
							| 
									
										
										
										
											2007-03-12 08:54:30 +00:00
										 |  |  |   for line in f.readlines(): | 
					
						
							|  |  |  |     l+=1 | 
					
						
							|  |  |  |     m=lre.match(line) | 
					
						
							|  |  |  |     if m==None: | 
					
						
							|  |  |  |       sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l)) | 
					
						
							|  |  |  |       continue | 
					
						
							|  |  |  |     # put key:value in cache, key without ^: | 
					
						
							| 
									
										
										
										
											2007-03-12 10:45:32 +00:00
										 |  |  |     cache[m.group(1).strip()]=m.group(2).strip() | 
					
						
							| 
									
										
										
										
											2007-03-12 08:54:30 +00:00
										 |  |  |   f.close() | 
					
						
							|  |  |  |   sys.stderr.write('Loaded %d authors\n' % l) | 
					
						
							|  |  |  |   return cache | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-02-10 08:32:27 -02:00
										 |  |  | def branchtip(repo, heads): | 
					
						
							|  |  |  |   '''return the tipmost branch head in heads''' | 
					
						
							|  |  |  |   tip = heads[-1] | 
					
						
							|  |  |  |   for h in reversed(heads): | 
					
						
							| 
									
										
										
										
											2014-03-14 22:18:08 -07:00
										 |  |  |     if 'close' not in repo.changelog.read(h)[5]: | 
					
						
							| 
									
										
										
										
											2014-02-10 08:32:27 -02:00
										 |  |  |       tip = h | 
					
						
							|  |  |  |       break | 
					
						
							|  |  |  |   return tip | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-13 16:43:20 +00:00
										 |  |  | def verify_heads(ui,repo,cache,force): | 
					
						
							| 
									
										
										
										
											2014-03-14 21:06:53 -07:00
										 |  |  |   branches={} | 
					
						
							|  |  |  |   for bn, heads in repo.branchmap().iteritems(): | 
					
						
							|  |  |  |     branches[bn] = branchtip(repo, heads) | 
					
						
							| 
									
										
										
										
											2007-03-10 14:28:45 +00:00
										 |  |  |   l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()] | 
					
						
							|  |  |  |   l.sort() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-13 16:31:57 +00:00
										 |  |  |   # get list of hg's branches to verify, don't take all git has | 
					
						
							| 
									
										
										
										
											2007-03-10 14:28:45 +00:00
										 |  |  |   for _,_,b in l: | 
					
						
							|  |  |  |     b=get_branch(b) | 
					
						
							| 
									
										
										
										
											2007-03-19 09:27:37 +00:00
										 |  |  |     sha1=get_git_sha1(b) | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |     c=cache.get(b) | 
					
						
							|  |  |  |     if sha1!=c: | 
					
						
							| 
									
										
										
										
											2007-03-14 10:29:24 +00:00
										 |  |  |       sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:' | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |         '\n%s (repo) != %s (cache)\n' % (b,sha1,c)) | 
					
						
							| 
									
										
										
										
											2007-03-13 16:43:20 +00:00
										 |  |  |       if not force: return False | 
					
						
							| 
									
										
										
										
											2007-03-13 16:31:57 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   # verify that branch has exactly one head | 
					
						
							|  |  |  |   t={} | 
					
						
							|  |  |  |   for h in repo.heads(): | 
					
						
							| 
									
										
										
										
											2007-03-14 10:02:15 +00:00
										 |  |  |     (_,_,_,_,_,_,branch,_)=get_changeset(ui,repo,h) | 
					
						
							| 
									
										
										
										
											2007-03-13 16:31:57 +00:00
										 |  |  |     if t.get(branch,False): | 
					
						
							|  |  |  |       sys.stderr.write('Error: repository has at least one unnamed head: hg r%s\n' % | 
					
						
							|  |  |  |           repo.changelog.rev(h)) | 
					
						
							| 
									
										
										
										
											2007-03-13 16:43:20 +00:00
										 |  |  |       if not force: return False | 
					
						
							| 
									
										
										
										
											2007-03-13 16:31:57 +00:00
										 |  |  |     t[branch]=True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |   return True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-11 09:05:05 -05:00
										 |  |  | def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,authors={},sob=False,force=False): | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |   _max=int(m) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-09-20 10:55:24 +01:00
										 |  |  |   old_marks=load_cache(marksfile,lambda s: int(s)-1) | 
					
						
							| 
									
										
										
										
											2008-12-11 09:05:05 -05:00
										 |  |  |   mapping_cache=load_cache(mappingfile) | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |   heads_cache=load_cache(headsfile) | 
					
						
							|  |  |  |   state_cache=load_cache(tipfile) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   ui,repo=setup_repo(repourl) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-13 16:43:20 +00:00
										 |  |  |   if not verify_heads(ui,repo,heads_cache,force): | 
					
						
							| 
									
										
										
										
											2007-03-08 11:21:21 +00:00
										 |  |  |     return 1 | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-19 08:01:53 +02:00
										 |  |  |   try: | 
					
						
							|  |  |  |     tip=repo.changelog.count() | 
					
						
							|  |  |  |   except AttributeError: | 
					
						
							|  |  |  |     tip=len(repo) | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   min=int(state_cache.get('tip',0)) | 
					
						
							|  |  |  |   max=_max | 
					
						
							| 
									
										
										
										
											2007-10-25 15:23:17 +02:00
										 |  |  |   if _max<0 or max>tip: | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |     max=tip | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-11 09:05:05 -05:00
										 |  |  |   for rev in range(0,max): | 
					
						
							|  |  |  |   	(revnode,_,_,_,_,_,_,_)=get_changeset(ui,repo,rev,authors) | 
					
						
							|  |  |  |   	mapping_cache[revnode.encode('hex_codec')] = str(rev) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-07 11:06:34 +00:00
										 |  |  |   c=0 | 
					
						
							| 
									
										
										
										
											2007-10-26 17:11:57 +02:00
										 |  |  |   brmap={} | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  |   for rev in range(min,max): | 
					
						
							| 
									
										
										
										
											2010-09-20 10:55:24 +01:00
										 |  |  |     c=export_commit(ui,repo,rev,old_marks,max,c,authors,sob,brmap) | 
					
						
							| 
									
										
										
										
											2007-03-06 17:00:25 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   state_cache['tip']=max | 
					
						
							|  |  |  |   state_cache['repo']=repourl | 
					
						
							|  |  |  |   save_cache(tipfile,state_cache) | 
					
						
							| 
									
										
										
										
											2008-12-11 09:05:05 -05:00
										 |  |  |   save_cache(mappingfile,mapping_cache) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-09-20 10:55:24 +01:00
										 |  |  |   c=export_tags(ui,repo,old_marks,mapping_cache,c,authors) | 
					
						
							| 
									
										
										
										
											2008-12-11 09:05:05 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  |   sys.stderr.write('Issued %d commands\n' % c) | 
					
						
							| 
									
										
										
										
											2007-03-08 11:21:21 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   return 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if __name__=='__main__': | 
					
						
							| 
									
										
										
										
											2007-03-12 07:33:40 +00:00
										 |  |  |   def bail(parser,opt): | 
					
						
							|  |  |  |     sys.stderr.write('Error: No %s option given\n' % opt) | 
					
						
							|  |  |  |     parser.print_help() | 
					
						
							|  |  |  |     sys.exit(2) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   parser=OptionParser() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   parser.add_option("-m","--max",type="int",dest="max", | 
					
						
							|  |  |  |       help="Maximum hg revision to import") | 
					
						
							| 
									
										
										
										
											2008-12-11 09:05:05 -05:00
										 |  |  |   parser.add_option("--mapping",dest="mappingfile", | 
					
						
							|  |  |  |       help="File to read last run's hg-to-git SHA1 mapping") | 
					
						
							| 
									
										
										
										
											2007-03-12 07:33:40 +00:00
										 |  |  |   parser.add_option("--marks",dest="marksfile", | 
					
						
							|  |  |  |       help="File to read git-fast-import's marks from") | 
					
						
							|  |  |  |   parser.add_option("--heads",dest="headsfile", | 
					
						
							|  |  |  |       help="File to read last run's git heads from") | 
					
						
							|  |  |  |   parser.add_option("--status",dest="statusfile", | 
					
						
							|  |  |  |       help="File to read status from") | 
					
						
							|  |  |  |   parser.add_option("-r","--repo",dest="repourl", | 
					
						
							|  |  |  |       help="URL of repo to import") | 
					
						
							| 
									
										
										
										
											2007-03-12 08:00:18 +00:00
										 |  |  |   parser.add_option("-s",action="store_true",dest="sob", | 
					
						
							|  |  |  |       default=False,help="Enable parsing Signed-off-by lines") | 
					
						
							| 
									
										
										
										
											2007-03-12 08:54:30 +00:00
										 |  |  |   parser.add_option("-A","--authors",dest="authorfile", | 
					
						
							|  |  |  |       help="Read authormap from AUTHORFILE") | 
					
						
							| 
									
										
										
										
											2007-03-13 16:43:20 +00:00
										 |  |  |   parser.add_option("-f","--force",action="store_true",dest="force", | 
					
						
							|  |  |  |       default=False,help="Ignore validation errors by force") | 
					
						
							| 
									
										
										
										
											2008-09-18 21:10:16 +02:00
										 |  |  |   parser.add_option("-M","--default-branch",dest="default_branch", | 
					
						
							|  |  |  |       help="Set the default branch") | 
					
						
							| 
									
										
										
										
											2008-09-19 18:31:53 +02:00
										 |  |  |   parser.add_option("-o","--origin",dest="origin_name", | 
					
						
							|  |  |  |       help="use <name> as namespace to track upstream") | 
					
						
							| 
									
										
										
										
											2007-03-12 07:33:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   (options,args)=parser.parse_args() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-12 08:00:18 +00:00
										 |  |  |   m=-1 | 
					
						
							| 
									
										
										
										
											2007-03-12 07:33:40 +00:00
										 |  |  |   if options.max!=None: m=options.max | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if options.marksfile==None: bail(parser,'--marks') | 
					
						
							| 
									
										
										
										
											2008-12-11 09:05:05 -05:00
										 |  |  |   if options.mappingfile==None: bail(parser,'--mapping') | 
					
						
							| 
									
										
										
										
											2007-03-19 09:05:51 +00:00
										 |  |  |   if options.headsfile==None: bail(parser,'--heads') | 
					
						
							|  |  |  |   if options.statusfile==None: bail(parser,'--status') | 
					
						
							|  |  |  |   if options.repourl==None: bail(parser,'--repo') | 
					
						
							| 
									
										
										
										
											2007-03-12 07:33:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-12 08:54:30 +00:00
										 |  |  |   a={} | 
					
						
							|  |  |  |   if options.authorfile!=None: | 
					
						
							|  |  |  |     a=load_authors(options.authorfile) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-18 21:10:16 +02:00
										 |  |  |   if options.default_branch!=None: | 
					
						
							|  |  |  |     set_default_branch(options.default_branch) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-19 18:31:53 +02:00
										 |  |  |   if options.origin_name!=None: | 
					
						
							|  |  |  |     set_origin_name(options.origin_name) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-11 09:05:05 -05:00
										 |  |  |   sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,options.headsfile, | 
					
						
							| 
									
										
										
										
											2007-03-13 16:43:20 +00:00
										 |  |  |     options.statusfile,authors=a,sob=options.sob,force=options.force)) |