diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..4cab1f4 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Set the default behavior, in case people don't have core.autocrlf set. +* text=auto diff --git a/.gitignore b/.gitignore index 2226203..7259ca6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ +*.orig *.pyc .dotest +.idea/ diff --git a/README.md b/README.md index 3219240..34c6bf0 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,57 @@ if [ "$3" == "1" ]; then cat; else dos2unix; fi -- End of crlf-filter.sh -- ``` + +Plugins +----------------- + +hg-fast-export supports plugins to manipulate the file data and commit +metadata. The plugins are enabled with the --plugin option. The value +of said option is a plugin name (by folder in the plugins directory), +and, optionally, an equals-sign followed by an initialization string. + +There is a readme accompanying each of the bundled plugins, with a +description of the usage. To create a new plugin, one must simply +add a new folder under the `plugins` directory, with the name of the +new plugin. Inside, there must be an `__init__.py` file, which contains +at a minimum: + +``` +def build_filter(args): + return Filter(args) + +class Filter: + def __init__(self, args): + pass + #Or don't pass, if you want to do some init code here +``` + +Beyond the boilerplate initialization, you can see the two different +defined filter methods in the [dos2unix](./plugins/dos2unix) and +[branch_name_in_commit](./plugins/branch_name_in_commit) plugins. + +``` +commit_data = {'branch': branch, 'parents': parents, 'author': author, 'desc': desc} + +def commit_message_filter(self,commit_data): +``` +The `commit_message_filter` method is called for each commit, after parsing +from hg, but before outputting to git. The dictionary `commit_data` contains the +above attributes about the commit, and can be modified by any filter. 
The +values in the dictionary after filters have been run are used to create the git +commit. + +``` +file_data = {'filename':filename,'file_ctx':file_ctx,'data':d} + +def file_data_filter(self,file_data): +``` +The `file_data_filter` method is called for each file within each commit. +The dictionary `file_data` contains the above attributes about the file, and +can be modified by any filter. `file_ctx` is the filecontext from the +mercurial python library. After all filters have been run, the values +are used to add the file to the git commit. + Notes/Limitations ----------------- diff --git a/hg-fast-export.py b/hg-fast-export.py index a21148e..e53b5dd 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -11,6 +11,7 @@ from optparse import OptionParser import re import sys import os +import pluginloader if sys.platform == "win32": # On Windows, sys.stdout is initially opened in text mode, which means that @@ -123,7 +124,7 @@ def get_author(logmessage,committer,authors): return r return committer -def export_file_contents(ctx,manifest,files,hgtags,encoding='',filter_contents=None): +def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}): count=0 max=len(files) for file in files: @@ -137,18 +138,15 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',filter_contents=N filename=file file_ctx=ctx.filectx(file) d=file_ctx.data() - if filter_contents: - import subprocess - filter_cmd=filter_contents + [filename,node.hex(file_ctx.filenode()),'1' if file_ctx.isbinary() else '0'] - try: - filter_proc=subprocess.Popen(filter_cmd,stdin=subprocess.PIPE,stdout=subprocess.PIPE) - d,_=filter_proc.communicate(d) - except: - sys.stderr.write('Running filter-contents %s:\n' % filter_cmd) - raise - filter_ret=filter_proc.poll() - if filter_ret: - raise subprocess.CalledProcessError(filter_ret,filter_cmd) + + if plugins and plugins['file_data_filters']: + file_data = {'filename':filename,'file_ctx':file_ctx,'data':d} + for filter in 
plugins['file_data_filters']: + filter(file_data) + d=file_data['data'] + filename=file_data['filename'] + file_ctx=file_data['file_ctx'] + wr('M %s inline %s' % (gitmode(manifest.flags(file)), strip_leading_slash(filename))) wr('data %d' % len(d)) # had some trouble with size() @@ -198,7 +196,8 @@ def strip_leading_slash(filename): return filename def export_commit(ui,repo,revision,old_marks,max,count,authors, - branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',filter_contents=None): + branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='', + plugins={}): def get_branchname(name): if brmap.has_key(name): return brmap[name] @@ -211,6 +210,16 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, branch=get_branchname(branch) parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0] + author = get_author(desc,user,authors) + + if plugins and plugins['commit_message_filters']: + commit_data = {'branch': branch, 'parents': parents, 'author': author, 'desc': desc} + for filter in plugins['commit_message_filters']: + filter(commit_data) + branch = commit_data['branch'] + parents = commit_data['parents'] + author = commit_data['author'] + desc = commit_data['desc'] if len(parents)==0 and revision != 0: wr('reset refs/heads/%s' % branch) @@ -218,7 +227,7 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, wr('commit refs/heads/%s' % branch) wr('mark :%d' % (revision+1)) if sob: - wr('author %s %d %s' % (get_author(desc,user,authors),time,timezone)) + wr('author %s %d %s' % (author,time,timezone)) wr('committer %s %d %s' % (user,time,timezone)) wr('data %d' % (len(desc)+1)) # wtf? 
wr(desc) @@ -259,8 +268,8 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, removed=[strip_leading_slash(x) for x in removed] map(lambda r: wr('D %s' % r),removed) - export_file_contents(ctx,man,added,hgtags,fn_encoding,filter_contents) - export_file_contents(ctx,man,changed,hgtags,fn_encoding,filter_contents) + export_file_contents(ctx,man,added,hgtags,fn_encoding,plugins) + export_file_contents(ctx,man,changed,hgtags,fn_encoding,plugins) wr() return checkpoint(count) @@ -396,7 +405,8 @@ def verify_heads(ui,repo,cache,force,branchesmap): def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile, authors={},branchesmap={},tagsmap={}, - sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding='',filter_contents=None): + sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding='', + plugins={}): def check_cache(filename, contents): if len(contents) == 0: sys.stderr.write('Warning: %s does not contain any data, this will probably make an incremental import fail\n' % filename) @@ -438,7 +448,8 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile, brmap={} for rev in range(min,max): c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap, - sob,brmap,hgtags,encoding,fn_encoding,filter_contents) + sob,brmap,hgtags,encoding,fn_encoding, + plugins) if notes: for rev in range(min,max): c=export_note(ui,repo,rev,c,authors, encoding, rev == min and min != 0) @@ -500,6 +511,10 @@ if __name__=='__main__': help="Assume mappings are raw = lines") parser.add_option("--filter-contents",dest="filter_contents", help="Pipe contents of each exported file through FILTER_CONTENTS ") + parser.add_option("--plugin-path", type="string", dest="pluginpath", + help="Additional search path for plugins ") + parser.add_option("--plugin", action="append", type="string", dest="plugins", + help="Add a plugin with the given init string ") (options,args)=parser.parse_args() @@ -538,13 +553,34 @@ if __name__=='__main__': if 
options.fn_encoding!=None: fn_encoding=options.fn_encoding - filter_contents=None + plugins=[] + if options.plugins!=None: + plugins+=options.plugins + if options.filter_contents!=None: - import shlex - filter_contents=shlex.split(options.filter_contents) + plugins+=['shell_filter_file_contents='+options.filter_contents] + + plugins_dict={} + plugins_dict['commit_message_filters']=[] + plugins_dict['file_data_filters']=[] + + if plugins and options.pluginpath: + sys.stderr.write('Using additional plugin path: ' + options.pluginpath + '\n') + + for plugin in plugins: + split = plugin.split('=') + name, opts = split[0], '='.join(split[1:]) + i = pluginloader.get_plugin(name,options.pluginpath) + sys.stderr.write('Loaded plugin ' + i['name'] + ' from path: ' + i['path'] +' with opts: ' + opts + '\n') + plugin = pluginloader.load_plugin(i).build_filter(opts) + if hasattr(plugin,'file_data_filter') and callable(plugin.file_data_filter): + plugins_dict['file_data_filters'].append(plugin.file_data_filter) + if hasattr(plugin, 'commit_message_filter') and callable(plugin.commit_message_filter): + plugins_dict['commit_message_filters'].append(plugin.commit_message_filter) sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile, options.headsfile, options.statusfile, authors=a,branchesmap=b,tagsmap=t, sob=options.sob,force=options.force,hgtags=options.hgtags, - notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,filter_contents=filter_contents)) + notes=options.notes,encoding=encoding,fn_encoding=fn_encoding, + plugins=plugins_dict)) diff --git a/hg-fast-export.sh b/hg-fast-export.sh index 531b3c5..6239253 100755 --- a/hg-fast-export.sh +++ b/hg-fast-export.sh @@ -58,6 +58,8 @@ Options: --mappings-are-raw Assume mappings are raw = lines --filter-contents Pipe contents of each exported file through with as arguments + --plugin Add a plugin with the given init string (repeatable) + --plugin-path Add an additional plugin lookup path " case "$1" in 
"""Locate and load hg-fast-export plugins.

A plugin is a directory, named after the plugin, that contains an
``__init__.py`` file.  An optional caller-supplied directory is searched
first, followed by the ``plugins`` folder that sits next to this package.
"""
import os
import sys

try:
    # Python 3.5+: load a module straight from a file path.
    from importlib.util import module_from_spec, spec_from_file_location
    imp = None
except ImportError:
    # Python 2: importlib.util is unavailable, use the legacy loader.
    module_from_spec = spec_from_file_location = None
    import imp

PluginFolder = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "plugins")
MainModule = "__init__"


def get_plugin(name, plugin_path):
    """Find the plugin called ``name`` and describe where it lives.

    ``plugin_path`` is an optional extra directory; when given it takes
    precedence over the bundled ``PluginFolder``.

    Returns a dict with the keys ``name`` (the plugin name), ``path`` (the
    plugin directory) and ``info`` (the path of the plugin's main file, to
    be handed back to ``load_plugin``).  No file is opened here, so nothing
    is left for the caller to close.

    Raises ``Exception`` if no search directory contains the plugin.
    """
    search_dirs = [PluginFolder]
    if plugin_path:
        search_dirs = [plugin_path] + search_dirs
    for directory in search_dirs:
        location = os.path.join(directory, name)
        main_file = os.path.join(location, MainModule + ".py")
        if not os.path.isfile(main_file):
            continue
        return {"name": name, "info": main_file, "path": location}
    raise Exception("Could not find plugin with name " + name)


def load_plugin(plugin):
    """Import the plugin described by a ``get_plugin`` result; return the module.

    Each plugin is registered under its own module name.  Loading every
    plugin as ``__init__`` would execute a second plugin inside the first
    plugin's module object and overwrite its globals.
    """
    module_name = "hg_fast_export_plugin_" + plugin["name"]
    main_file = plugin["info"]
    if spec_from_file_location is None:
        # Legacy path: load_source opens and closes the file itself.
        return imp.load_source(module_name, main_file)
    spec = spec_from_file_location(module_name, main_file)
    module = module_from_spec(spec)
    # Register before executing so code that looks itself up in sys.modules
    # during import keeps working; undo the registration if the import fails.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        sys.modules.pop(module_name, None)
        raise
    return module
def build_filter(args):
    """Create the dos2unix filter.

    ``args`` is the plugin init string; it is passed through to ``Filter``,
    which does not use it.
    """
    return Filter(args)


class Filter():
    """File-data filter that rewrites CRLF line endings as LF in text files."""

    def __init__(self, args):
        # This plugin has no options, so the init string is ignored.
        pass

    def file_data_filter(self, file_data):
        """Convert ``file_data['data']`` in place unless the file is binary.

        ``file_data`` is the dict handed to every file filter; this method
        reads ``file_ctx`` (only its ``isbinary()`` result) and ``data``, and
        writes ``data`` back.  Returns ``None``.
        """
        file_ctx = file_data['file_ctx']
        if file_ctx.isbinary():
            return
        data = file_data['data']
        # File contents are bytes on Python 3 (and plain str on Python 2,
        # where bytes is str).  Mixing bytes data with text literals raises
        # TypeError on Python 3, so choose literals that match the data.
        if isinstance(data, bytes):
            file_data['data'] = data.replace(b'\r\n', b'\n')
        else:
            file_data['data'] = data.replace(u'\r\n', u'\n')
def build_filter(args):
    """Create the shell filter; ``args`` is the filter command line."""
    return Filter(args)


class Filter:
    """Pipe the contents of each exported file through an external command."""

    def __init__(self, args):
        """Split the init string into an argv list.

        Raises ``Exception`` when no command was supplied.  Without this
        check an empty init string yields an empty argv prefix, and the
        exported file's own name would be executed as the command.
        """
        # `args or ''` keeps a missing init string from reaching shlex.split
        # as None, which would make it read from standard input.
        self.filter_contents = shlex.split(args or '')
        if not self.filter_contents:
            raise Exception('shell_filter_file_contents requires a filter command, '
                            'e.g. --plugin shell_filter_file_contents=path/to/script.sh')

    def file_data_filter(self, file_data):
        """Replace ``file_data['data']`` with the filter command's stdout.

        The command is run with three extra arguments: the file name, the
        hex form of ``file_ctx.filenode()``, and ``'1'``/``'0'`` depending on
        ``file_ctx.isbinary()``.  The current data is fed to its stdin.

        Raises ``subprocess.CalledProcessError`` if the command exits with a
        non-zero status; errors from starting or talking to the process are
        reported on stderr and re-raised.
        """
        d = file_data['data']
        file_ctx = file_data['file_ctx']
        filename = file_data['filename']
        binary_flag = '1' if file_ctx.isbinary() else '0'
        filter_cmd = self.filter_contents + [filename, node.hex(file_ctx.filenode()), binary_flag]
        try:
            filter_proc = subprocess.Popen(filter_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            d, _ = filter_proc.communicate(d)
        except Exception:
            # Say which command failed before the traceback is printed.
            sys.stderr.write('Running filter-contents %s:\n' % filter_cmd)
            raise
        # communicate() has waited for the process, so returncode is set.
        filter_ret = filter_proc.returncode
        if filter_ret:
            raise subprocess.CalledProcessError(filter_ret, filter_cmd)
        file_data['data'] = d