From 850094c4987d9f7c5f8aec3eea1012f6694b8f16 Mon Sep 17 00:00:00 2001 From: Johan Henkens Date: Wed, 5 Dec 2018 09:23:04 -0800 Subject: [PATCH 1/5] Add gitattributes, additional ignores --- .gitattributes | 2 ++ .gitignore | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..4cab1f4 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Set the default behavior, in case people don't have core.autocrlf set. +* text=auto diff --git a/.gitignore b/.gitignore index 2226203..7259ca6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ +*.orig *.pyc .dotest +.idea/ From e895ce087f436c5e27247f4fc2e07e5565fd06d2 Mon Sep 17 00:00:00 2001 From: Johan Henkens Date: Wed, 5 Dec 2018 09:23:35 -0800 Subject: [PATCH 2/5] Add plugin system --- README.md | 48 +++++++++++++++++++++++++++++ hg-fast-export.py | 66 +++++++++++++++++++++++++++++++++++----- hg-fast-export.sh | 2 ++ pluginloader/__init__.py | 19 ++++++++++++ 4 files changed, 127 insertions(+), 8 deletions(-) create mode 100644 pluginloader/__init__.py diff --git a/README.md b/README.md index 3219240..bb5815d 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,54 @@ if [ "$3" == "1" ]; then cat; else dos2unix; fi -- End of crlf-filter.sh -- ``` + +Plugins +----------------- + +hg-fast-export supports plugins to manipulate the file data and commit +metadata. The plugins are enabled with the --plugin option. The value +of said option is a plugin name (by folder in the plugins directory), +and optionally, an equals-sign followed by an initialization string. + +There is a readme accompanying each of the bundled plugins, with a +description of the usage. To create a new plugin, one must simply +add a new folder under the `plugins` directory, with the name of the +new plugin. 
Inside, there must be an `__init__.py` file, which contains +at a minimum: + +``` +def build_filter(args): + return Filter(args) + +class Filter: + def __init__(self, args): + pass + #Or don't pass, if you want to do some init code here +``` + + +``` +commit_data = {'branch': branch, 'parents': parents, 'author': author, 'desc': desc} + +def commit_message_filter(self,commit_data): +``` +The `commit_message_filter` method is called for each commit, after parsing +from hg, but before outputting to git. The dictionary `commit_data` contains the +above attributes about the commit, and can be modified by any filter. The +values in the dictionary after filters have been run are used to create the git +commit. + +``` +file_data = {'filename':filename,'file_ctx':file_ctx,'data':d} + +def file_data_filter(self,file_data): +``` +The `file_data_filter` method is called for each file within each commit. +The dictionary `file_data` contains the above attributes about the file, and +can be modified by any filter. `file_ctx` is the filecontext from the +mercurial python library. After all filters have been run, the values +are used to add the file to the git commit. 
+ Notes/Limitations ----------------- diff --git a/hg-fast-export.py b/hg-fast-export.py index a21148e..253055d 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -11,6 +11,7 @@ from optparse import OptionParser import re import sys import os +import pluginloader if sys.platform == "win32": # On Windows, sys.stdout is initially opened in text mode, which means that @@ -123,7 +124,7 @@ def get_author(logmessage,committer,authors): return r return committer -def export_file_contents(ctx,manifest,files,hgtags,encoding='',filter_contents=None): +def export_file_contents(ctx,manifest,files,hgtags,encoding='',filter_contents=None,plugins={}): count=0 max=len(files) for file in files: @@ -149,6 +150,15 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',filter_contents=N filter_ret=filter_proc.poll() if filter_ret: raise subprocess.CalledProcessError(filter_ret,filter_cmd) + + if plugins and plugins['file_data_filters']: + file_data = {'filename':filename,'file_ctx':file_ctx,'data':d} + for filter in plugins['file_data_filters']: + filter(file_data) + d=file_data['data'] + filename=file_data['filename'] + file_ctx=file_data['file_ctx'] + wr('M %s inline %s' % (gitmode(manifest.flags(file)), strip_leading_slash(filename))) wr('data %d' % len(d)) # had some trouble with size() @@ -198,7 +208,8 @@ def strip_leading_slash(filename): return filename def export_commit(ui,repo,revision,old_marks,max,count,authors, - branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',filter_contents=None): + branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',filter_contents=None, + plugins={}): def get_branchname(name): if brmap.has_key(name): return brmap[name] @@ -211,6 +222,16 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, branch=get_branchname(branch) parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0] + author = get_author(desc,user,authors) + + if plugins and plugins['commit_message_filters']: + commit_data = {'branch': 
branch, 'parents': parents, 'author': author, 'desc': desc} + for filter in plugins['commit_message_filters']: + filter(commit_data) + branch = commit_data['branch'] + parents = commit_data['parents'] + author = commit_data['author'] + desc = commit_data['desc'] if len(parents)==0 and revision != 0: wr('reset refs/heads/%s' % branch) @@ -218,7 +239,7 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, wr('commit refs/heads/%s' % branch) wr('mark :%d' % (revision+1)) if sob: - wr('author %s %d %s' % (get_author(desc,user,authors),time,timezone)) + wr('author %s %d %s' % (author,time,timezone)) wr('committer %s %d %s' % (user,time,timezone)) wr('data %d' % (len(desc)+1)) # wtf? wr(desc) @@ -259,8 +280,8 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, removed=[strip_leading_slash(x) for x in removed] map(lambda r: wr('D %s' % r),removed) - export_file_contents(ctx,man,added,hgtags,fn_encoding,filter_contents) - export_file_contents(ctx,man,changed,hgtags,fn_encoding,filter_contents) + export_file_contents(ctx,man,added,hgtags,fn_encoding,filter_contents,plugins) + export_file_contents(ctx,man,changed,hgtags,fn_encoding,filter_contents,plugins) wr() return checkpoint(count) @@ -396,7 +417,8 @@ def verify_heads(ui,repo,cache,force,branchesmap): def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile, authors={},branchesmap={},tagsmap={}, - sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding='',filter_contents=None): + sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding='',filter_contents=None, + plugins={}): def check_cache(filename, contents): if len(contents) == 0: sys.stderr.write('Warning: %s does not contain any data, this will probably make an incremental import fail\n' % filename) @@ -438,7 +460,8 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile, brmap={} for rev in range(min,max): c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap, - 
sob,brmap,hgtags,encoding,fn_encoding,filter_contents) + sob,brmap,hgtags,encoding,fn_encoding,filter_contents, + plugins) if notes: for rev in range(min,max): c=export_note(ui,repo,rev,c,authors, encoding, rev == min and min != 0) @@ -500,6 +523,10 @@ if __name__=='__main__': help="Assume mappings are raw = lines") parser.add_option("--filter-contents",dest="filter_contents", help="Pipe contents of each exported file through FILTER_CONTENTS ") + parser.add_option("--plugin-path", type="string", dest="pluginpath", + help="Additional search path for plugins ") + parser.add_option("--plugin", action="append", type="string", dest="plugins", + help="Add a plugin with the given init string ") (options,args)=parser.parse_args() @@ -538,13 +565,36 @@ if __name__=='__main__': if options.fn_encoding!=None: fn_encoding=options.fn_encoding + plugins=[] + if options.plugins!=None: + plugins+=options.plugins + filter_contents=None if options.filter_contents!=None: import shlex filter_contents=shlex.split(options.filter_contents) + plugins_dict={} + plugins_dict['commit_message_filters']=[] + plugins_dict['file_data_filters']=[] + + if plugins and options.pluginpath: + sys.stderr.write('Using additional plugin path: ' + options.pluginpath + '\n') + + for plugin in plugins: + split = plugin.split('=') + name, opts = split[0], '='.join(split[1:]) + i = pluginloader.get_plugin(name,options.pluginpath) + sys.stderr.write('Loaded plugin ' + i['name'] + ' from path: ' + i['path'] +' with opts: ' + opts + '\n') + plugin = pluginloader.load_plugin(i).build_filter(opts) + if hasattr(plugin,'file_data_filter') and callable(plugin.file_data_filter): + plugins_dict['file_data_filters'].append(plugin.file_data_filter) + if hasattr(plugin, 'commit_message_filter') and callable(plugin.commit_message_filter): + plugins_dict['commit_message_filters'].append(plugin.commit_message_filter) + sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile, options.headsfile, 
options.statusfile, authors=a,branchesmap=b,tagsmap=t, sob=options.sob,force=options.force,hgtags=options.hgtags, - notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,filter_contents=filter_contents)) + notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,filter_contents=filter_contents, + plugins=plugins_dict)) diff --git a/hg-fast-export.sh b/hg-fast-export.sh index 531b3c5..6239253 100755 --- a/hg-fast-export.sh +++ b/hg-fast-export.sh @@ -58,6 +58,8 @@ Options: --mappings-are-raw Assume mappings are raw = lines --filter-contents Pipe contents of each exported file through with as arguments + --plugin Add a plugin with the given init string (repeatable) + --plugin-path Add an additional plugin lookup path " case "$1" in -h|--help) diff --git a/pluginloader/__init__.py b/pluginloader/__init__.py new file mode 100644 index 0000000..82373a5 --- /dev/null +++ b/pluginloader/__init__.py @@ -0,0 +1,19 @@ +import os +import imp +PluginFolder = os.path.join(os.path.dirname(os.path.realpath(__file__)),"..","plugins") +MainModule = "__init__" + +def get_plugin(name, plugin_path): + search_dirs = [PluginFolder] + if plugin_path: + search_dirs = [plugin_path] + search_dirs + for dir in search_dirs: + location = os.path.join(dir, name) + if not os.path.isdir(location) or not MainModule + ".py" in os.listdir(location): + continue + info = imp.find_module(MainModule, [location]) + return {"name": name, "info": info, "path": location} + raise Exception("Could not find plugin with name " + name) + +def load_plugin(plugin): + return imp.load_module(MainModule, *plugin["info"]) From 679103795b9d58ea6ee79f75c8404e85c6ecabb5 Mon Sep 17 00:00:00 2001 From: Johan Henkens Date: Wed, 5 Dec 2018 09:23:54 -0800 Subject: [PATCH 3/5] Add dos2unix plugin --- README.md | 2 ++ plugins/dos2unix/README.md | 9 +++++++++ plugins/dos2unix/__init__.py | 11 +++++++++++ 3 files changed, 22 insertions(+) create mode 100644 plugins/dos2unix/README.md create mode 100644 
plugins/dos2unix/__init__.py diff --git a/README.md b/README.md index bb5815d..b808d88 100644 --- a/README.md +++ b/README.md @@ -145,6 +145,8 @@ class Filter: #Or don't pass, if you want to do some init code here ``` +Beyond the boilerplate initialization, you can see the one of the +defined filter methods in the [dos2unix](./plugins/dos2unix) plugin. ``` commit_data = {'branch': branch, 'parents': parents, 'author': author, 'desc': desc} diff --git a/plugins/dos2unix/README.md b/plugins/dos2unix/README.md new file mode 100644 index 0000000..5f35f11 --- /dev/null +++ b/plugins/dos2unix/README.md @@ -0,0 +1,9 @@ +## Dos2unix filter + +This plugin converts CRLF line ending to LF in text files in the repo. +It is recommended that you have a .gitattributes file that maintains +the usage of LF endings going forward, for after you have converted your +repository. + +To use the plugin, add +`--plugin dos2unix`. diff --git a/plugins/dos2unix/__init__.py b/plugins/dos2unix/__init__.py new file mode 100644 index 0000000..bf676a0 --- /dev/null +++ b/plugins/dos2unix/__init__.py @@ -0,0 +1,11 @@ +def build_filter(args): + return Filter(args) + +class Filter(): + def __init__(self, args): + pass + + def file_data_filter(self,file_data): + file_ctx = file_data['file_ctx'] + if not file_ctx.isbinary(): + file_data['data'] = file_data['data'].replace('\r\n', '\n') From 5e7895ca6bfa800d0cbb765a4b81234d9470f578 Mon Sep 17 00:00:00 2001 From: Johan Henkens Date: Wed, 5 Dec 2018 09:24:15 -0800 Subject: [PATCH 4/5] Add branch_name_in_commit plugin --- README.md | 5 +++-- plugins/branch_name_in_commit/README.md | 10 ++++++++++ plugins/branch_name_in_commit/__init__.py | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 plugins/branch_name_in_commit/README.md create mode 100644 plugins/branch_name_in_commit/__init__.py diff --git a/README.md b/README.md index b808d88..34c6bf0 100644 --- a/README.md +++ b/README.md @@ -145,8 +145,9 @@ class Filter: 
#Or don't pass, if you want to do some init code here ``` -Beyond the boilerplate initialization, you can see the one of the -defined filter methods in the [dos2unix](./plugins/dos2unix) plugin. +Beyond the boilerplate initialization, you can see the two different +defined filter methods in the [dos2unix](./plugins/dos2unix) and +[branch_name_in_commit](./plugins/branch_name_in_commit) plugins. ``` commit_data = {'branch': branch, 'parents': parents, 'author': author, 'desc': desc} diff --git a/plugins/branch_name_in_commit/README.md b/plugins/branch_name_in_commit/README.md new file mode 100644 index 0000000..b11982b --- /dev/null +++ b/plugins/branch_name_in_commit/README.md @@ -0,0 +1,10 @@ +## Branch Name in Commit Message + +Mercurial has a much stronger notion of branches than Git, +and some parties may not wish to lose the branch information +during the migration to Git. You can use this plugin to either +prepend or append the branch name from the mercurial +commit into the commit message in Git. + +To use the plugin, add +`--plugin branch_name_in_commit=(start|end)`. 
diff --git a/plugins/branch_name_in_commit/__init__.py b/plugins/branch_name_in_commit/__init__.py new file mode 100644 index 0000000..20abe5b --- /dev/null +++ b/plugins/branch_name_in_commit/__init__.py @@ -0,0 +1,14 @@ +def build_filter(args): + return Filter(args) + +class Filter: + def __init__(self, args): + if not args in ['start','end']: + raise Exception('Cannot have branch name anywhere but start and end') + self.pos = args + + def commit_message_filter(self,commit_data): + if self.pos == 'start': + commit_data['desc'] = commit_data['branch'] + '\n' + commit_data['desc'] + if self.pos == 'end': + commit_data['desc'] = commit_data['desc'] + '\n' + commit_data['branch'] From cadcfcbe9020ddfb0a97b53fc0df323f1259cb56 Mon Sep 17 00:00:00 2001 From: Johan Henkens Date: Wed, 5 Dec 2018 09:24:56 -0800 Subject: [PATCH 5/5] Move filter_contents to plugin system --- hg-fast-export.py | 30 +++++-------------- plugins/shell_filter_file_contents/README.md | 30 +++++++++++++++++++ .../shell_filter_file_contents/__init__.py | 28 +++++++++++++++++ 3 files changed, 66 insertions(+), 22 deletions(-) create mode 100644 plugins/shell_filter_file_contents/README.md create mode 100644 plugins/shell_filter_file_contents/__init__.py diff --git a/hg-fast-export.py b/hg-fast-export.py index 253055d..e53b5dd 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -124,7 +124,7 @@ def get_author(logmessage,committer,authors): return r return committer -def export_file_contents(ctx,manifest,files,hgtags,encoding='',filter_contents=None,plugins={}): +def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}): count=0 max=len(files) for file in files: @@ -138,18 +138,6 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',filter_contents=N filename=file file_ctx=ctx.filectx(file) d=file_ctx.data() - if filter_contents: - import subprocess - filter_cmd=filter_contents + [filename,node.hex(file_ctx.filenode()),'1' if file_ctx.isbinary() else '0'] - try: - 
filter_proc=subprocess.Popen(filter_cmd,stdin=subprocess.PIPE,stdout=subprocess.PIPE) - d,_=filter_proc.communicate(d) - except: - sys.stderr.write('Running filter-contents %s:\n' % filter_cmd) - raise - filter_ret=filter_proc.poll() - if filter_ret: - raise subprocess.CalledProcessError(filter_ret,filter_cmd) if plugins and plugins['file_data_filters']: file_data = {'filename':filename,'file_ctx':file_ctx,'data':d} @@ -208,7 +196,7 @@ def strip_leading_slash(filename): return filename def export_commit(ui,repo,revision,old_marks,max,count,authors, - branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',filter_contents=None, + branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='', plugins={}): def get_branchname(name): if brmap.has_key(name): @@ -280,8 +268,8 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, removed=[strip_leading_slash(x) for x in removed] map(lambda r: wr('D %s' % r),removed) - export_file_contents(ctx,man,added,hgtags,fn_encoding,filter_contents,plugins) - export_file_contents(ctx,man,changed,hgtags,fn_encoding,filter_contents,plugins) + export_file_contents(ctx,man,added,hgtags,fn_encoding,plugins) + export_file_contents(ctx,man,changed,hgtags,fn_encoding,plugins) wr() return checkpoint(count) @@ -417,7 +405,7 @@ def verify_heads(ui,repo,cache,force,branchesmap): def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile, authors={},branchesmap={},tagsmap={}, - sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding='',filter_contents=None, + sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding='', plugins={}): def check_cache(filename, contents): if len(contents) == 0: @@ -460,7 +448,7 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile, brmap={} for rev in range(min,max): c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap, - sob,brmap,hgtags,encoding,fn_encoding,filter_contents, + sob,brmap,hgtags,encoding,fn_encoding, plugins) if notes: for rev in 
range(min,max): @@ -569,10 +557,8 @@ if __name__=='__main__': if options.plugins!=None: plugins+=options.plugins - filter_contents=None if options.filter_contents!=None: - import shlex - filter_contents=shlex.split(options.filter_contents) + plugins+=['shell_filter_file_contents='+options.filter_contents] plugins_dict={} plugins_dict['commit_message_filters']=[] @@ -596,5 +582,5 @@ if __name__=='__main__': options.headsfile, options.statusfile, authors=a,branchesmap=b,tagsmap=t, sob=options.sob,force=options.force,hgtags=options.hgtags, - notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,filter_contents=filter_contents, + notes=options.notes,encoding=encoding,fn_encoding=fn_encoding, plugins=plugins_dict)) diff --git a/plugins/shell_filter_file_contents/README.md b/plugins/shell_filter_file_contents/README.md new file mode 100644 index 0000000..108cd2a --- /dev/null +++ b/plugins/shell_filter_file_contents/README.md @@ -0,0 +1,30 @@ +## Shell Script File Filter + +This plugin uses shell scripts in order to perform filtering of files. +If your preferred scripting is done via shell, this tool is for you. +Note, though, that this method can cause an order-of-magnitude +slowdown. For small repositories, this won't be an issue. + +To use the plugin, add +`--plugin shell_filter_file_contents=path/to/shell/script.sh`. +The filter script is supplied to the plugin option after the plugin name, +which is in turn passed to the plugin initialization. hg-fast-export +runs the filter for each exported file, pipes its content to the filter's +standard input, and uses the filter's standard output in place +of the file's original content. An example use of this feature +is to convert line endings in text files from CRLF to git's preferred LF, +although this task is performed faster by the native dos2unix plugin. 
+ +The script is called with the following syntax: +`FILTER_CONTENTS <file-path> <hg-hash> <is-binary>` + +``` +-- Start of crlf-filter.sh -- +#!/bin/sh +# $1 = pathname of exported file relative to the root of the repo +# $2 = Mercurial's hash of the file +# $3 = "1" if Mercurial reports the file as binary, otherwise "0" + +if [ "$3" == "1" ]; then cat; else dos2unix; fi +-- End of crlf-filter.sh -- +``` diff --git a/plugins/shell_filter_file_contents/__init__.py b/plugins/shell_filter_file_contents/__init__.py new file mode 100644 index 0000000..84fd938 --- /dev/null +++ b/plugins/shell_filter_file_contents/__init__.py @@ -0,0 +1,28 @@ +#Pipe contents of each exported file through FILTER_CONTENTS " +import subprocess +import shlex +import sys +from mercurial import node + +def build_filter(args): + return Filter(args) + +class Filter: + def __init__(self, args): + self.filter_contents = shlex.split(args) + + def file_data_filter(self,file_data): + d = file_data['data'] + file_ctx = file_data['file_ctx'] + filename = file_data['filename'] + filter_cmd = self.filter_contents + [filename, node.hex(file_ctx.filenode()), '1' if file_ctx.isbinary() else '0'] + try: + filter_proc = subprocess.Popen(filter_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + d, _ = filter_proc.communicate(d) + except: + sys.stderr.write('Running filter-contents %s:\n' % filter_cmd) + raise + filter_ret = filter_proc.poll() + if filter_ret: + raise subprocess.CalledProcessError(filter_ret, filter_cmd) + file_data['data'] = d