From 38741e0bbfcf05d4b6b277e68f1cae326eea4b78 Mon Sep 17 00:00:00 2001
From: Mark Nauwelaerts
Date: Sat, 18 Jun 2016 13:47:08 +0200
Subject: [PATCH] Add git-hg-helper

---
 git-hg-helper | 579 ++++++++++++++++++++++++++++++++++++++++++++++++++
 test/Makefile |   2 +-
 test/helper.t | 179 ++++++++++++++++
 3 files changed, 759 insertions(+), 1 deletion(-)
 create mode 100755 git-hg-helper
 create mode 100755 test/helper.t

diff --git a/git-hg-helper b/git-hg-helper
new file mode 100755
index 0000000..280757c
--- /dev/null
+++ b/git-hg-helper
@@ -0,0 +1,579 @@
+#!/usr/bin/env python2
+#
+# Copyright (c) 2016 Mark Nauwelaerts
+#
+
+from mercurial import hg, ui, commands, util
+
+import re
+import sys
+import os
+import subprocess
+import argparse
+import textwrap
+import logging
+
+# thanks go to git-remote-helper for some helper functions
+
+def die(msg, *args):
+    sys.stderr.write('ERROR: %s\n' % (msg % args))
+    sys.exit(1)
+
+def warn(msg, *args):
+    sys.stderr.write('WARNING: %s\n' % (msg % args))
+
+def info(msg, *args):
+    logger.info(msg, *args)
+
+def debug(msg, *args):
+    logger.debug(msg, *args)
+
+def log(msg, *args):
+    logger.log(logging.LOG, msg, *args)
+
+class GitHgRepo:
+    """Git repository wrapper that locates the hg repos backing its remotes."""
+
+    def __init__(self, topdir=None, gitdir=None):
+        if gitdir is not None:
+            self.gitdir = gitdir
+            self.topdir = os.path.join(gitdir, '..') # will have to do
+        else:
+            self.topdir = None
+            if not topdir:
+                topdir = self.run_cmd(['rev-parse', '--show-cdup']).strip()
+                if not topdir:
+                    if not os.path.exists('.git'):
+                        # now we lost where we are
+                        raise Exception('failed to determine topdir')
+                    topdir = '.'
+            self.topdir = topdir
+            self.gitdir = self.run_cmd(['rev-parse', '--git-dir']).strip()
+            if not self.gitdir:
+                raise Exception('failed to determine gitdir')
+            # the above was run in topdir
+            if not os.path.isabs(self.gitdir):
+                self.gitdir = os.path.join(self.topdir, self.gitdir)
+        self.hg_repos = {}
+
+    def identity(self):
+        return '[%s|%s]' % (os.getcwd(), self.topdir)
+
+    # run a git cmd in repo dir, captures stdout and stderr by default
+    # override in kwargs if otherwise desired
+    def run_cmd(self, args, check=False, **kwargs):
+        cmd = ['git'] + args
+        popen_options = { 'cwd': self.topdir,
+            'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE }
+        popen_options.update(kwargs)
+        log('%s running cmd %s with options %s', self.identity(),
+            cmd, popen_options)
+        process = subprocess.Popen(cmd, **popen_options)
+        output = process.communicate()[0]
+        if check and process.returncode != 0:
+            die('command failed: %s', ' '.join(cmd))
+        return output
+
+    def get_config(self, config, getall=False):
+        """Return the stdout of `git config --get[-all] <config>` (stderr not captured)."""
+        get = { True : '--get-all', False: '--get' }
+        return self.run_cmd(['config', get[getall] , config], stderr=None)
+
+    def get_config_bool(self, config, default=False):
+        value = self.get_config(config).rstrip('\n')
+        if value == "true":
+            return True
+        elif value == "false":
+            return False
+        else:
+            return default
+
+    def get_hg_repo_url(self, remote):
+        url = self.get_config('remote.%s.url' % (remote))
+        if url and url[0:4] == 'hg::':
+            url = url[4:].strip()
+        else:
+            url = None
+        return url
+
+    def get_hg_rev(self, commit):
+        hgrev = self.run_cmd(['notes', '--ref', 'refs/notes/hg', 'show', commit])
+        return hgrev
+
+    def rev_parse(self, ref):
+        args = [ref] if not isinstance(ref, list) else ref
+        args[0:0] = ['rev-parse', '--verify', '-q']
+        return self.run_cmd(args).strip()
+
+    def update_ref(self, ref, value):
+        """Point ref at value; return whether both now resolve to the same id."""
+        self.run_cmd(['update-ref', '-m', 'update by helper', ref, value])
+        # let's check it happened
+        return self.rev_parse(ref) == self.rev_parse(value)
+
+    def cat_file(self, ref):
+        return self.run_cmd(['cat-file', '-p', ref])
+
+    def get_git_commit(self, rev):
+        """Map hg revision rev to a git commit id using the marks files (None if unknown)."""
+        mydir = os.path.dirname(__file__)
+        import imp
+        remotehg = imp.load_source('remotehg', os.path.join(mydir, 'git-remote-hg'))
+        for r in self.get_hg_repos():
+            try:
+                hgpath = os.path.join(self.gitdir, 'hg', r)
+                m = remotehg.Marks(os.path.join(hgpath, 'marks-hg'), None)
+                mark = m.from_rev(rev)
+                m = GitMarks(os.path.join(hgpath, 'marks-git'))
+                return m.to_rev(mark)
+            except Exception:
+                # best effort: lookup failed for this remote, try the next one
+                pass
+
+    # returns dict: (alias: local hg repo dir)
+    def get_hg_repos(self):
+        # minor caching
+        if self.hg_repos:
+            return self.hg_repos
+
+        # check any local hg repo to see if rev is in there
+        shared_path = os.path.join(self.gitdir, 'hg')
+        hg_path = os.path.join(shared_path, '.hg')
+        if os.path.exists(shared_path):
+            repos = os.listdir(shared_path)
+            for r in repos:
+                # skip the shared repo
+                if r == '.hg':
+                    continue
+                local_path = os.path.join(shared_path, r, 'clone')
+                local_hg = os.path.join(local_path, '.hg')
+                if not os.path.exists(local_hg):
+                    # could be a local repo without proxy, fetch url
+                    local_path = self.get_hg_repo_url(r)
+                    if not local_path:
+                        warn('failed to find local hg for remote %s', r)
+                        continue
+                else:
+                    # make sure the shared path is always up-to-date
+                    util.writefile(os.path.join(local_hg, 'sharedpath'), hg_path)
+                self.hg_repos[r] = os.path.join(local_path)
+
+        log('%s determined hg_repos %s', self.identity(), self.hg_repos)
+        return self.hg_repos
+
+    # returns hg repo object
+    def get_hg_repo(self, r):
+        repos = self.get_hg_repos()
+        if r in repos:
+            local_path = repos[r]
+            hushui = ui.ui()
+            hushui.setconfig('ui', 'interactive', 'off')
+            hushui.fout = open(os.devnull, 'w')
+            return hg.repository(hushui, local_path)
+
+    def find_hg_repo(self, rev):
+        repos = self.get_hg_repos()
+        for r in repos:
+            srepo = self.get_hg_repo(r)
+            # if this one had it, we are done
+            if srepo and rev in srepo and srepo[rev]:
+                return srepo
+
+
+class SubCommand:
+    """Base class of all subcommands: parses arguments, then dispatches to do()."""
+
+    def __init__(self, subcmdname, githgrepo):
+        self.subcommand = subcmdname
+        self.githgrepo = githgrepo
+        self.argparser = self.argumentparser()
+
+    def argumentparser(self):
+        return argparse.ArgumentParser()
+
+    def get_remote(self, args):
+        if len(args):
+            return (args[0], args[1:])
+        else:
+            self.usage('missing argument: <remote>')
+
+    def get_remote_url_hg(self, remote):
+        url = self.githgrepo.get_hg_repo_url(remote)
+        if not url:
+            self.usage('%s is not a remote hg repository' % (remote))
+        return url
+
+    def execute(self, args):
+        (self.options, self.args) = self.argparser.parse_known_args(args)
+        self.do(self.options, self.args)
+
+    def usage(self, msg):
+        if msg:
+            self.argparser.error(msg)
+        else:
+            self.argparser.print_usage(sys.stderr)
+        sys.exit(2)
+
+    def do(self, options, args):
+        pass
+
+
+class HgRevCommand(SubCommand):
+    """Subcommand 'hg-rev': print the hg revision noted on a git commit."""
+
+    def argumentparser(self):
+        usage = '%%(prog)s %s [options] <commit-ish>' % (self.subcommand)
+        p = argparse.ArgumentParser(usage=usage)
+        p.epilog = textwrap.dedent("""\
+        Determines the hg revision corresponding to <commit-ish>.
+        """)
+        return p
+
+    def do(self, options, args):
+        if len(args):
+            hgrev = self.githgrepo.get_hg_rev(args[0])
+            if hgrev:
+                print hgrev
+
+
+class GitMarks:
+    """In-memory view of a git marks file, i.e. lines of the form ':<mark> <id>'."""
+
+    def __init__(self, path):
+        self.path = path
+        self.clear()
+        self.load()
+
+    def clear(self):
+        self.marks = {}
+        self.rev_marks = {}
+
+    def load(self):
+        if not os.path.exists(self.path):
+            return
+
+        with open(self.path) as f:
+            for l in f:
+                # split once only, so the 2-way unpack cannot see extra fields
+                m, c = l.strip().split(' ', 1)
+                m = int(m[1:])
+                self.marks[c] = m
+                self.rev_marks[m] = c
+
+    def store(self):
+        marks = self.rev_marks.keys()
+        marks.sort()
+        with open(self.path, 'w') as f:
+            for m in marks:
+                f.write(':%d %s\n' % (m, self.rev_marks[m]))
+
+    def from_rev(self, rev):
+        return self.marks[rev]
+
+    def to_rev(self, mark):
+        return str(self.rev_marks[mark])
+
+
+class GitRevCommand(SubCommand):
+    """Subcommand 'git-rev': print the git commit id tracked for a hg revision."""
+
+    def argumentparser(self):
+        usage = '%%(prog)s %s [options] <revision>' % (self.subcommand)
+        p = argparse.ArgumentParser(usage=usage)
+        p.epilog = textwrap.dedent("""\
+        Determines the git commit id corresponding to hg <revision>.
+        """)
+        return p
+
+    def do(self, options, args):
+        if len(args):
+            rev = args[0]
+            gitcommit = self.githgrepo.get_git_commit(rev)
+            if gitcommit:
+                print gitcommit
+
+
+class MarksCommand(SubCommand):
+    """Subcommand 'marks': check and trim the hg/git marks files of remotes."""
+
+    def argumentparser(self):
+        usage = '%%(prog)s %s [options] <remote>...' % (self.subcommand)
+        p = argparse.ArgumentParser(usage=usage, \
+            formatter_class=argparse.RawDescriptionHelpFormatter)
+        p.add_argument('-n', '--dry-run', action='store_true',
+            help='do not actually update any metadata files')
+        p.add_argument('--keep', metavar='REVID',
+            help='only retain ancestors of REVID (including) in \
+            hg tracking metadata, akin to hg\'s strip')
+        p.epilog = textwrap.dedent("""\
+        Performs checks on <remote>'s marks files and ensures these are consistent
+        (never affecting or touching any git repository objects or history).
+        The marks files are considered consistent if they "join"
+        on the :mark number (with no dangling hg or git commit id on either side).
+
+        While fetching from an hg remote usually results in a sane state, there
+        are some cases where that might not suffice (or not even succeed in the first
+        place).  Also, fetching will only ever add tracked metadata marks, whereas
+        sometimes forgetting about some state might be required for consistent state
+        recovery (e.g. a strip performed on a remote Mercurial repo).  Executing this
+        command should allow a subsequent fetch to succeed and restore a sane state
+        quickly.  In particular, making good use of --keep following a strip allows
+        a subsequent fetch to recover quickly without extensive history processing.
+
+        Furthermore, since git-fast-import (used during fetch) also dumps
+        non-commit SHA-1 in the marks file, the latter can become pretty large.
+        It will reduce in size by either performing a push (git-fast-export only
+        dumps commit objects to marks file) or by running this helper command.
+        """)
+        return p
+
+    def do(self, options, args):
+        """Trim each named remote's marks to the consistent subset and fix up hg tips."""
+        mydir = os.path.dirname(__file__)
+        import imp
+        remotehg = imp.load_source('remotehg', os.path.join(mydir, 'git-remote-hg'))
+
+        hg_repos = self.githgrepo.get_hg_repos()
+        if not args:
+            self.usage('no remote specified')
+        for remote in args:
+            if not remote in hg_repos:
+                self.usage('%s is not a valid hg remote' % (remote))
+            hgpath = os.path.join(self.githgrepo.gitdir, 'hg', remote)
+            hgm = remotehg.Marks(os.path.join(hgpath, 'marks-hg'), None)
+            gm = GitMarks(os.path.join(hgpath, 'marks-git'))
+            repo = hg.repository(ui.ui(), hg_repos[remote])
+            ctx = ctxrev = None
+            if options.keep is not None:
+                strip = options.keep
+                if strip in repo and repo[strip]:
+                    ctx = repo[strip]
+                    ctxrev = ctx.rev()
+                if ctxrev is None or ctxrev < 0:
+                    self.usage('revision %s not found in repository %s' % (strip, repo.root))
+            # reduce down to marks that are common to both
+            common_marks = set(hgm.rev_marks.keys()).intersection(gm.rev_marks.keys())
+            hg_rev_marks = {}
+            git_rev_marks = {}
+            for m in common_marks:
+                rev = hgm.rev_marks[m]
+                # also check if still around in repo
+                if rev in repo and \
+                    not (ctxrev is not None and repo[rev].rev() > ctxrev):
+                    hg_rev_marks[m] = hgm.rev_marks[m]
+                    git_rev_marks[m] = gm.rev_marks[m]
+            # common marks will not include any refs/notes/hg
+            # let's not discard those casually, though they are not vital
+            revlist = self.githgrepo.run_cmd(['rev-list', 'refs/notes/hg'])
+            for l in revlist.splitlines():
+                c = l.strip()
+                m = gm.marks.get(c, 0)
+                if m:
+                    git_rev_marks[m] = c
+            # also save last-note mark
+            if hgm.last_note and hgm.last_note in gm.rev_marks:
+                git_rev_marks[hgm.last_note] = gm.rev_marks[hgm.last_note]
+            # some status report
+            if len(hgm.rev_marks) != len(hg_rev_marks):
+                print "Trimmed hg marks from #%d down to #%d" % (len(hgm.rev_marks), len(hg_rev_marks))
+            if len(gm.rev_marks) != len(git_rev_marks):
+                print "Trimmed git marks from #%d down to #%d" % (len(gm.rev_marks), len(git_rev_marks))
+            # make hg tips as consistent as possible
+            for b in hgm.tips:
+                tip = hgm.tips[b]
+                if tip not in repo or not repo[tip]:
+                    if not ctx:
+                        print "Could not determine safe value for tip of %s (rev %s)" % (b, tip)
+                        print "Please use --keep to provide fallback value."
+                        return
+                    else:
+                        # basically only the revision number is used by a fetch
+                        tip = ctx.hex()
+                else:
+                    hgrevs = set(hg_rev_marks.values())
+                    while True:
+                        if tip in hgrevs:
+                            break
+                        parent = repo[tip].parents()[0].hex()
+                        if parent == tip:
+                            break
+                        tip = parent
+                if hgm.tips[b] != tip:
+                    hgm.tips[b] = tip
+                    print "Updated tip of %s to %s" % (b, tip)
+            # now update and store
+            if not options.dry_run:
+                # hg marks
+                hgm.rev_marks = hg_rev_marks
+                hgm.marks = {}
+                for mark, rev in hg_rev_marks.iteritems():
+                    hgm.marks[rev] = mark
+                hgm.store()
+                # git marks
+                gm.rev_marks = git_rev_marks
+                gm.marks = {}
+                for mark, rev in git_rev_marks.iteritems():
+                    gm.marks[rev] = mark
+                gm.store()
+
+
+class RepoCommand(SubCommand):
+    """Subcommand 'repo': print the local hg repository backing a remote."""
+
+    def argumentparser(self):
+        usage = '%%(prog)s %s [options] <remote>...' % (self.subcommand)
+        p = argparse.ArgumentParser(usage=usage)
+        p.epilog = textwrap.dedent("""\
+        Determines the local hg repository of <remote>.
+        This can either be a separate and independent local hg repository
+        or a local proxy repo (within the .git directory).
+        """)
+        return p
+
+    def do(self, options, args):
+        (remote, args) = self.get_remote(args)
+        repos = self.githgrepo.get_hg_repos()
+        if remote in repos:
+            print repos[remote].rstrip('/')
+
+
+class HgCommand(SubCommand):
+    """Subcommand named after a remote: run a command with -R set to its hg repo."""
+
+    def argumentparser(self):
+        usage = '%%(prog)s %s <hg-command>...' % (self.subcommand)
+        p = argparse.ArgumentParser(usage=usage)
+        hgdir = self.githgrepo.get_hg_repos()[self.subcommand]
+        p.epilog = textwrap.dedent("""\
+        Executes <hg-command> on the backing repository of %s (%s)
+        (by supplying it with the standard -R option).
+        """ % (self.subcommand, hgdir))
+        return p
+
+    def do(self, options, args):
+        # subcommand name is already a known valid alias of hg repo
+        remote = self.subcommand
+        repos = self.githgrepo.get_hg_repos()
+        if len(args) and remote in repos:
+            if args[0].find('hg') < 0:
+                args.insert(0, 'hg')
+            args[1:1] = ['-R', repos[remote]]
+            p = subprocess.Popen(args, stdout=None)
+            p.wait()
+        else:
+            if len(args):
+                self.usage('invalid repo: %s' % remote)
+            else:
+                self.usage('missing command')
+
+
+class HelpCommand(SubCommand):
+    """Subcommand 'help': print help of a given subcommand, else general usage."""
+
+    def do(self, options, args):
+        if len(args):
+            cmd = args[0]
+            if cmd in subcommands:
+                p = subcommands[cmd].argumentparser()
+                p.print_help(sys.stderr)
+                return
+        do_usage()
+
+
+def get_subcommands():
+    """Return dict of subcommand name to instance, one HgCommand per hg remote."""
+    commands = {
+        'hg-rev': HgRevCommand,
+        'git-rev': GitRevCommand,
+        'repo': RepoCommand,
+        'marks': MarksCommand,
+        'help' : HelpCommand
+    }
+    # add remote named subcommands
+    repos = githgrepo.get_hg_repos()
+    for r in repos:
+        if not r in commands:
+            commands[r] = HgCommand
+    # now turn into instances
+    for c in commands:
+        commands[c] = commands[c](c, githgrepo)
+    return commands
+
+
+def do_usage():
+    """Write general usage and the known hg remotes to stderr, then exit(2)."""
+    usage = textwrap.dedent("""
+    git-hg-helper subcommands:
+
+    hg-rev \t show hg revision corresponding to a git revision
+    git-rev \t find git revision corresponding to a hg revision
+    marks \t perform maintenance on repo tracking marks
+    repo \t show local hg repo backing a remote
+
+    If the subcommand is the name of a remote hg repo, then any remaining arguments
+    are considered a "hg command", e.g. hg heads, or thg, and it is then executed
+    with -R set appropriately to the local hg repo backing the specified remote.
+    Do note, however, that the local proxy repos are not maintained as exact mirrors
+    of their respective remote, and also use shared storage.  As such, depending
+    on the command, the result may not be exactly as could otherwise be expected
+    (e.g. might involve more heads, etc).
+
+    Available hg remotes:
+    """)
+    for r in githgrepo.get_hg_repos():
+        usage += '\t%s\n' % (r)
+    usage += '\n'
+    sys.stderr.write(usage)
+    sys.stderr.flush()
+    sys.exit(2)
+
+def init_git(gitdir=None):
+    """Create the global githgrepo; die() with the error message on failure."""
+    global githgrepo
+
+    try:
+        githgrepo = GitHgRepo(gitdir=gitdir)
+    except Exception as e:
+        die(str(e))
+
+def init_logger():
+    """Set up the global logger; level is read from GIT_HG_HELPER_DEBUG (default WARN)."""
+    global logger
+
+    # setup logging
+    logging.LOG = 5
+    logging.addLevelName(logging.LOG, 'LOG')
+    envlevel = os.environ.get('GIT_HG_HELPER_DEBUG', 'WARN')
+    loglevel = logging.getLevelName(envlevel)
+    logging.basicConfig(level=loglevel, \
+        format='%(asctime)-15s %(levelname)s %(message)s')
+    logger = logging.getLogger()
+
+def main(argv):
+    """Entry point: run the subcommand named by argv[1], or print usage."""
+    global subcommands
+
+    # init repo dir
+    # we will take over dir management ...
+    init_git(os.environ.pop('GIT_DIR', None))
+
+    # as an alias, cwd is top dir, change again to original directory
+    reldir = os.environ.get('GIT_PREFIX')
+    if reldir:
+        os.chdir(reldir)
+
+    subcommands = get_subcommands()
+
+    cmd = ''
+    if len(argv) > 1:
+        cmd = argv[1]
+        argv = argv[2:]
+    if cmd in subcommands:
+        c = subcommands[cmd]
+        c.execute(argv)
+    else:
+        do_usage()
+
+init_logger()
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
diff --git a/test/Makefile b/test/Makefile
index 155cd67..fa16d61 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -1,6 +1,6 @@
 RM ?= rm -f
 
-T = main.t main-push.t bidi.t
+T = main.t main-push.t bidi.t helper.t
 
 TEST_DIRECTORY := $(CURDIR)
 export TEST_DIRECTORY
diff --git a/test/helper.t b/test/helper.t
new file mode 100755
index 0000000..77fd261
--- /dev/null
+++ b/test/helper.t
@@ -0,0 +1,179 @@
+#!/bin/sh
+#
+# Copyright (c) 2016 Mark Nauwelaerts
+#
+# Base commands from hg-git tests:
+# https://bitbucket.org/durin42/hg-git/src
+#
+
+test_description='Test git-hg-helper'
+
+test -n "$TEST_DIRECTORY" || TEST_DIRECTORY=$(dirname $0)/
+. "$TEST_DIRECTORY"/test-lib.sh
+
+if ! test_have_prereq PYTHON
+then
+	skip_all='skipping remote-hg tests; python not available'
+	test_done
+fi
+
+if ! python2 -c 'import mercurial' > /dev/null 2>&1
+then
+	skip_all='skipping remote-hg tests; mercurial not available'
+	test_done
+fi
+
+setup () {
+	cat > "$HOME"/.hgrc <<-EOF &&
+	[ui]
+	username = H G Wells
+	[extensions]
+	mq =
+	strip =
+	EOF
+
+	GIT_AUTHOR_DATE="2007-01-01 00:00:00 +0230" &&
+	GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE" &&
+	export GIT_COMMITTER_DATE GIT_AUTHOR_DATE
+}
+
+setup
+
+setup_repos () {
+	(
+	hg init hgrepo &&
+	cd hgrepo &&
+	echo zero > content &&
+	hg add content &&
+	hg commit -m zero
+	) &&
+
+	git clone hg::hgrepo gitrepo
+}
+
+test_expect_success 'subcommand help' '
+	test_when_finished "rm -rf gitrepo* hgrepo*" &&
+
+	setup_repos &&
+
+	(
+	cd gitrepo &&
+	test_expect_code 2 git-hg-helper help 2> ../help
+	) &&
+	# remotes should be in help output
+	grep origin help
+'
+
+test_expect_success 'subcommand repo - no local proxy' '
+	test_when_finished "rm -rf gitrepo* hgrepo*" &&
+
+	setup_repos &&
+
+	(
+	cd hgrepo &&
+	pwd >../expected
+	) &&
+
+	(
+	cd gitrepo &&
+	git-hg-helper repo origin > ../actual
+	) &&
+
+	test_cmp expected actual
+'
+
+GIT_REMOTE_HG_TEST_REMOTE=1 &&
+export GIT_REMOTE_HG_TEST_REMOTE
+
+test_expect_success 'subcommand repo - with local proxy' '
+	test_when_finished "rm -rf gitrepo* hgrepo*" &&
+
+	setup_repos &&
+
+	(
+	cd gitrepo &&
+	export gitdir=`git rev-parse --git-dir` &&
+	# trick to normalize path
+	( cd $gitdir/hg/origin/clone && pwd ) >../expected &&
+	( cd `git-hg-helper repo origin` && pwd ) > ../actual
+	) &&
+
+	test_cmp expected actual
+'
+
+test_expect_success 'subcommands hg-rev and git-rev' '
+	test_when_finished "rm -rf gitrepo* hgrepo*" &&
+
+	setup_repos &&
+
+	(
+	cd gitrepo &&
+	git rev-parse HEAD > rev-HEAD &&
+	test -s rev-HEAD &&
+	git-hg-helper hg-rev `cat rev-HEAD` > hg-HEAD &&
+	git-hg-helper git-rev `cat hg-HEAD` > git-HEAD &&
+	test_cmp rev-HEAD git-HEAD
+	)
+'
+
+test_expect_success 'subcommand mark' '
+	test_when_finished "rm -rf gitrepo* hgrepo*" &&
+
+	(
+	hg init hgrepo &&
+	cd hgrepo &&
+	echo zero > content &&
+	hg add content &&
+	hg commit -m zero &&
+	echo one > content &&
+	hg commit -m one &&
+	echo two > content &&
+	hg commit -m two &&
+	echo three > content &&
+	hg commit -m three &&
+	hg identify -r 0 --id >../root
+	) &&
+
+	hgroot=`cat root` &&
+
+	git clone hg::hgrepo gitrepo &&
+
+	(
+	cd hgrepo &&
+	hg strip -r 1
+	) &&
+
+	(
+	cd gitrepo &&
+	git-hg-helper marks origin --keep $hgroot > output &&
+	cat output &&
+	grep "hg marks" output &&
+	grep "git marks" output &&
+	grep "Updated" output | grep $hgroot
+	)
+'
+
+test_expect_success 'subcommand [some-repo]' '
+	test_when_finished "rm -rf gitrepo* hgrepo*" &&
+
+	setup_repos &&
+
+	(
+	cd hgrepo &&
+	echo one > content &&
+	hg commit -m one
+	) &&
+
+	(
+	cd gitrepo &&
+	git fetch origin
+	) &&
+
+	hg log -R hgrepo > expected &&
+	# not inside gitrepo; test shared path handling
+	GIT_DIR=gitrepo/.git git-hg-helper origin log > actual &&
+
+	test_cmp expected actual
+'
+
+test_done
\ No newline at end of file