#!/bin/sh

# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
# License: MIT <http://www.opensource.org/licenses/mit-license.php>

# Name of the readlink implementation used further down to canonicalize the
# path of this script with `-f`.
# NOTE(review): greadlink is presumably GNU readlink installed under a
# prefixed name on systems whose native readlink lacks -f -- confirm.
READLINK="readlink"
if command -v greadlink > /dev/null; then
  READLINK="greadlink" # Prefer greadlink over readlink
fi

# Locate the directory holding this script (and hg-fast-export.py next to it).
#
# SELF_PATH is how the shell finds "$0"; when the lookup fails (e.g. the script
# was started as `sh path/to/script` without being executable) "$0" itself is
# used.  If $READLINK can canonicalize that path with -f, ROOT is the directory
# of the canonical path, so calling the script through a symlink works.
# Otherwise ROOT is the directory of the uncanonicalized path, which is only
# accepted when hg-fast-export.py is actually found there.
SELF_PATH="$(command -v "$0")" || SELF_PATH="$0"
if SELF_REAL="$("$READLINK" -f "$SELF_PATH" 2> /dev/null)"; then
  ROOT="$(dirname "$SELF_REAL")"
else
  ROOT="$(dirname "$SELF_PATH")"
  if [ ! -f "$ROOT/hg-fast-export.py" ] ; then
    echo "hg-fast-exports requires a readlink implementation which knows" \
         " how to canonicalize paths in order to be called via a symlink." >&2
    exit 1
  fi
fi

# Mercurial repository to import; set by -r or read back from the state file.
REPO=""
# State files live in the git directory as "$GIT_DIR/$PFX-<suffix>".
PFX="hg2git"
SFX_MAPPING="mapping"
SFX_MARKS="marks"
SFX_HEADS="heads"
SFX_STATE="state"
# Extra options handed to git fast-import (filled in by option parsing).
GFI_OPTS=""

# Print the name of the first interpreter among python2, python, python3 that
# exists on PATH and can import mercurial.scmutil.revsymbol.
# Returns 0 when one is found, 1 (printing nothing) otherwise.
find_python_with_mercurial() {
  for candidate in python2 python python3; do
    if command -v "$candidate" > /dev/null; then
      # The probe's own output is discarded so that only the interpreter
      # name reaches stdout.
      if "$candidate" -c 'from mercurial.scmutil import revsymbol' \
          > /dev/null 2>&1; then
        printf '%s\n' "$candidate"
        return 0
      fi
    fi
  done
  return 1
}

if [ -z "${PYTHON}" ]; then
  # $PYTHON is not set, so we try to find a working python with mercurial:
  PYTHON="$(find_python_with_mercurial)"
fi

# Abort when neither the caller nor auto-detection supplied an interpreter.
if [ -z "${PYTHON}" ]; then
  echo "Could not find a python interpreter with the mercurial module >= 4.6 available. " \
      "Please use the 'PYTHON' environment variable to specify the interpreter to use." >&2
  exit 1
fi

# One-line synopsis and long help text printed for -h/--help.
# LONG_USAGE expands $PFX and $SFX_STATE at assignment time.
USAGE="[--quiet] [-r <repo>] [--force] [-m <max>] [-s] [--hgtags] [-A <file>] [-B <file>] [-T <file>] [-M <name>] [-o <name>] [--hg-hash] [-e <encoding>]"
LONG_USAGE="Import hg repository <repo> up to either tip or <max>
If <repo> is omitted, use last hg repository as obtained from state file,
GIT_DIR/$PFX-$SFX_STATE by default.

Note: The argument order matters.

Options:
    --quiet   Passed to git-fast-import(1)
    -r <repo> Mercurial repository to import
    --force   Ignore validation errors when converting, and pass --force
              to git-fast-import(1)
    -m <max>  Maximum revision to import
    -s        Enable parsing Signed-off-by lines
    --hgtags  Enable exporting .hgtags files
    -A <file> Read author map from file
              (Same as in git-svnimport(1) and git-cvsimport(1))
    -B <file> Read branch map from file
    -T <file> Read tags map from file
    -M <name> Set the default branch name (defaults to 'master')
    -n        Do not perform built-in (broken in many cases) sanitizing
              of branch/tag names.
    -o <name> Use <name> as branch namespace to track upstream (eg 'origin')
    --hg-hash Annotate commits with the hg hash as git notes in the
              hg namespace.
    -e <encoding> Assume commit and author strings retrieved from
                  Mercurial are encoded in <encoding>
    --fe <filename_encoding> Assume filenames from Mercurial are encoded
                             in <filename_encoding>
    --mappings-are-raw Assume mappings are raw <key>=<value> lines
    --filter-contents <cmd> Pipe contents of each exported file through <cmd>
                            with <file-path> <hg-hash> <is-binary> as arguments
    --plugin <plugin=init> Add a plugin with the given init string (repeatable)
    --plugin-path <plugin-path> Add an additional plugin lookup path
"
# Print the synopsis and the long help, then stop, when the first argument
# asks for help.
case "$1" in
  -h|--help)
    echo "usage: $(basename "$0") $USAGE"
    echo ""
    echo "$LONG_USAGE"
    exit 0
    ;;
esac

# Find the git repository we are importing into and set GIT_DIR.
#
# The failure handlers use { ...; } rather than ( ... ): `exit` inside a
# parenthesised subshell only leaves that subshell, so the script would carry
# on without a repository.
IS_BARE=$(git rev-parse --is-bare-repository) \
    || { echo "Could not find git repo" >&2 ; exit 1 ; }
if test "z$IS_BARE" != ztrue; then
   # This is not a bare repo, cd to the toplevel
   TOPLEVEL=$(git rev-parse --show-toplevel) \
       || { echo "Could not find git repo toplevel" >&2 ; exit 1 ; }
   cd "$TOPLEVEL" || exit 1
fi
GIT_DIR=$(git rev-parse --git-dir) \
    || { echo "Could not find git repo" >&2 ; exit 1 ; }

# Remember whether git reports core.ignoreCase as true; a later check aborts
# the import on a non-empty IGNORECASEWARN (the --force option clears it).
IGNORECASEWARN=""
IGNORECASE=$(git config core.ignoreCase)
if [ "true" = "$IGNORECASE" ]; then
  IGNORECASEWARN="true"
fi

# Consume the leading options this wrapper handles itself (-r, --quiet) and
# stop at the first argument it does not own, which is left in place so that
# it is later handed to hg-fast-export.py.  --force is acted upon but NOT
# consumed, because hg-fast-export.py needs to see it as well.
#
# Arguments: the script's positional parameters.
# Sets:      REPO, GFI_OPTS, IGNORECASEWARN as dictated by the options;
#            _consumed = number of leading arguments the caller must shift.
parse_wrapper_options() {
  _argc=$#
  while [ "$#" -gt 0 ]; do
    case "$1" in
      -r|--r|--re|--rep|--repo)
        shift
        REPO="$1"
        ;;
      --q|--qu|--qui|--quie|--quiet)
        GFI_OPTS="$GFI_OPTS --quiet"
        ;;
      --force)
        # pass --force to git-fast-import and hg-fast-export.py
        GFI_OPTS="$GFI_OPTS --force"
        IGNORECASEWARN=""
        break
        ;;
      -*)
        # pass any other options down to hg2git.py
        break
        ;;
      *)
        break
        ;;
    esac
    shift
  done
  # Whatever is still unshifted was not ours.
  _consumed=$((_argc - $#))
}

parse_wrapper_options "$@"
shift "$_consumed"

# Refuse to import while core.ignoreCase is true, unless --force cleared
# IGNORECASEWARN during option parsing.
if [ -n "$IGNORECASEWARN" ]; then
  {
    echo "Error: The option core.ignoreCase is set to true in the git"
    echo "repository. This will produce empty changesets for renames that just"
    echo "change the case of the file name."
    echo "Use --force to skip this check or change the option with"
    echo "git config core.ignoreCase false"
  } >&2
  exit 1
fi

# Make a backup copy of each state file
# (the copy sits beside the original, with "~" appended to its name)
for i in "$SFX_STATE" "$SFX_MARKS" "$SFX_MAPPING" "$SFX_HEADS" ; do
  if [ -f "$GIT_DIR/$PFX-$i" ] ; then
    cp "$GIT_DIR/$PFX-$i" "$GIT_DIR/$PFX-$i~"
  fi
done

# Print the value recorded on the ":repo " line(s) of state file $1.
# Everything after the first space is the value (-f 2-), so a repository
# path that itself contains spaces is returned whole.
state_repo() {
  grep '^:repo ' "$1" | cut -d ' ' -f 2-
}

# for convenience: get default repo from state file
if [ -z "$REPO" ] && [ -f "$GIT_DIR/$PFX-$SFX_STATE" ] ; then
  REPO="$(state_repo "$GIT_DIR/$PFX-$SFX_STATE")"
  echo "Using last hg repository \"$REPO\""
fi

# Neither -r nor the state file supplied a repository: nothing to import.
if [ -z "$REPO" ]; then
  echo "no repo given, use -r flag" >&2
  exit 1
fi

# make sure we have a marks cache
if [ ! -f "$GIT_DIR/$PFX-$SFX_MARKS" ] ; then
  touch "$GIT_DIR/$PFX-$SFX_MARKS"
fi

# cleanup on exit
# (single quotes: the paths are expanded when the trap fires, not here)
trap 'rm -f "$GIT_DIR/$PFX-$SFX_MARKS.old" "$GIT_DIR/$PFX-$SFX_MARKS.tmp"' EXIT

# Run  hg-fast-export.py | git fast-import  and collect the exit status of
# BOTH pipeline stages.
#
# How the descriptors are wired:
#   * `exec 3>&1` saves the script's stdout as fd 3.
#   * The here-document body is one command substitution; whatever it writes
#     to its stdout becomes the text fed to the two `read`s.
#   * Inside it, `exec 4>&3 3>&1 1>&4 4>&-` swaps fd 1 and fd 3: ordinary
#     output goes to the script's real stdout again, while fd 3 now feeds the
#     captured text.
#   * Each stage runs its command with fd 3 closed, then writes its exit
#     status to fd 3, one per line.
# The import is considered successful only if both captured lines are "0".
# $GFI_OPTS is deliberately unquoted so it splits into separate options.
_err1=
_err2=
exec 3>&1
{ read -r _err1 || :; read -r _err2 || :; } <<-EOT
$(
  exec 4>&3 3>&1 1>&4 4>&-
  {
    _e1=0
    GIT_DIR="$GIT_DIR" "$PYTHON" "$ROOT/hg-fast-export.py" \
      --repo "$REPO" \
      --marks "$GIT_DIR/$PFX-$SFX_MARKS" \
      --mapping "$GIT_DIR/$PFX-$SFX_MAPPING" \
      --heads "$GIT_DIR/$PFX-$SFX_HEADS" \
      --status "$GIT_DIR/$PFX-$SFX_STATE" \
      "$@" 3>&- || _e1=$?
    echo $_e1 >&3
  } | \
  {
    _e2=0
    git fast-import $GFI_OPTS --export-marks="$GIT_DIR/$PFX-$SFX_MARKS.tmp" 3>&- || _e2=$?
    echo $_e2 >&3
  }
)
EOT
exec 3>&-
[ "$_err1" = 0 ] && [ "$_err2" = 0 ] || exit 1

# move recent marks cache out of the way...
# (stop if that fails: the redirect below truncates the marks file, so
# continuing without a usable .old copy would discard the existing marks)
if [ -f "$GIT_DIR/$PFX-$SFX_MARKS" ] ; then
  mv "$GIT_DIR/$PFX-$SFX_MARKS" "$GIT_DIR/$PFX-$SFX_MARKS.old" || exit 1
else
  touch "$GIT_DIR/$PFX-$SFX_MARKS.old" || exit 1
fi

# ...to create a new merged one
cat "$GIT_DIR/$PFX-$SFX_MARKS.old" "$GIT_DIR/$PFX-$SFX_MARKS.tmp" \
| uniq > "$GIT_DIR/$PFX-$SFX_MARKS"

# Print one ":<branch> <sha1>" line per local branch.
# for-each-ref is used instead of parsing `git branch`, whose human-oriented
# output also contains entries such as "(HEAD detached at ...)" that would be
# split into bogus head names.
list_branch_heads() {
  git for-each-ref --format=':%(refname) %(objectname)' refs/heads \
    | sed 's#^:refs/heads/#:#'
}

# save SHA1s of current heads for incremental imports
# and connectivity (plus sanity checking)
list_branch_heads > "$GIT_DIR/$PFX-$SFX_HEADS"

# check diff with color:
# ( for i in `find . -type f | grep -v '\.git'` ; do diff -u $i $REPO/$i ; done | cdiff ) | less -r