Merge branch 'PR/217'

Closes: #215
This commit is contained in:
Frej Drejhammar
2020-03-26 20:17:20 +01:00
2 changed files with 15 additions and 7 deletions

View File

@@ -80,10 +80,10 @@ author information than git, an author mapping file can be given to
hg-fast-export to fix up malformed author strings. The file is
specified using the -A option. The file should contain lines of the
form `"<key>"="<value>"`. Inside the key and value strings, all escape
sequences understood by the python `string_escape` encoding are
supported. (Versions of fast-export prior to v171002 had a different
syntax, the old syntax can be enabled by the flag
`--mappings-are-raw`.)
sequences understood by the Python `unicode_escape` encoding are
supported; strings are otherwise assumed to be UTF-8 encoded.
(Versions of fast-export prior to v171002 used a different syntax; the
old syntax can be enabled with the flag `--mappings-are-raw`.)
The example authors.map below will translate `User
<garbage<tab><user@example.com>` to `User <user@example.com>`.

View File

@@ -426,12 +426,20 @@ def load_mapping(name, filename, mapping_is_raw):
return None
return (m.group(1).strip(), m.group(2).strip())
def process_unicode_escape_sequences(s):
    r"""Expand backslash escape sequences in a UTF-8 encoded bytestring.

    Args:
        s: UTF-8 encoded bytestring that may contain escape sequences
            understood by the ``unicode_escape`` codec (``\t``, ``\n``,
            ``\xNN``, ``\uNNNN``, ...).

    Returns:
        A UTF-8 encoded bytestring in which every escape sequence has been
        replaced by the character it represents; all other characters are
        passed through unchanged.

    Raises:
        UnicodeDecodeError: if ``s`` is not valid UTF-8 or contains a
            malformed escape sequence.
    """
    text = s.decode('utf8')
    # The unicode_escape decoder cannot be fed raw non-ASCII characters, so
    # those are turned into escape sequences first. 'backslashreplace' is used
    # rather than the 'unicode-escape' encoder because the latter would also
    # double the backslashes already present in the input, which would make
    # the decode step below a no-op for the very sequences we want to expand.
    ascii_only = text.encode('ascii', 'backslashreplace')
    return ascii_only.decode('unicode-escape').encode('utf8')
def parse_quoted_line(line):
    """Parse one quoted mapping line into a pair of unescaped strings.

    Args:
        line: a single line of the mapping file, matched against
            ``quoted_regexp`` (defined outside this block; presumably it
            recognises the `"<key>"="<value>"` form -- confirm against its
            definition).

    Returns:
        ``None`` when the line does not match ``quoted_regexp``; otherwise a
        2-tuple built from match groups 1 and 5 (presumably key and value),
        each passed through ``process_unicode_escape_sequences``.
    """
    m = quoted_regexp.match(line)
    if m is None:
        return None

    # Both halves go through the shared helper so that non-ASCII UTF-8 input
    # and backslash escapes are handled the same way for key and value.
    return (process_unicode_escape_sequences(m.group(1)),
            process_unicode_escape_sequences(m.group(5)))
cache={}
if not os.path.exists(filename):