mirror of
https://github.com/frej/fast-export.git
synced 2025-11-01 08:55:47 +01:00
Resolve unicode escape sequences not being processed correctly
In `process_unicode_escape_sequences()`, any backslash escape sequences
in the original string are themselves escaped by the first
`.encode('unicode-escape')` and therefore survive the
`.encode('unicode-escape').decode('unicode-escape')` round trip unchanged.
That is not what we want - we want these sequences to pass through
`.encode()` unchanged, so that they will be converted to the
characters they represent upon `.decode()`.
This patch changes the `.encode()` step to pass through any ASCII
characters unchanged, escaping only non-ASCII characters. This ensures
that any existing backslash escape sequences will be interpreted as the
characters they represent upon `.decode()`.
This commit is contained in:
@@ -434,9 +434,15 @@ def load_mapping(name, filename, mapping_is_raw):
|
|||||||
def process_unicode_escape_sequences(s):
|
def process_unicode_escape_sequences(s):
|
||||||
# Replace unicode escape sequences in the otherwise UTF8-encoded bytestring s with
|
# Replace unicode escape sequences in the otherwise UTF8-encoded bytestring s with
|
||||||
# the UTF8-encoded characters they represent. We need to do an additional
|
# the UTF8-encoded characters they represent. We need to do an additional
|
||||||
# .decode('utf8').encode('unicode-escape') to convert any non-ascii characters into
|
# .decode('utf8').encode('ascii', 'backslashreplace') to convert any non-ascii
|
||||||
# their escape sequences so that the subsequent .decode('unicode-escape') succeeds:
|
# characters into their escape sequences so that the subsequent
|
||||||
return s.decode('utf8').encode('unicode-escape').decode('unicode-escape').encode('utf8')
|
# .decode('unicode-escape') succeeds:
|
||||||
|
return (
|
||||||
|
s.decode('utf8')
|
||||||
|
.encode('ascii', 'backslashreplace')
|
||||||
|
.decode('unicode-escape')
|
||||||
|
.encode('utf8')
|
||||||
|
)
|
||||||
|
|
||||||
def parse_quoted_line(line):
|
def parse_quoted_line(line):
|
||||||
m=quoted_regexp.match(line)
|
m=quoted_regexp.match(line)
|
||||||
|
|||||||
Reference in New Issue
Block a user