Fix UnboundLocalError with plugins and largefiles

When plugins are used in a repository that contains largefiles,
the following exception is thrown as soon as the first largefile
is converted:

```
Traceback (most recent call last):
  File "fast-export/hg-fast-export.py", line 728, in <module>
    sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,
  File "fast-export/hg-fast-export.py", line 581, in hg2git
    c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap,
  File "fast-export/hg-fast-export.py", line 366, in export_commit
    export_file_contents(ctx,man,modified,hgtags,fn_encoding,plugins)
  File "fast-export/hg-fast-export.py", line 222, in export_file_contents
    file_data = {'filename':filename,'file_ctx':file_ctx,'data':d}
UnboundLocalError: local variable 'file_ctx' referenced before assignment
```

This commit fixes the error by:

 * Initializing `file_ctx` before the largefile handling takes place
 * Providing a new `is_largefile` value for plugins so they can detect
    whether largefile handling was applied (and therefore that the
    `file_ctx` object may no longer be in sync with the git version of the file)
This commit is contained in:
Günther Nußmüller
2025-07-25 12:28:23 +02:00
parent 95459e5599
commit d77765a23e
6 changed files with 131 additions and 3 deletions

View File

@@ -188,7 +188,7 @@ values in the dictionary after filters have been run are used to create the git
commit. commit.
``` ```
file_data = {'filename':filename,'file_ctx':file_ctx,'data':file_contents} file_data = {'filename':filename,'file_ctx':file_ctx,'data':file_contents, 'is_largefile':largefile_status}
def file_data_filter(self,file_data): def file_data_filter(self,file_data):
``` ```
@@ -203,6 +203,12 @@ but in this case the `data` and `file_ctx` keys map to None. This is
so that a filter which modifies file names can apply the same name so that a filter which modifies file names can apply the same name
transformations when files are deleted. transformations when files are deleted.
The `is_largefile` entry within the `file_data` dictionary is `True`
if the original file was a largefile that was converted to a normal
file before the plugins were invoked. In this case, `file_ctx` still
points to the file context of the original, unconverted file, while
`filename` and `data` contain the already converted information.
Submodules Submodules
---------- ----------
See README-SUBMODULES.md for how to convert subrepositories into git See README-SUBMODULES.md for how to convert subrepositories into git

View File

@@ -211,15 +211,18 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}):
b'Ignoring file %s which cannot be tracked by git\n' % filename b'Ignoring file %s which cannot be tracked by git\n' % filename
) )
continue continue
largefile = False
file_ctx=ctx.filectx(file)
if is_largefile(filename): if is_largefile(filename):
largefile = True
filename = largefile_orig_name(filename) filename = largefile_orig_name(filename)
d = largefile_data(ctx, file, filename) d = largefile_data(ctx, file, filename)
else: else:
file_ctx=ctx.filectx(file)
d=file_ctx.data() d=file_ctx.data()
if plugins and plugins['file_data_filters']: if plugins and plugins['file_data_filters']:
file_data = {'filename':filename,'file_ctx':file_ctx,'data':d} file_data = {'filename':filename,'file_ctx':file_ctx,'data':d, 'is_largefile':largefile}
for filter in plugins['file_data_filters']: for filter in plugins['file_data_filters']:
filter(file_data) filter(file_data)
d=file_data['data'] d=file_data['data']

View File

@@ -0,0 +1,20 @@
blob
mark :1
data 7
a_file
blob
mark :2
data 6
large
reset refs/heads/master
commit refs/heads/master
mark :3
author Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
committer Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
data 3
r0
M 100644 :1 a.txt
M 100644 :2 b.txt

69
t/largefile_plugin.t Executable file
View File

@@ -0,0 +1,69 @@
#!/bin/bash
#
# Copyright (c) 2023 Felipe Contreras
# Copyright (c) 2023 Frej Drejhammar
# Copyright (c) 2025 Günther Nußmüller
#
# Check that plugin invocation works with largefiles.
# This test uses the echo_file_data_test_plugin to verify that the
# file data is passed correctly, including the largefile status.
#
test_description='Largefiles and plugin test'
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
# Initialise an empty git repository in the directory given as "$1" and
# set core.ignoreCase to false in that repository's configuration.
git_create() {
	local target="$1"

	git init -q "$target" &&
	git -C "$target" config core.ignoreCase false
}
# Run hg-fast-export.sh from inside the git repository "$2", importing the
# Mercurial repository "$1" (a sibling directory) with the
# echo_file_data_test_plugin loaded.
#
# The conversion runs in a subshell so the caller's working directory is
# left untouched. Both the "../$1" repo path and the plugin path are
# relative to the git repository directory.
git_convert() {
	local hg_source="$1" git_target="$2"

	(
		cd "$git_target" &&
		hg-fast-export.sh \
			--repo "../$hg_source" \
			-s \
			--hgtags \
			-n \
			--plugin ../../plugins/echo_file_data_test_plugin
	)
}
# Write the Mercurial configuration used by this test to "$HOME"/.hgrc:
# a fixed commit username and the largefiles extension enabled.
setup() {
	printf '%s\n' \
		'[ui]' \
		'username = Grevious Bodily Harmsworth <gbh@example.com>' \
		'[extensions]' \
		'largefiles =' \
		> "$HOME"/.hgrc
}
# Create the first Mercurial commit ("r0") inside ./hgrepo.
#
# a.txt is added as a normal file and b.txt as a largefile
# (hg add --large), so the conversion sees both kinds of file in the
# same revision. The work happens in a subshell so the caller's working
# directory is not changed.
#
# Every step is joined with && so that a failure anywhere (including the
# cd or the file creation) makes the function, and therefore the calling
# test, fail instead of being silently ignored.
commit0() {
	(
		cd hgrepo &&
		echo "a_file" > a.txt &&
		echo "large" > b.txt &&
		hg add a.txt &&
		hg add --large b.txt &&
		hg commit -d "2023-03-17 01:00Z" -m "r0"
	)
}
# Write "$HOME"/.hgrc (username + largefiles extension) before any hg
# command is run.
setup
# Convert a one-commit hg repository containing a normal file and a
# largefile while the plugin is loaded, then check two things:
#  * the `git fast-export --all` output of the converted repository
#    matches largefile_plugin.expected, and
#  * gitrepo/largefile_info.txt matches largefile_plugin_file_info.expected.
# Both repositories are removed when the test finishes.
test_expect_success 'largefile and plugin' '
test_when_finished "rm -rf hgrepo gitrepo" &&
(
hg init hgrepo &&
commit0
) &&
git_create gitrepo &&
git_convert hgrepo gitrepo &&
git -C gitrepo fast-export --all > actual &&
test_cmp "$SHARNESS_TEST_DIRECTORY"/largefile_plugin.expected actual &&
test_cmp "$SHARNESS_TEST_DIRECTORY"/largefile_plugin_file_info.expected gitrepo/largefile_info.txt
'
test_done

View File

@@ -0,0 +1,12 @@
filename: b'b.txt'
data size: 6 bytes
ctx rev: 0
ctx binary: False
is largefile: True
filename: b'a.txt'
data size: 7 bytes
ctx rev: 0
ctx binary: False
is largefile: False

View File

@@ -0,0 +1,18 @@
import sys
from mercurial import node
def build_filter(args):
    """Create the plugin's filter object.

    ``args`` is handed to ``Filter`` unchanged; ``Filter`` does not use it.
    """
    plugin_filter = Filter(args)
    return plugin_filter
class Filter:
    """Test plugin that records each ``file_data`` dictionary it receives.

    Every call to ``file_data_filter`` appends one record to
    ``largefile_info.txt`` in the current working directory, so a test can
    compare what the plugin was given against an expected file.
    """

    def __init__(self, _):
        """Accept the plugin argument; it is not used."""

    def file_data_filter(self, file_data):
        """Append a description of ``file_data`` to ``largefile_info.txt``.

        The record holds the filename, the length of ``data``, the revision
        and binary flag reported by ``file_ctx``, and the ``is_largefile``
        entry (``False`` when the key is absent), followed by a blank line.
        ``file_data`` itself is not modified.
        """
        with open('largefile_info.txt', 'a') as report:
            # Each value is looked up right before its line is written, so
            # a failure part-way leaves the earlier lines in the file.
            print(f"filename: {file_data['filename']}", file=report)
            print(f"data size: {len(file_data['data'])} bytes", file=report)
            file_ctx = file_data['file_ctx']
            print(f"ctx rev: {file_ctx.rev()}", file=report)
            print(f"ctx binary: {file_ctx.isbinary()}", file=report)
            largefile_flag = file_data.get('is_largefile', False)
            print(f"is largefile: {largefile_flag}", file=report)
            # Blank line separates consecutive records.
            print(file=report)