Improve import of LFS files for pull and mirror

Pushed-by: Rene Pfeuffer<rene.pfeuffer@cloudogu.com>
Co-authored-by: René Pfeuffer<rene.pfeuffer@cloudogu.com>
Committed-by: René Pfeuffer<rene.pfeuffer@cloudogu.com>
This commit is contained in:
Rene Pfeuffer
2024-03-11 11:13:42 +01:00
parent b3002806ac
commit 658ccdf006
5 changed files with 272 additions and 22 deletions

View File

@@ -51,6 +51,8 @@ import sonia.scm.repository.api.PullResponse;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
public class GitPullCommand extends AbstractGitPushOrPullCommand
@@ -225,6 +227,7 @@ public class GitPullCommand extends AbstractGitPushOrPullCommand
}
private void fetchLfs(PullCommandRequest request, Git git, LfsLoader.LfsLoaderLogger lfsLoaderLogger) throws IOException {
Set<ObjectId> alreadyVisited = new HashSet<>(1000);
open().getRefDatabase().getRefs().forEach(
ref -> lfsLoader.inspectTree(
ref.getObjectId(),
@@ -233,7 +236,8 @@ public class GitPullCommand extends AbstractGitPushOrPullCommand
new MirrorCommandResult.LfsUpdateResult(),
repository,
pullHttpConnectionProvider.createHttpConnectionFactory(request),
request.getRemoteUrl().toString()
request.getRemoteUrl().toString(),
alreadyVisited
)
);
}

View File

@@ -24,6 +24,7 @@
package sonia.scm.repository.spi;
import com.google.common.annotations.VisibleForTesting;
import jakarta.inject.Inject;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lfs.Lfs;
@@ -51,7 +52,8 @@ import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
class LfsLoader {
@@ -71,8 +73,24 @@ class LfsLoader {
sonia.scm.repository.Repository repository,
HttpConnectionFactory httpConnectionFactory,
String url) {
EntryHandler entryHandler = new EntryHandler(repository, gitRepository, mirrorLog, lfsUpdateResult, httpConnectionFactory);
inspectTree(newObjectId, entryHandler, gitRepository, mirrorLog, lfsUpdateResult, url);
inspectTree(newObjectId, gitRepository, mirrorLog, lfsUpdateResult, repository, httpConnectionFactory, url, new HashSet<>(1000));
}
void inspectTree(ObjectId newObjectId,
Repository gitRepository,
LfsLoaderLogger mirrorLog,
LfsUpdateResult lfsUpdateResult,
sonia.scm.repository.Repository repository,
HttpConnectionFactory httpConnectionFactory,
String url,
Set<ObjectId> alreadyVisited) {
EntryHandler entryHandler = createEntryHandler(gitRepository, mirrorLog, lfsUpdateResult, repository, httpConnectionFactory);
inspectTree(newObjectId, entryHandler, gitRepository, mirrorLog, lfsUpdateResult, url, alreadyVisited);
}
@VisibleForTesting
EntryHandler createEntryHandler(Repository gitRepository, LfsLoaderLogger mirrorLog, LfsUpdateResult lfsUpdateResult, sonia.scm.repository.Repository repository, HttpConnectionFactory httpConnectionFactory) {
return new EntryHandler(repository, gitRepository, mirrorLog, lfsUpdateResult, httpConnectionFactory);
}
private void inspectTree(ObjectId newObjectId,
@@ -80,20 +98,25 @@ class LfsLoader {
Repository gitRepository,
LfsLoaderLogger mirrorLog,
LfsUpdateResult lfsUpdateResult,
String sourceUrl) {
String sourceUrl,
Set<ObjectId> alreadyVisited) {
try {
gitRepository
.getConfig()
.setString(ConfigConstants.CONFIG_SECTION_LFS, null, ConfigConstants.CONFIG_KEY_URL, computeLfsUrl(sourceUrl));
TreeWalk treeWalk = new TreeWalk(gitRepository);
treeWalk.setFilter(new ScmLfsPointerFilter());
treeWalk.setFilter(new FilteringScmLfsPointerFilter(alreadyVisited));
treeWalk.setRecursive(true);
RevWalk revWalk = new RevWalk(gitRepository);
revWalk.markStart(revWalk.parseCommit(newObjectId));
for (RevCommit commit : revWalk) {
if (!alreadyVisited.add(commit.toObjectId())) {
LOG.trace("skipping commit {}", commit);
break;
}
treeWalk.reset();
treeWalk.addTree(commit.getTree());
while (treeWalk.next()) {
@@ -115,7 +138,31 @@ class LfsLoader {
}
}
private class EntryHandler {
@VisibleForTesting
Path downloadLfsResource(Lfs lfs, Repository gitRepository, HttpConnectionFactory connectionFactory, LfsPointer lfsPointer) throws IOException {
return SmudgeFilter.downloadLfsResource(
lfs,
gitRepository,
connectionFactory,
lfsPointer
)
.iterator()
.next();
}
@VisibleForTesting
void storeLfsBlob(AnyLongObjectId oid, Path tempFilePath, BlobStore lfsBlobStore) throws IOException {
LOG.trace("temporary lfs file: {}", tempFilePath);
Files.copy(
tempFilePath,
lfsBlobStore
.create(oid.name())
.getOutputStream()
);
}
@VisibleForTesting
class EntryHandler {
private final BlobStore lfsBlobStore;
private final Repository gitRepository;
@@ -137,15 +184,19 @@ class LfsLoader {
this.httpConnectionFactory = httpConnectionFactory;
}
private void handleTreeEntry(TreeWalk treeWalk) {
@VisibleForTesting
void handleTreeEntry(TreeWalk treeWalk) {
try (InputStream is = gitRepository.open(treeWalk.getObjectId(0), Constants.OBJ_BLOB).openStream()) {
LfsPointer lfsPointer = LfsPointer.parseLfsPointer(is);
AnyLongObjectId oid = lfsPointer.getOid();
if (lfsBlobStore.get(oid.name()) == null) {
Path tempFilePath = loadLfsFile(lfsPointer);
storeLfsBlob(oid, tempFilePath);
Files.delete(tempFilePath);
try {
storeLfsBlob(oid, tempFilePath, lfsBlobStore);
} finally {
Files.delete(tempFilePath);
}
}
} catch (Exception e) {
LOG.warn("failed to load lfs file", e);
@@ -161,23 +212,12 @@ class LfsLoader {
Lfs lfs = new Lfs(gitRepository);
lfs.getMediaFile(lfsPointer.getOid());
Collection<Path> paths = SmudgeFilter.downloadLfsResource(
return downloadLfsResource(
lfs,
gitRepository,
httpConnectionFactory,
lfsPointer
);
return paths.iterator().next();
}
private void storeLfsBlob(AnyLongObjectId oid, Path tempFilePath) throws IOException {
LOG.trace("temporary lfs file: {}", tempFilePath);
Files.copy(
tempFilePath,
lfsBlobStore
.create(oid.name())
.getOutputStream()
);
}
}
@@ -207,4 +247,22 @@ class LfsLoader {
return super.include(walk);
}
}
private static class FilteringScmLfsPointerFilter extends ScmLfsPointerFilter {
private final Set<ObjectId> alreadyVisited;
private FilteringScmLfsPointerFilter(Set<ObjectId> alreadyVisited) {
this.alreadyVisited = alreadyVisited;
}
@Override
public boolean include(TreeWalk walk) throws IOException {
if (!alreadyVisited.add(walk.getObjectId(0))) {
LOG.trace("skipping object {}", walk.getObjectId(0));
return false;
}
return super.include(walk);
}
}
}

View File

@@ -0,0 +1,186 @@
/*
* MIT License
*
* Copyright (c) 2020-present Cloudogu GmbH and Contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package sonia.scm.repository.spi;
import org.eclipse.jgit.lfs.Lfs;
import org.eclipse.jgit.lfs.LfsPointer;
import org.eclipse.jgit.lfs.lib.AnyLongObjectId;
import org.eclipse.jgit.lfs.lib.LongObjectId;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.transport.http.HttpConnectionFactory;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnit;
import org.mockito.junit.MockitoRule;
import sonia.scm.repository.GitUtil;
import sonia.scm.repository.api.MirrorCommandResult;
import sonia.scm.store.BlobStore;
import sonia.scm.web.lfs.LfsBlobStoreFactory;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import static java.util.Arrays.asList;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
/*
* This test uses a repository with the following layout:
* 7100a0f (branch/b) Add fourth png
* d89ee93 (branch/a) Add third png
| * f3134d6 (master) Add second png
|/
* 62c8598 Add first png
* cccd744 init lfs
*
* Each commit with the text "Add ..." adds exactly one lfs file to the repository.
*/
public class LfsLoaderTest extends ZippedRepositoryTestBase {
@Rule
public MockitoRule rule = MockitoJUnit.rule();
@Mock
private LfsBlobStoreFactory lfsBlobStoreFactory;
@Mock
private BlobStore lfsBlobStore;
@Mock
private LfsLoader.LfsLoaderLogger lfsLoaderLogger;
@Mock
private MirrorCommandResult.LfsUpdateResult lfsUpdateResult;
@Mock
private HttpConnectionFactory httpConnectionFactory;
private LfsLoader lfsLoader;
private LfsLoader.EntryHandler entryHandler;
private final Map<AnyLongObjectId, Path> storedBlobs = new HashMap<>();
private Path lfsTemp;
@Before
public void initLfsBlobStore() {
when(lfsBlobStoreFactory.getLfsBlobStore(repository)).thenReturn(lfsBlobStore);
}
@Before
public void initLfsLoader() {
lfsLoader = new LfsLoader(lfsBlobStoreFactory) {
@Override
Path downloadLfsResource(Lfs lfs, Repository gitRepository, HttpConnectionFactory connectionFactory, LfsPointer lfsPointer) throws IOException {
Path file = lfsTemp.resolve(lfsPointer.getOid().getName());
Files.createFile(file);
return file;
}
@Override
void storeLfsBlob(AnyLongObjectId oid, Path tempFilePath, BlobStore lfsBlobStore) {
storedBlobs.put(oid, tempFilePath);
}
@Override
EntryHandler createEntryHandler(Repository gitRepository, LfsLoaderLogger mirrorLog, MirrorCommandResult.LfsUpdateResult lfsUpdateResult, sonia.scm.repository.Repository repository, HttpConnectionFactory httpConnectionFactory) {
entryHandler = spy(super.createEntryHandler(gitRepository, mirrorLog, lfsUpdateResult, repository, httpConnectionFactory));
return entryHandler;
}
};
}
@Before
public void initLfsTemp() throws IOException {
lfsTemp = tempFolder.newFolder("lfs").toPath();
}
@Test
public void shouldLoadAllLfsFiles() throws IOException {
lfsLoader.inspectTree(
ObjectId.fromString("f3134d622484981329034ef63c6c1c9b0e5c5232"),
GitUtil.open(repositoryDirectory),
lfsLoaderLogger,
lfsUpdateResult,
repository,
httpConnectionFactory,
"http://localhost:8081/scm/repo.git"
);
assertThat(storedBlobs)
.hasSize(2)
.containsAllEntriesOf(
Map.of(
LongObjectId.fromString("53c234e5e8472b6ac51c1ae1cab3fe06fad053beb8ebfd8977b010655bfdd3c3"), lfsTemp.resolve("53c234e5e8472b6ac51c1ae1cab3fe06fad053beb8ebfd8977b010655bfdd3c3"),
LongObjectId.fromString("4355a46b19d348dc2f57c046f8ef63d4538ebb936000f3c9ee954a27460dd865"), lfsTemp.resolve("4355a46b19d348dc2f57c046f8ef63d4538ebb936000f3c9ee954a27460dd865")
));
assertThat(lfsTemp).isEmptyDirectory();
}
@Test
public void shouldCheckLfsFilesOnlyOnce() throws IOException {
Set<ObjectId> alreadyVisited = new HashSet<>();
try (Repository repository = GitUtil.open(repositoryDirectory)) {
asList(
"d89ee931a676bec618177fb4ae6e056f8907a82b", // branch/a
"7100a0f377b142a9a746ee0b18cb4124309c0900" // branch/b
)
.forEach(
objId -> lfsLoader.inspectTree(
ObjectId.fromString(objId),
repository,
lfsLoaderLogger,
lfsUpdateResult,
this.repository,
httpConnectionFactory,
"http://localhost:8081/scm/repo.git",
alreadyVisited
)
);
}
assertThat(storedBlobs).hasSize(3);
// The second entry handler created for the last revision (branch/b) should be used only once, because all other
// object ids for this revision have been handled by the other revision (branch/a) before:
verify(entryHandler).handleTreeEntry(any());
}
@Override
protected String getType() {
return "git";
}
@Override
protected String getZippedRepositoryResource() {
return "sonia/scm/repository/spi/scm-git-spi-lfs-loader-test.zip";
}
}