Expose content type resolver api to plugins (#1752)

Expose an API that makes it easy to detect the content type of files. The API is based on the spotter API, but does not expose any spotter classes.

Co-authored-by: René Pfeuffer <rene.pfeuffer@cloudogu.com>
This commit is contained in:
Sebastian Sdorra
2021-08-03 10:41:38 +02:00
committed by GitHub
parent 2a481a75b3
commit e492a30eea
13 changed files with 320 additions and 124 deletions

View File

@@ -0,0 +1,2 @@
- type: Changed
description: Expose content type resolver api to plugins ([#1752](https://github.com/scm-manager/scm-manager/pull/1752))

View File

@@ -0,0 +1,71 @@
/*
* MIT License
*
* Copyright (c) 2020-present Cloudogu GmbH and Contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package sonia.scm.io;
import java.util.Optional;
/**
* Detected type of content.
* Gives access to the content type as a whole and split into its two parts,
* and tells whether the content is text and which programming language was detected.
*
* @since 2.23.0
*/
public interface ContentType {
/**
* Returns the primary part of the content type (e.g.: {@code text} of {@code text/plain}).
*
* @return primary content type part
*/
String getPrimary();
/**
* Returns the secondary part of the content type (e.g.: {@code plain} of {@code text/plain}).
*
* @return secondary content type part
*/
String getSecondary();
/**
* Returns the raw presentation of the content type,
* i.e. primary and secondary part in one string (e.g.: {@code text/plain}).
*
* @return raw presentation
*/
String getRaw();
/**
* Returns {@code true} if the content type is text based.
*
* @return {@code true} for text content, {@code false} otherwise
*/
boolean isText();
/**
* Returns an optional with the programming language,
* or an empty optional if the content is not a programming language.
*
* @return programming language or empty
*/
Optional<String> getLanguage();
}

View File

@@ -0,0 +1,52 @@
/*
* MIT License
*
* Copyright (c) 2020-present Cloudogu GmbH and Contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package sonia.scm.io;
/**
* ContentTypeResolver is able to detect the {@link ContentType} of files based on their path
* and (optionally) a few starting bytes.
* These files do not have to be real files on the file system, but can be hypothetical constructs
* ("What content type is most probable for a file named like this").
*
* @since 2.23.0
*/
public interface ContentTypeResolver {
/**
* Detects the {@link ContentType} of the given path, by only using path based strategies.
* No content is inspected by this variant.
*
* @param path path of the file
*
* @return {@link ContentType} of path
*/
ContentType resolve(String path);
/**
* Detects the {@link ContentType} of the given path, by using path and content based strategies.
*
* @param path path of the file
* @param contentPrefix first few bytes of the content
*
* @return {@link ContentType} of path and content prefix
*/
ContentType resolve(String path, byte[] contentPrefix);
}

View File

@@ -24,7 +24,6 @@
package sonia.scm.api.v2.resources;
import com.github.sdorra.spotter.ContentType;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.media.Content;
@@ -33,7 +32,8 @@ import io.swagger.v3.oas.annotations.responses.ApiResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sonia.scm.NotFoundException;
import sonia.scm.api.v2.ContentTypeResolver;
import sonia.scm.io.ContentType;
import sonia.scm.io.ContentTypeResolver;
import sonia.scm.repository.NamespaceAndName;
import sonia.scm.repository.api.RepositoryService;
import sonia.scm.repository.api.RepositoryServiceFactory;
@@ -61,10 +61,12 @@ public class ContentResource {
private static final int HEAD_BUFFER_SIZE = 1024;
private final RepositoryServiceFactory serviceFactory;
private final ContentTypeResolver contentTypeResolver;
@Inject
public ContentResource(RepositoryServiceFactory serviceFactory) {
public ContentResource(RepositoryServiceFactory serviceFactory, ContentTypeResolver contentTypeResolver) {
this.serviceFactory = serviceFactory;
this.contentTypeResolver = contentTypeResolver;
}
/**
@@ -204,10 +206,10 @@ public class ContentResource {
}
private void appendContentHeader(String path, byte[] head, Response.ResponseBuilder responseBuilder) {
ContentType contentType = ContentTypeResolver.resolve(path, head);
ContentType contentType = contentTypeResolver.resolve(path, head);
responseBuilder.header("Content-Type", contentType.getRaw());
contentType.getLanguage().ifPresent(
language -> responseBuilder.header(ProgrammingLanguages.HEADER, ProgrammingLanguages.getValue(language))
language -> responseBuilder.header(ProgrammingLanguages.HEADER, language)
);
}

View File

@@ -24,10 +24,9 @@
package sonia.scm.api.v2.resources;
import com.github.sdorra.spotter.Language;
import com.google.inject.Inject;
import de.otto.edison.hal.Links;
import sonia.scm.api.v2.ContentTypeResolver;
import sonia.scm.io.ContentTypeResolver;
import sonia.scm.repository.Repository;
import sonia.scm.repository.api.DiffFile;
import sonia.scm.repository.api.DiffLine;
@@ -49,10 +48,12 @@ import static de.otto.edison.hal.Links.linkingTo;
class DiffResultToDiffResultDtoMapper {
private final ResourceLinks resourceLinks;
private final ContentTypeResolver contentTypeResolver;
@Inject
DiffResultToDiffResultDtoMapper(ResourceLinks resourceLinks) {
DiffResultToDiffResultDtoMapper(ResourceLinks resourceLinks, ContentTypeResolver contentTypeResolver) {
this.resourceLinks = resourceLinks;
this.contentTypeResolver = contentTypeResolver;
}
public DiffResultDto mapForIncoming(Repository repository, DiffResult result, String source, String target) {
@@ -154,8 +155,8 @@ class DiffResultToDiffResultDtoMapper {
dto.setOldPath(oldPath);
dto.setOldRevision(file.getOldRevision());
Optional<Language> language = ContentTypeResolver.resolve(path).getLanguage();
language.ifPresent(value -> dto.setLanguage(ProgrammingLanguages.getValue(value)));
Optional<String> language = contentTypeResolver.resolve(path).getLanguage();
language.ifPresent(dto::setLanguage);
List<DiffResultDto.HunkDto> hunks = new ArrayList<>();
for (Hunk hunk : file) {

View File

@@ -24,25 +24,10 @@
package sonia.scm.api.v2.resources;
import com.github.sdorra.spotter.Language;
import java.util.Optional;
/**
 * Static helper which turns a spotter {@link Language} into a plain mode name
 * and holds the name of the header used to transport it.
 */
final class ProgrammingLanguages {

  /** Name of the header which carries the programming language. */
  static final String HEADER = "X-Programming-Language";

  /** Value used if the language has neither an ace mode nor a codemirror mode. */
  private static final String DEFAULT = "text";

  private ProgrammingLanguages() {
    // static members only, never instantiated
  }

  /**
   * Returns the mode name of the given language.
   * The ace mode wins if it is present, otherwise the codemirror mode is used
   * and if both are missing the default mode is returned.
   *
   * @param language detected language
   *
   * @return ace mode, codemirror mode or the default mode
   */
  static String getValue(Language language) {
    // the codemirror mode is only looked up if there is no ace mode
    return language.getAceMode()
      .orElseGet(() -> language.getCodemirrorMode().orElse(DEFAULT));
  }
}

View File

@@ -22,28 +22,45 @@
* SOFTWARE.
*/
package sonia.scm.api.v2.resources;
package sonia.scm.io;
import com.github.sdorra.spotter.Language;
import org.junit.jupiter.api.Test;
import java.util.Optional;
import static org.assertj.core.api.Assertions.assertThat;
public class DefaultContentType implements ContentType {
class ProgrammingLanguagesTest {
private static final String DEFAULT_LANG_MODE = "text";
@Test
void shouldReturnAceModeIfPresent() {
assertThat(ProgrammingLanguages.getValue(Language.GO)).isEqualTo("golang");
assertThat(ProgrammingLanguages.getValue(Language.JAVA)).isEqualTo("java");
private final com.github.sdorra.spotter.ContentType contentType;
DefaultContentType(com.github.sdorra.spotter.ContentType contentType) {
this.contentType = contentType;
}
@Test
void shouldReturnCodemirrorIfAceModeIsMissing() {
assertThat(ProgrammingLanguages.getValue(Language.HTML_ECR)).isEqualTo("htmlmixed");
@Override
public String getPrimary() {
return contentType.getPrimary();
}
@Test
void shouldReturnTextIfNoModeIsPresent() {
assertThat(ProgrammingLanguages.getValue(Language.HXML)).isEqualTo("text");
@Override
public String getSecondary() {
return contentType.getSecondary();
}
@Override
public String getRaw() {
return contentType.getRaw();
}
@Override
public boolean isText() {
return contentType.isText();
}
@Override
public Optional<String> getLanguage() {
return contentType.getLanguage().map(language -> {
Optional<String> aceMode = language.getAceMode();
return aceMode.orElseGet(() -> language.getCodemirrorMode().orElse(DEFAULT_LANG_MODE));
});
}
}

View File

@@ -22,13 +22,12 @@
* SOFTWARE.
*/
package sonia.scm.api.v2;
package sonia.scm.io;
import com.github.sdorra.spotter.ContentType;
import com.github.sdorra.spotter.ContentTypeDetector;
import com.github.sdorra.spotter.Language;
public final class ContentTypeResolver {
public final class DefaultContentTypeResolver implements ContentTypeResolver {
private static final ContentTypeDetector PATH_BASED = ContentTypeDetector.builder()
.defaultPathBased().boost(Language.MARKDOWN)
@@ -38,14 +37,13 @@ public final class ContentTypeResolver {
.defaultPathAndContentBased().boost(Language.MARKDOWN)
.bestEffortMatch();
private ContentTypeResolver() {
@Override
public DefaultContentType resolve(String path) {
return new DefaultContentType(PATH_BASED.detect(path));
}
public static ContentType resolve(String path) {
return PATH_BASED.detect(path);
}
public static ContentType resolve(String path, byte[] contentPrefix) {
return PATH_AND_CONTENT_BASED.detect(path, contentPrefix);
@Override
public DefaultContentType resolve(String path, byte[] contentPrefix) {
return new DefaultContentType(PATH_AND_CONTENT_BASED.detect(path, contentPrefix));
}
}

View File

@@ -59,6 +59,8 @@ import sonia.scm.group.GroupManagerProvider;
import sonia.scm.group.xml.XmlGroupDAO;
import sonia.scm.initialization.DefaultInitializationFinisher;
import sonia.scm.initialization.InitializationFinisher;
import sonia.scm.io.ContentTypeResolver;
import sonia.scm.io.DefaultContentTypeResolver;
import sonia.scm.metrics.MeterRegistryProvider;
import sonia.scm.migration.MigrationDAO;
import sonia.scm.net.SSLContextProvider;
@@ -290,6 +292,8 @@ class ScmServletModule extends ServletModule {
bind(IndexQueue.class, DefaultIndexQueue.class);
bind(SearchEngine.class, LuceneSearchEngine.class);
bind(IndexLogStore.class, DefaultIndexLogStore.class);
bind(ContentTypeResolver.class).to(DefaultContentTypeResolver.class);
}
private <T> void bind(Class<T> clazz, Class<? extends T> defaultImplementation) {

View File

@@ -1,65 +0,0 @@
/*
* MIT License
*
* Copyright (c) 2020-present Cloudogu GmbH and Contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package sonia.scm.api.v2;
import com.github.sdorra.spotter.ContentType;
import com.github.sdorra.spotter.Language;
import org.junit.jupiter.api.Test;
import java.nio.charset.StandardCharsets;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Checks the language detection of the static {@code ContentTypeResolver.resolve} methods
 * for markdown files and Dockerfiles.
 */
class ContentSearchableTypeResolverTest {

  /** Path and content prefix together should be detected as markdown. */
  @Test
  void shouldResolveMarkdown() {
    byte[] prefix = String.join("\n",
      "% Markdown content",
      "% Which does not start with markdown"
    ).getBytes(StandardCharsets.UTF_8);

    ContentType detected = ContentTypeResolver.resolve("somedoc.md", prefix);

    assertThat(detected.getLanguage()).contains(Language.MARKDOWN);
  }

  /** The path alone should be enough to detect markdown. */
  @Test
  void shouldResolveMarkdownWithoutContent() {
    ContentType detected = ContentTypeResolver.resolve("somedoc.md");

    assertThat(detected.getLanguage()).contains(Language.MARKDOWN);
  }

  /** Additional dots in the file name should not disturb the detection. */
  @Test
  void shouldResolveMarkdownEvenWithDotsInFilename() {
    ContentType detected = ContentTypeResolver.resolve("somedoc.1.1.md");

    assertThat(detected.getLanguage()).contains(Language.MARKDOWN);
  }

  /** A file without extension should be detected by its name. */
  @Test
  void shouldResolveDockerfile() {
    ContentType detected = ContentTypeResolver.resolve("Dockerfile");

    assertThat(detected.getLanguage()).contains(Language.DOCKERFILE);
  }
}

View File

@@ -29,10 +29,10 @@ import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Answers;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;
import sonia.scm.NotFoundException;
import sonia.scm.io.DefaultContentTypeResolver;
import sonia.scm.repository.NamespaceAndName;
import sonia.scm.repository.api.CatCommandBuilder;
import sonia.scm.repository.api.RepositoryService;
@@ -68,13 +68,14 @@ public class ContentResourceTest {
@Mock(answer = Answers.RETURNS_DEEP_STUBS)
private RepositoryServiceFactory repositoryServiceFactory;
@InjectMocks
private ContentResource contentResource;
private CatCommandBuilder catCommand;
@Before
public void initService() throws Exception {
contentResource = new ContentResource(repositoryServiceFactory, new DefaultContentTypeResolver());
NamespaceAndName existingNamespaceAndName = new NamespaceAndName(NAMESPACE, REPO_NAME);
RepositoryService repositoryService = repositoryServiceFactory.create(existingNamespaceAndName);
catCommand = repositoryService.getCatCommand();
@@ -169,7 +170,7 @@ public class ContentResourceTest {
@Test
public void shouldNotReadCompleteFileForHead() throws Exception {
FailingAfterSomeBytesStream stream = new FailingAfterSomeBytesStream();
doAnswer(invocation -> stream).when(catCommand).getStream(eq("readHeadOnly"));
doAnswer(invocation -> stream).when(catCommand).getStream("readHeadOnly");
Response response = contentResource.metadata(NAMESPACE, REPO_NAME, REV, "readHeadOnly");
assertEquals(200, response.getStatus());
@@ -201,7 +202,7 @@ public class ContentResourceTest {
outputStream.close();
return null;
}).when(catCommand).retriveContent(any(), eq(path));
doAnswer(invocation -> new ByteArrayInputStream(content)).when(catCommand).getStream(eq(path));
doAnswer(invocation -> new ByteArrayInputStream(content)).when(catCommand).getStream(path);
}
private ByteArrayOutputStream readOutputStream(Response response) throws IOException {

View File

@@ -28,6 +28,7 @@ import de.otto.edison.hal.Link;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;
import sonia.scm.io.DefaultContentTypeResolver;
import sonia.scm.repository.Repository;
import sonia.scm.repository.api.DiffFile;
import sonia.scm.repository.api.DiffLine;
@@ -52,7 +53,7 @@ class DiffResultToDiffResultDtoMapperTest {
private static final Repository REPOSITORY = new Repository("1", "git", "space", "X");
ResourceLinks resourceLinks = ResourceLinksMock.createMock(create("/scm/api/v2"));
DiffResultToDiffResultDtoMapper mapper = new DiffResultToDiffResultDtoMapper(resourceLinks);
DiffResultToDiffResultDtoMapper mapper = new DiffResultToDiffResultDtoMapper(resourceLinks, new DefaultContentTypeResolver());
@Test
void shouldMapDiffResult() {

View File

@@ -0,0 +1,127 @@
/*
* MIT License
*
* Copyright (c) 2020-present Cloudogu GmbH and Contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package sonia.scm.io;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import java.nio.charset.StandardCharsets;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
/**
 * Tests for {@link DefaultContentTypeResolver}: parts of the content type,
 * text detection and programming language detection.
 */
class DefaultContentTypeResolverTest {

  private final DefaultContentTypeResolver resolver = new DefaultContentTypeResolver();

  @Test
  void shouldReturnPrimaryPart() {
    String primary = resolver.resolve("hog.pdf").getPrimary();

    assertThat(primary).isEqualTo("application");
  }

  @Test
  void shouldReturnSecondaryPart() {
    String secondary = resolver.resolve("hog.pdf").getSecondary();

    assertThat(secondary).isEqualTo("pdf");
  }

  @Test
  void shouldReturnRaw() {
    String raw = resolver.resolve("hog.pdf").getRaw();

    assertThat(raw).isEqualTo("application/pdf");
  }

  /** Path based text detection. */
  @Nested
  class IsTextTests {

    @ParameterizedTest(name = "shouldReturnIsTextFor: {argumentsWithNames}")
    @ValueSource(strings = {"App.java", "Dockerfile", "Playbook.yml", "README.md", "LICENSE.txt"})
    void shouldReturnIsTextFor(String path) {
      boolean text = resolver.resolve(path).isText();

      assertThat(text).isTrue();
    }

    @ParameterizedTest(name = "shouldReturnIsNotTextFor: {argumentsWithNames}")
    @ValueSource(strings = {"scan.exe", "hog.pdf", "library.so", "awesome.dll", "something.dylib"})
    void shouldReturnIsNotTextFor(String path) {
      boolean text = resolver.resolve(path).isText();

      assertThat(text).isFalse();
    }
  }

  /** Detection of the programming language and the mode name which is returned for it. */
  @Nested
  class LanguageTests {

    @Test
    void shouldResolveMarkdown() {
      byte[] prefix = String.join("\n",
        "% Markdown content",
        "% Which does not start with markdown"
      ).getBytes(StandardCharsets.UTF_8);

      ContentType detected = resolver.resolve("somedoc.md", prefix);

      Assertions.assertThat(detected.getLanguage()).contains("markdown");
    }

    @Test
    void shouldResolveMarkdownWithoutContent() {
      ContentType detected = resolver.resolve("somedoc.md");

      Assertions.assertThat(detected.getLanguage()).contains("markdown");
    }

    @Test
    void shouldResolveMarkdownEvenWithDotsInFilename() {
      ContentType detected = resolver.resolve("somedoc.1.1.md");

      Assertions.assertThat(detected.getLanguage()).contains("markdown");
    }

    @Test
    void shouldResolveDockerfile() {
      ContentType detected = resolver.resolve("Dockerfile");

      Assertions.assertThat(detected.getLanguage()).contains("dockerfile");
    }

    @Test
    void shouldReturnAceModeIfPresent() {
      assertThat(resolver.resolve("app.go").getLanguage()).contains("golang"); // codemirror is just go
      assertThat(resolver.resolve("App.java").getLanguage()).contains("java"); // codemirror is clike
    }

    @Test
    void shouldReturnCodemirrorIfAceModeIsMissing() {
      assertThat(resolver.resolve("index.ecr").getLanguage()).contains("htmlmixed");
    }

    @Test
    void shouldReturnTextIfNoModeIsPresent() {
      assertThat(resolver.resolve("index.hxml").getLanguage()).contains("text");
    }
  }
}