Fix error with redundant line breaks

This fixes an error with a repository, where an added file has \r\r\n
as line breaks. This interesting combination is handled as a single
line break by the diff operation, while Java's Scanner implementation
handles this as two lines (the first one delimited by the first \r,
the second one delimited by \r\n). This led to empty lines inside the
diff, where we only expect lines that contain at least one character
(' ', '+' or '-'), and this in turn led to an index out of bounds
exception.

Now we treat any run of consecutive line delimiter characters as a
single delimiter. This should be safe because, as mentioned earlier,
we always expect at least one character in each line of a diff
output.
This commit is contained in:
René Pfeuffer
2020-05-04 15:21:57 +02:00
parent ffcc146f75
commit 52a09a96d2
3 changed files with 33 additions and 5 deletions

View File

@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed ### Fixed
- Protocol URI for git commands under windows ([#1108](https://github.com/scm-manager/scm-manager/pull/1108)) - Protocol URI for git commands under windows ([#1108](https://github.com/scm-manager/scm-manager/pull/1108))
- Fix usage of invalid cipher algorith on newer java versions ([#1110](https://github.com/scm-manager/scm-manager/issues/1110),[#1112](https://github.com/scm-manager/scm-manager/pull/1112)) - Fix usage of invalid cipher algorith on newer java versions ([#1110](https://github.com/scm-manager/scm-manager/issues/1110),[#1112](https://github.com/scm-manager/scm-manager/pull/1112))
- Handle obscure line breaks in diff viewer ([#1129](https://github.com/scm-manager/scm-manager/pull/1129))
## [2.0.0-rc7] - 2020-04-09 ## [2.0.0-rc7] - 2020-04-09
### Added ### Added

View File

@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE. * SOFTWARE.
*/ */
package sonia.scm.repository.spi; package sonia.scm.repository.spi;
import sonia.scm.repository.api.DiffLine; import sonia.scm.repository.api.DiffLine;
@@ -49,9 +49,9 @@ final class GitHunkParser {
public List<Hunk> parse(String content) { public List<Hunk> parse(String content) {
List<Hunk> hunks = new ArrayList<>(); List<Hunk> hunks = new ArrayList<>();
try (Scanner scanner = new Scanner(content)) { try (Scanner scanner = new Scanner(content).useDelimiter("[\n\r\u2028\u2029\u0085]+")) {
while (scanner.hasNextLine()) { while (scanner.hasNext()) {
String line = scanner.nextLine(); String line = scanner.next();
if (line.startsWith("@@")) { if (line.startsWith("@@")) {
parseHeader(hunks, line); parseHeader(hunks, line);
} else if (currentGitHunk != null) { } else if (currentGitHunk != null) {

View File

@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE. * SOFTWARE.
*/ */
package sonia.scm.repository.spi; package sonia.scm.repository.spi;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@@ -103,6 +103,15 @@ class GitHunkParserTest {
"+added line\n" + "+added line\n" +
"\\ No newline at end of file\n"; "\\ No newline at end of file\n";
/**
 * Diff fixture with one hunk header and three context lines, where the
 * first two context lines are terminated by CR CR LF instead of a plain LF.
 */
private static final String MULTIPLE_LINE_BREAKS = "diff --git a/.editorconfig b/.editorconfig\n" +
"index ea2a3ba..2f02f32 100644\n" +
"--- a/.editorconfig\n" +
"+++ b/.editorconfig\n" +
"@@ -10,3 +10,4 @@\n" +
" indent_style = space\r\r\n" +
" indent_size = 2\r\r\n" +
" charset = utf-8\n";
@Test @Test
void shouldParseHunks() { void shouldParseHunks() {
List<Hunk> hunks = new GitHunkParser().parse(DIFF_001); List<Hunk> hunks = new GitHunkParser().parse(DIFF_001);
@@ -183,6 +192,24 @@ class GitHunkParserTest {
assertThat(lastLine.getContent()).isEqualTo("added line"); assertThat(lastLine.getContent()).isEqualTo("added line");
} }
/**
 * Parses {@code MULTIPLE_LINE_BREAKS} and checks that the first hunk yields
 * exactly three diff lines, and that the first of them starts at old/new
 * line number 10 with its content free of the leading marker and of any
 * trailing line break characters.
 */
@Test
void shouldHandleMultipleLineBreaks() {
List<Hunk> hunks = new GitHunkParser().parse(MULTIPLE_LINE_BREAKS);
Hunk hunk = hunks.get(0);
Iterator<DiffLine> lines = hunk.iterator();
DiffLine line1 = lines.next();
assertThat(line1.getOldLineNumber()).hasValue(10);
assertThat(line1.getNewLineNumber()).hasValue(10);
assertThat(line1.getContent()).isEqualTo("indent_style = space");
// Consume the two remaining lines; their content is not asserted here.
lines.next();
lines.next();
// No further (e.g. empty) lines may have been produced by the extra line breaks.
assertThat(lines.hasNext()).isFalse();
}
private void assertHunk(Hunk hunk, int oldStart, int oldLineCount, int newStart, int newLineCount) { private void assertHunk(Hunk hunk, int oldStart, int oldLineCount, int newStart, int newLineCount) {
assertThat(hunk.getOldStart()).isEqualTo(oldStart); assertThat(hunk.getOldStart()).isEqualTo(oldStart);
assertThat(hunk.getOldLineCount()).isEqualTo(oldLineCount); assertThat(hunk.getOldLineCount()).isEqualTo(oldLineCount);